<a href="https://colab.research.google.com/github/AshokYarabati/Customer_Valuation_using_Logistic-Reg./blob/main/Customer_Valuation_Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

In [24]:
url="https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/data/Telco-Customer-Churn.csv"
df=pd.read_csv(url)

In [26]:
print(f"Shape: {df.shape}")
print(df.columns.tolist())
print(df.head(5))

Shape: (7043, 21)
['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn']
   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... DeviceProtection  \
0  No phone service             DSL             No  ...               No   
1                No      

In [28]:
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna()

high_value_threshold = df['MonthlyCharges'].quantile(0.7)
tenure_threshold = df['tenure'].quantile(0.7)
df['high_value'] = ((df['MonthlyCharges'] >= high_value_threshold) &
                    (df['tenure'] >= tenure_threshold)).astype(int)

print(f"High value customers: {df['high_value'].sum()} out of {len(df)}")
print(f"High value rate: {(df['high_value'].mean()*100):.1f}%")

internet_dummies = pd.get_dummies(df['InternetService'], prefix='Internet')
contract_dummies = pd.get_dummies(df['Contract'], prefix='Contract')

df_processed = pd.concat([
    df[['tenure', 'MonthlyCharges', 'TotalCharges', 'SeniorCitizen']],
    internet_dummies,
    contract_dummies,
    df['high_value']
], axis=1)

print("\nFirst 3 rows:")
print(df_processed.head(3))

High value customers: 987 out of 7032
High value rate: 14.0%

First 3 rows:
   tenure  MonthlyCharges  TotalCharges  SeniorCitizen  Internet_DSL  \
0       1           29.85         29.85              0          True   
1      34           56.95       1889.50              0          True   
2       2           53.85        108.15              0          True   

   Internet_Fiber optic  Internet_No  Contract_Month-to-month  \
0                 False        False                     True   
1                 False        False                    False   
2                 False        False                     True   

   Contract_One year  Contract_Two year  high_value  
0              False              False           0  
1               True              False           0  
2              False              False           0  


In [29]:
X = df_processed.drop('high_value', axis=1)
y = df_processed['high_value']

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")


from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training set: {X_train.shape[0]} samples")
print(f"Testing set: {X_test.shape[0]} samples")


from sklearn.linear_model import LogisticRegression
model = LogisticRegression(random_state=42, max_iter=1000)
model.fit(X_train, y_train)


train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

print(f"\n--- MODEL PERFORMANCE ---")
print(f"Training Accuracy: {train_score:.3f}")
print(f"Testing Accuracy: {test_score:.3f}")

probabilities = model.predict_proba(X_test)[:, 1]

customer_decisions = []
for i, prob in enumerate(probabilities):
    if prob > 0.7:
        decision = "PREMIUM ACQUISITION"
    elif prob > 0.4:
        decision = "STANDARD ACQUISITION"
    else:
        decision = "BASIC ACQUISITION"

    customer_decisions.append({
        'Customer_ID': X_test.index[i],
        'High_Value_Probability': round(prob, 3),
        'Recommended_Action': decision
    })


results_df = pd.DataFrame(customer_decisions)
print("Customer Acquisition Recommendations:")
print(results_df.head(10))


Features shape: (7032, 10)
Target shape: (7032,)
Training set: 5625 samples
Testing set: 1407 samples

--- MODEL PERFORMANCE ---
Training Accuracy: 0.975
Testing Accuracy: 0.978
Customer Acquisition Recommendations:
   Customer_ID  High_Value_Probability   Recommended_Action
0         1775                   1.000  PREMIUM ACQUISITION
1         1529                   0.000    BASIC ACQUISITION
2         3812                   0.000    BASIC ACQUISITION
3         3310                   0.000    BASIC ACQUISITION
4         3336                   0.000    BASIC ACQUISITION
5         2284                   0.000    BASIC ACQUISITION
6         3208                   0.001    BASIC ACQUISITION
7         1010                   0.000    BASIC ACQUISITION
8         6429                   0.007    BASIC ACQUISITION
9         5586                   0.000    BASIC ACQUISITION
