In [1]:
#Project Title: "Predicting Customer Churn in a Telecom Company"
#Goal: Build a binary classifier to predict if a customer will leave (churn) based on features like tenure, plan type, and usage.
#Challenge: Handle imbalanced classes and optimize precision/recall.

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset,Dataset
import numpy as np
import pandas as pd
import torch.optim as optim



In [3]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

csv_path = "csv/WA_Fn-UseC_-Telco-Customer-Churn.csv"

df = pd.read_csv(csv_path)
#print(df.head())
#print(df.isnull().sum())
#print(df.dtypes)
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna(subset=['TotalCharges'])
df = df.drop('customerID', axis=1)
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})
binary_cols = ['gender', 'Partner', 'Dependents', 'PaperlessBilling', 'PhoneService']

for col in binary_cols:
    df[col] = df[col].map({'Yes': 1, 'No': 0, 'Male': 1, 'Female': 0})
multi_cat_cols = [
    'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 
    'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 
    'Contract', 'PaymentMethod'
]

df = pd.get_dummies(df, columns=multi_cat_cols, drop_first=True)
num_cols=['tenure','MonthlyCharges','TotalCharges']
scaler=StandardScaler()
df[num_cols]=scaler.fit_transform(df[num_cols])

bool_cols = df.select_dtypes(include='bool').columns
df[bool_cols] = df[bool_cols].astype('int64')
X = df.drop('Churn', axis=1).values.astype('float32')
y = df['Churn'].values.astype('float32')

print(X.dtype)  # should print float32 now
print(X.shape)  # (5625, 30) as before



float32
(7032, 30)


In [4]:
X=df.drop('Churn',axis=1).values
y=df['Churn'].values

X_train,X_temp,y_train,y_temp=train_test_split(X,y,test_size=0.2,random_state=42,stratify=y)
X_val,X_test,y_val,y_test=train_test_split(X_temp,y_temp,test_size=0.5,random_state=42,stratify=y_temp)
print(X_train.dtype)
print(X_train.dtype)  # should be float32
print(X_train.shape)  # should be (samples, features)


float64
float64
(5625, 30)


In [5]:
class ChurnDataset(Dataset):
    def __init__(self,features,labels):
        self.X=torch.tensor(features,dtype=torch.float32)
        self.y=torch.tensor(labels,dtype=torch.float32)
    def __len__(self):
        return len(self.y)
    def __getitem__(self,idx):
        return self.X[idx],self.y[idx]

train_dataset=ChurnDataset(X_train,y_train)
val_dataset=ChurnDataset(X_val,y_val)
test_dataset=ChurnDataset(X_test,y_test)

train_loader=DataLoader(train_dataset,batch_size=64,shuffle=True)
val_loader=DataLoader(val_dataset,batch_size=64,shuffle=True)
test_loader=DataLoader(test_dataset,batch_size=64,shuffle=True)

In [6]:
print(X.shape)

(7032, 30)


In [7]:
class CustomerChurn(nn.Module):
    def __init__(self,input_size,hidden_size,output_size):
        super().__init__()
        self.fc1=nn.Linear(input_size,hidden_size)
        self.Relu=nn.ReLU()
        self.fc2=nn.Linear(hidden_size,output_size)
    def forward(self,x):
        out=self.fc1(x)
        out=self.Relu(out)
        out=self.fc2(out)
        return out


In [8]:
model=CustomerChurn(30,16,1)
criterion=nn.BCEWithLogitsLoss()
optimizer=optim.Adam(model.parameters(),lr=0.001)

In [10]:
num_epochs=10
for epochs in range(num_epochs):
    epoch_loss=0
    for inputs,labels in train_loader:
        labels=labels.unsqueeze(1)
        outputs=model(inputs)
        #output=outputs.unsqueeze(1)
        #print(outputs.shape)
        #print(labels.shape)
        batch_loss=criterion(outputs,labels)
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        epoch_loss+=batch_loss.item()
    avg_train_loss=epoch_loss/len(train_loader)
    print(f"epoch {epochs+1},loss:{avg_train_loss}")


epoch 1,loss:0.4453841261565685
epoch 2,loss:0.4308989965780215
epoch 3,loss:0.4261187457225539
epoch 4,loss:0.42378256944092835
epoch 5,loss:0.42181059650399466
epoch 6,loss:0.41966556960886175
epoch 7,loss:0.41838105564767664
epoch 8,loss:0.4172363540327007
epoch 9,loss:0.4166714427146045
epoch 10,loss:0.4159177043898539


In [11]:
model.eval()
val_loss=0
correct=0
total=0
with torch.no_grad():
    for val_inputs,val_labels in val_loader:
        val_outputs=model(val_inputs)
        val_labels=val_labels.unsqueeze(1)
        #print(val_outputs.shape)
        #print(val_labels.shape)
        loss=criterion(val_outputs,val_labels)
        val_loss+=loss.item()

        preds=torch.sigmoid(val_outputs)
        predicted=(preds>0.5).float()
        correct+=(predicted==val_labels).sum().item()
        total+=val_labels.size(0)

avg_val_loss=val_loss/len(val_loader)
val_accuracy=correct/total
print(f"Epoch {epochs+1}/{num_epochs} - "
          f"Train Loss: {avg_train_loss:.4f}, "
          f"Val Loss: {avg_val_loss:.4f}, "
          f"Val Accuracy: {val_accuracy:.4f}")

        

Epoch 10/10 - Train Loss: 0.4159, Val Loss: 0.4504, Val Accuracy: 0.7937


In [12]:
model.eval()
test_correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        labels = labels.unsqueeze(1)
        outputs = model(inputs)
        preds = torch.sigmoid(outputs)
        predicted = (preds > 0.5).float()
        test_correct += (predicted == labels).sum().item()
        total += labels.size(0)

test_accuracy = test_correct / total
print(f"Test Accuracy: {test_accuracy:.4f}")


Test Accuracy: 0.8082


In [13]:
torch.save(model.state_dict(), "customer_churn_prediction.pt")
