In [4]:
!pip install numpy==1.21.5
!pip install pandas==1.3.5
!pip install scikit_learn==1.0.2
!pip install torch==1.10.1
!pip install matplotlib==3.5.1

In [1]:
#importing required libraries
from torch import nn
import pandas as pd
import matplotlib as plt

In [2]:
#loading data
df = pd.read_csv("../Input/data.csv")

In [3]:
df.head()

Unnamed: 0,year,customer_id,phone_no,gender,age,no_of_days_subscribed,multi_screen,mail_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
0,2015,100198,409-8743,Female,36,62,no,no,148.35,12.2,16.81,82,1,4.0,1,0.0
1,2015,100643,340-5930,Female,39,149,no,no,294.45,7.7,33.37,87,3,3.0,2,0.0
2,2015,100756,372-3750,Female,65,126,no,no,87.3,11.9,9.89,91,1,4.0,5,1.0
3,2015,101595,331-4902,Female,24,131,no,yes,321.3,9.5,36.41,102,4,3.0,3,0.0
4,2015,101653,351-8398,Female,40,191,no,no,243.0,10.9,27.54,83,7,3.0,1,0.0


In [8]:
#dropping columns
data = df.drop(["customer_id", "phone_no", "year"], axis=1)

In [9]:
data.tail()

Unnamed: 0,gender,age,no_of_days_subscribed,multi_screen,mail_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
1995,Female,54,75,no,yes,182.25,11.3,20.66,97,5,4.0,2,
1996,Male,45,127,no,no,273.45,9.3,30.99,116,3,3.0,1,0.0
1997,,53,94,no,no,128.85,15.6,14.6,110,16,5.0,0,0.0
1998,Male,40,94,no,no,178.05,10.4,20.18,100,6,,3,0.0
1999,Male,37,73,no,no,326.7,10.3,37.03,89,6,3.0,1,1.0


In [10]:
data.describe()

Unnamed: 0,age,no_of_days_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,1972.0,2000.0,1965.0
mean,38.6905,99.75,270.178425,10.1987,30.62078,100.4155,4.4825,3.250507,1.547,0.133333
std,10.20641,39.755386,80.551627,2.785519,9.129165,19.529454,2.487728,0.809084,1.315164,0.340021
min,18.0,1.0,0.0,0.0,0.0,42.0,0.0,0.0,0.0,0.0
25%,32.0,73.0,218.2125,8.4,24.735,87.0,3.0,3.0,1.0,0.0
50%,37.0,99.0,269.925,10.2,30.59,101.0,4.0,3.0,1.0,0.0
75%,44.0,127.0,324.675,12.0,36.7975,114.0,6.0,4.0,2.0,0.0
max,82.0,243.0,526.2,20.0,59.64,175.0,19.0,6.0,9.0,1.0


## Droping Null Values

In [11]:
#checking for null values
data.isna().sum()

gender                    24
age                        0
no_of_days_subscribed      0
multi_screen               0
mail_subscribed            0
weekly_mins_watched        0
minimum_daily_mins         0
maximum_daily_mins         0
weekly_max_night_mins      0
videos_watched             0
maximum_days_inactive     28
customer_support_calls     0
churn                     35
dtype: int64

In [12]:
#shape of the data
data.shape

(2000, 13)

In [13]:
#dropping null values
data = data.dropna(axis=0)

In [14]:
#shape of the data after dropping null values
data.shape

(1918, 13)

## Categorical Variables

In [15]:
#unique values in gender column
data["gender"].unique()

array(['Female', 'Male'], dtype=object)

In [16]:
print(data["multi_screen"].unique())
print(data["mail_subscribed"].unique())

['no' 'yes']
['no' 'yes']


In [17]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

In [18]:
#label encoding categorical features
data["gender"] = le.fit_transform(data["gender"])
data["multi_screen"] = le.fit_transform(data["multi_screen"])
data["mail_subscribed"] = le.fit_transform(data["mail_subscribed"])

In [19]:
data.head()

Unnamed: 0,gender,age,no_of_days_subscribed,multi_screen,mail_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
0,0,36,62,0,0,148.35,12.2,16.81,82,1,4.0,1,0.0
1,0,39,149,0,0,294.45,7.7,33.37,87,3,3.0,2,0.0
2,0,65,126,0,0,87.3,11.9,9.89,91,1,4.0,5,1.0
3,0,24,131,0,1,321.3,9.5,36.41,102,4,3.0,3,0.0
4,0,40,191,0,0,243.0,10.9,27.54,83,7,3.0,1,0.0


In [20]:
#distribution of the target column
data.groupby("churn").size()

churn
0.0    1665
1.0     253
dtype: int64

## Numerical

In [21]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [22]:
#dropping categorical columns and keeping numeircal columns only
data_num = data.drop(["gender", "multi_screen", "mail_subscribed"], axis=1)

In [23]:
cols = data_num.columns 
data_num = scaler.fit_transform(data_num)

In [24]:
#list of numerical columns only
cols = list(cols)

In [25]:
data[cols] = data_num

In [26]:
data.head()

Unnamed: 0,gender,age,no_of_days_subscribed,multi_screen,mail_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
0,0,0.28125,0.252066,0,0,0.281927,0.61,0.281858,0.300752,0.052632,0.666667,0.111111,0.0
1,0,0.328125,0.61157,0,0,0.559578,0.385,0.559524,0.338346,0.157895,0.5,0.222222,0.0
2,0,0.734375,0.516529,0,0,0.165906,0.595,0.165828,0.368421,0.052632,0.666667,0.555556,1.0
3,0,0.09375,0.53719,0,1,0.610604,0.475,0.610496,0.451128,0.210526,0.5,0.333333,0.0
4,0,0.34375,0.785124,0,0,0.461802,0.545,0.461771,0.308271,0.368421,0.5,0.111111,0.0


In [27]:
#defining dependent and indenpendent variables
X = data.drop("churn", axis=1)
Y = data["churn"].astype(int)

In [28]:
# split a dataset into train and test sets
from sklearn.model_selection import train_test_split

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X.values, Y, test_size=0.2, random_state=42)

## Sequential Neural Network

In [30]:
#shape of the training data
X.shape

(1918, 12)

In [31]:
#Hyperparameters for our network
input_size= X.shape[1]
hidden_sizes = [128, 64]
output_size = 2

In [32]:
import torch
from torch import nn
# Build a feed-forward network
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], output_size),
                      nn.Softmax(dim=1))
print(model)

Sequential(
  (0): Linear(in_features=12, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=2, bias=True)
  (5): Softmax(dim=1)
)


In [33]:
from collections import OrderedDict
model_dict = nn.Sequential(OrderedDict([
                      ('fc1', nn.Linear(input_size, hidden_sizes[0])),
                      ('relu1', nn.ReLU()),
                      ('fc2', nn.Linear(hidden_sizes[0], hidden_sizes[1])),
                      ('relu2', nn.ReLU()),
                      ('output', nn.Linear(hidden_sizes[1], output_size)),
                      ('softmax', nn.Softmax(dim=1))]))


In [34]:
 # Define the loss
criterion = nn.NLLLoss()

In [35]:
from torch import optim
# Optimizers require the parameters to optimize and a learning rate
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [36]:
import torch.utils.data as Data
from torch import Tensor
import numpy as np

#converting data into tensor
X_train = Tensor(X_train)
y_train = Tensor(np.array(y_train))

In [37]:
BATCH_SIZE = 64
# EPOCH = 200

torch_dataset = Data.TensorDataset(X_train, y_train)

#loading data for the model
loader = Data.DataLoader(
    dataset=torch_dataset, 
    batch_size=BATCH_SIZE, 
    shuffle=True, num_workers=2,)

## Training the model

In [38]:
import torch
from torch.autograd import Variable

epochs = 100
for e in range(epochs):
    running_loss = 0
    for step, (batch_x, batch_y) in enumerate(loader):

        b_x = Variable(batch_x)
        b_y = Variable(batch_y.type(torch.LongTensor))
        
        # Training pass
        optimizer.zero_grad()
        
        output = model(b_x)
        loss = criterion(output, b_y)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    else:
        print(f"Training loss: {running_loss/len(X_train)}")

Training loss: -0.007764913209724923
Training loss: -0.008279809856974317
Training loss: -0.008807059372574907
Training loss: -0.00934175080395118
Training loss: -0.009877931380054319
Training loss: -0.010406260345874274
Training loss: -0.010907325432229073
Training loss: -0.011363567592578575
Training loss: -0.011759582523261397
Training loss: -0.012091111095508044
Training loss: -0.012360696686915033
Training loss: -0.012576443158974082
Training loss: -0.01274770758608951
Training loss: -0.01288036491134176
Training loss: -0.012988415543072538
Training loss: -0.013073865018270162
Training loss: -0.013144111540047099
Training loss: -0.013199973487170143
Training loss: -0.01324689357190586
Training loss: -0.013286307279258539
Training loss: -0.013320732691763587
Training loss: -0.013347436194314173
Training loss: -0.013372294556208664
Training loss: -0.013392008967318778
Training loss: -0.013411497510241094
Training loss: -0.013425560320838022
Training loss: -0.013439034156724714
Train

In [39]:
#testing with test data
X_test = Tensor(X_test)
y_test = Tensor(np.array(y_test))
z = model(X_test)

In [40]:
z

tensor([[9.9961e-01, 3.8929e-04],
        [9.9977e-01, 2.2851e-04],
        [9.9995e-01, 4.7851e-05],
        [9.9993e-01, 6.5534e-05],
        [9.9996e-01, 4.2665e-05],
        [9.9999e-01, 1.1407e-05],
        [9.9991e-01, 9.4185e-05],
        [9.9999e-01, 7.7333e-06],
        [9.9991e-01, 8.9144e-05],
        [9.9988e-01, 1.1666e-04],
        [9.9982e-01, 1.7669e-04],
        [9.9987e-01, 1.2996e-04],
        [9.9997e-01, 3.0559e-05],
        [9.9989e-01, 1.1265e-04],
        [9.9993e-01, 6.9915e-05],
        [9.9996e-01, 4.4919e-05],
        [9.9994e-01, 5.8609e-05],
        [9.9998e-01, 2.1187e-05],
        [9.9995e-01, 5.3770e-05],
        [9.9998e-01, 2.3366e-05],
        [9.9999e-01, 7.6403e-06],
        [9.9996e-01, 4.2651e-05],
        [9.9998e-01, 1.6911e-05],
        [9.9981e-01, 1.8880e-04],
        [9.9999e-01, 1.4817e-05],
        [9.9993e-01, 6.7087e-05],
        [9.9977e-01, 2.2741e-04],
        [9.9997e-01, 2.7352e-05],
        [9.9994e-01, 6.0926e-05],
        [9.999

In [41]:
#getting prediction by the model
yhat = list(z.argmax(1))

In [42]:
#converting tensor into list
y_test = list(y_test)

In [43]:
#accuracy score
from sklearn.metrics import accuracy_score
accuracy_score(y_test,yhat)

0.859375