In [None]:
!pip install imblearn==0.0
!pip install numpy==1.21.5
!pip install pandas==1.3.5
!pip install scikit_learn==1.0.2
!pip install torch==1.10.1
!pip install matplotlib==3.4.3

In [3]:
from torch import nn
import pandas as pd
import matplotlib as plt

In [4]:
# Reading data
df = pd.read_csv("../Input/data.csv")

In [5]:
df.head()

Unnamed: 0,year,customer_id,phone_no,gender,age,no_of_days_subscribed,multi_screen,mail_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
0,2015,100198,409-8743,Female,36,62,no,no,148.35,12.2,16.81,82,1,4.0,1,0.0
1,2015,100643,340-5930,Female,39,149,no,no,294.45,7.7,33.37,87,3,3.0,2,0.0
2,2015,100756,372-3750,Female,65,126,no,no,87.3,11.9,9.89,91,1,4.0,5,1.0
3,2015,101595,331-4902,Female,24,131,no,yes,321.3,9.5,36.41,102,4,3.0,3,0.0
4,2015,101653,351-8398,Female,40,191,no,no,243.0,10.9,27.54,83,7,3.0,1,0.0


In [6]:
# Shape of the data
df.shape

(2000, 16)

In [7]:
# Dropping columns "customer_id", "phone_no", and "year" from the DataFrame "df"
data = df.drop(["customer_id", "phone_no", "year"], axis=1)


In [8]:
data.tail()

Unnamed: 0,gender,age,no_of_days_subscribed,multi_screen,mail_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
1995,Female,54,75,no,yes,182.25,11.3,20.66,97,5,4.0,2,
1996,Male,45,127,no,no,273.45,9.3,30.99,116,3,3.0,1,0.0
1997,,53,94,no,no,128.85,15.6,14.6,110,16,5.0,0,0.0
1998,Male,40,94,no,no,178.05,10.4,20.18,100,6,,3,0.0
1999,Male,37,73,no,no,326.7,10.3,37.03,89,6,3.0,1,1.0


In [9]:
data.describe()

Unnamed: 0,age,no_of_days_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,1972.0,2000.0,1965.0
mean,38.6905,99.75,270.178425,10.1987,30.62078,100.4155,4.4825,3.250507,1.547,0.133333
std,10.20641,39.755386,80.551627,2.785519,9.129165,19.529454,2.487728,0.809084,1.315164,0.340021
min,18.0,1.0,0.0,0.0,0.0,42.0,0.0,0.0,0.0,0.0
25%,32.0,73.0,218.2125,8.4,24.735,87.0,3.0,3.0,1.0,0.0
50%,37.0,99.0,269.925,10.2,30.59,101.0,4.0,3.0,1.0,0.0
75%,44.0,127.0,324.675,12.0,36.7975,114.0,6.0,4.0,2.0,0.0
max,82.0,243.0,526.2,20.0,59.64,175.0,19.0,6.0,9.0,1.0


## Droping Null Values

In [10]:
# Checking null values
data.isna().sum()

gender                    24
age                        0
no_of_days_subscribed      0
multi_screen               0
mail_subscribed            0
weekly_mins_watched        0
minimum_daily_mins         0
maximum_daily_mins         0
weekly_max_night_mins      0
videos_watched             0
maximum_days_inactive     28
customer_support_calls     0
churn                     35
dtype: int64

In [11]:
# Shape of data before dropping null values
data.shape

(2000, 13)

In [12]:
# Dropping null values
data = data.dropna(axis=0)

In [13]:
# Shape of data after dropping null values
data.shape

(1918, 13)

## Categorical Variables

In [14]:
# Getting unique categories in the "gender" column
data["gender"].unique()

array(['Female', 'Male'], dtype=object)

In [15]:
# Printing unique values in the "multi_screen" column
print(data["multi_screen"].unique())

# Printing unique values in the "mail_subscribed" column
print(data["mail_subscribed"].unique())

['no' 'yes']
['no' 'yes']


In [16]:
# Create a LabelEncoder instance for encoding categorical labels
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()


In [17]:
# Encode 'gender' column
data["gender"] = le.fit_transform(data["gender"])

# Encode 'multi_screen' column
data["multi_screen"] = le.fit_transform(data["multi_screen"])

# Encode 'mail_subscribed' column
data["mail_subscribed"] = le.fit_transform(data["mail_subscribed"])


In [18]:
data.head()

Unnamed: 0,gender,age,no_of_days_subscribed,multi_screen,mail_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
0,0,36,62,0,0,148.35,12.2,16.81,82,1,4.0,1,0.0
1,0,39,149,0,0,294.45,7.7,33.37,87,3,3.0,2,0.0
2,0,65,126,0,0,87.3,11.9,9.89,91,1,4.0,5,1.0
3,0,24,131,0,1,321.3,9.5,36.41,102,4,3.0,3,0.0
4,0,40,191,0,0,243.0,10.9,27.54,83,7,3.0,1,0.0


In [19]:
# Group the data by the "churn" column and count the occurrences of each unique value
data.groupby("churn").size()

churn
0.0    1665
1.0     253
dtype: int64

## Numerical

In [20]:
# Import the MinMaxScaler class from scikit-learn
from sklearn.preprocessing import MinMaxScaler

# Create an instance of the MinMaxScaler
scaler = MinMaxScaler()


In [21]:
# Dropping specified categorical columns and keeping numerical columns
data_num = data.drop(["gender", "multi_screen", "mail_subscribed"], axis=1)

In [22]:
# Scaling numerical columns using Min-Max scaler
cols = data_num.columns  # Get the column names
data_num = scaler.fit_transform(data_num)  # Apply Min-Max scaling to the numerical columns


In [23]:
# List of numerical columns
cols = list(cols)
cols

['age',
 'no_of_days_subscribed',
 'weekly_mins_watched',
 'minimum_daily_mins',
 'maximum_daily_mins',
 'weekly_max_night_mins',
 'videos_watched',
 'maximum_days_inactive',
 'customer_support_calls',
 'churn']

In [24]:
# assign the scaled numerical values back to the original DataFrame.
data[cols] = data_num

In [25]:
data.head()

Unnamed: 0,gender,age,no_of_days_subscribed,multi_screen,mail_subscribed,weekly_mins_watched,minimum_daily_mins,maximum_daily_mins,weekly_max_night_mins,videos_watched,maximum_days_inactive,customer_support_calls,churn
0,0,0.28125,0.252066,0,0,0.281927,0.61,0.281858,0.300752,0.052632,0.666667,0.111111,0.0
1,0,0.328125,0.61157,0,0,0.559578,0.385,0.559524,0.338346,0.157895,0.5,0.222222,0.0
2,0,0.734375,0.516529,0,0,0.165906,0.595,0.165828,0.368421,0.052632,0.666667,0.555556,1.0
3,0,0.09375,0.53719,0,1,0.610604,0.475,0.610496,0.451128,0.210526,0.5,0.333333,0.0
4,0,0.34375,0.785124,0,0,0.461802,0.545,0.461771,0.308271,0.368421,0.5,0.111111,0.0


In [26]:
# X = data.drop("churn", axis=1)
# Y = data["churn"].astype(int)

# Prepare feature and target variables, and check class distribution.
data.groupby("churn").size()


churn
0.0    1665
1.0     253
dtype: int64

In [27]:
# Handling Class Imbalance using SMOTE
from imblearn.over_sampling import SMOTE

smote = SMOTE()

In [28]:
# x_smote contains the resampled predictor variables, and y_smote contains the corresponding target variable.
x_smote, y_smote = smote.fit_resample(data.iloc[:, 0:-1], data['churn'])


In [29]:
# Print the shape of the original dataset (length of 'data') and the shape of the resampled dataset (length of 'y_smote').
print('Original dataset shape', len(data))
print('Resampled dataset shape', len(y_smote))


Original dataset shape 1918
Resampled dataset shape 3330


In [30]:
# Check the class distribution after class balancing using SMOTE and display the count of each class.
y_smote.groupby(y_smote).size()


churn
0.0    1665
1.0    1665
Name: churn, dtype: int64

In [31]:
# Split a dataset into train and test sets
from sklearn.model_selection import train_test_split

In [32]:
# Split the resampled data into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(x_smote, y_smote, test_size=0.2, random_state=42, stratify=y_smote)


In [33]:
# Print the counts of class 0 in the test set.
print((y_test == 0).sum())

# Print the counts of class 1 in the test set.
print((y_test == 1).sum())


333
333


In [34]:
# Print the counts of class 0 in the training set.
print((y_train == 0).sum())

# Print the counts of class 1 in the training set.
print((y_train == 1).sum())


1332
1332


## Training Neural Networks

In [35]:
# Define hyperparameters for the neural network:
# - 'input_size': The number of features in the input data, which is equal to the number of columns in X_train.
input_size = X_train.shape[1]

# - 'hidden_sizes': A list specifying the number of neurons in each hidden layer. In this case, there are two hidden layers with 128 and 64 neurons.
hidden_sizes = [128, 64]

# - 'output_size': The number of output neurons, which is typically the number of classes in the classification problem. In this case, it's set to 2.
output_size = 2


### Basic Neural Net

In [36]:
import torch
from torch import nn

# Build a feed-forward neural network
model = nn.Sequential(
    nn.Linear(input_size, hidden_sizes[0]),  # First linear layer: input_size to hidden_sizes[0]
    nn.ReLU(),                               
    nn.Linear(hidden_sizes[0], hidden_sizes[1]),  # Second linear layer: hidden_sizes[0] to hidden_sizes[1]
    nn.ReLU(),                               
    nn.Linear(hidden_sizes[1], output_size),    # Third linear layer: hidden_sizes[1] to output_size
    nn.Softmax(dim=1)                         # Softmax activation for classification
)

# Print the model architecture
print(model)


Sequential(
  (0): Linear(in_features=12, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=2, bias=True)
  (5): Softmax(dim=1)
)


In [37]:
# Define the loss function as Negative Log Likelihood Loss (NLLLoss).
criterion = nn.NLLLoss()


In [38]:
from torch import optim

# Create an SGD optimizer for the model's parameters with a learning rate of 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)


In [39]:
import torch.utils.data as Data
from torch import Tensor
import numpy as np

# Convert X_train (predictor variables) to a PyTorch Tensor
X_train = Tensor(X_train.values)

# Convert y_train (target variable) to a PyTorch Tensor
y_train = Tensor(np.array(y_train))


In [40]:
BATCH_SIZE = 64  # Define the batch size for training
# EPOCH = 200 

# Create a PyTorch TensorDataset using X_train and y_train
torch_dataset = Data.TensorDataset(X_train, y_train)

# Create a data loader for batching and shuffling the dataset
loader = Data.DataLoader(
    dataset=torch_dataset,  # Use the TensorDataset as the dataset
    batch_size=BATCH_SIZE,  # Set the batch size
    shuffle=True,           # Shuffle the data during training
    num_workers=2,          # Use 2 workers for data loading (parallel loading)
)


In [41]:
import torch
from torch.autograd import Variable

epochs = 100  # Number of training epochs
for e in range(epochs):
    running_loss = 0
    for step, (batch_x, batch_y) in enumerate(loader):

        b_x = Variable(batch_x)  # Convert batch_x to a PyTorch Variable
        b_y = Variable(batch_y.type(torch.LongTensor))  # Convert batch_y to a PyTorch Variable with the correct data type
        
        # Training pass
        optimizer.zero_grad()  # Clear the gradients

        output = model(b_x)  # Forward pass through the model
        loss = criterion(output, b_y)  # Calculate the loss
        loss.backward()  # Backpropagation to compute gradients
        optimizer.step()  # Update model parameters using the optimizer

        running_loss += loss.item()  # Accumulate the loss

    else:
        print(f"Training loss: {running_loss / len(X_train)}")  # Print the average loss for the current epoch


Training loss: -0.00789778415594731
Training loss: -0.007905048449655195
Training loss: -0.007917628542439
Training loss: -0.007932962295976845
Training loss: -0.007942577270237176
Training loss: -0.007954382379581262
Training loss: -0.007963758185729608
Training loss: -0.007980194278397001
Training loss: -0.007990321306673973
Training loss: -0.008004433489418603
Training loss: -0.008016235332141767
Training loss: -0.00803038292192482
Training loss: -0.008042726665735245
Training loss: -0.008059851523186709
Training loss: -0.00807207761270864
Training loss: -0.008088584127905848
Training loss: -0.008101366225395116
Training loss: -0.008117792797876193
Training loss: -0.008139217065440284
Training loss: -0.00815721685150722
Training loss: -0.008174789437392095
Training loss: -0.00819857581868186
Training loss: -0.00821892346616264
Training loss: -0.008241440686914656
Training loss: -0.008265849650353641
Training loss: -0.008293946360324597
Training loss: -0.008323486256706822
Training l

In [42]:
# Convert the test data 'X_test' to PyTorch Tensor
X_test_tensor = Tensor(X_test.values)

# Convert the test target variable 'y_test' to PyTorch Tensor
y_test = Tensor(np.array(y_test))

# Perform inference (forward pass) on the test data using the trained model 'model'
z = model(X_test_tensor)


In [43]:
from sklearn.metrics import accuracy_score

# Convert the model's predictions 'z' to a list of predicted class labels
yhat = list(z.argmax(1))

# Convert the test target variable 'y_test' to a list
y_test = list(y_test)

# Calculate the accuracy score by comparing the predicted labels with the true labels and print the result
print("Accuracy Score of Test Data:", accuracy_score(y_test, yhat) * 100)


Accuracy Score of Test Data  70.12012012012012


### Dropout

In [44]:
# Define the architecture of a feed-forward neural network with dropout layers.
hidden_sizes = [128, 64, 32, 16]  # Define the number of neurons in each hidden layer.

model_dropout = nn.Sequential(
    nn.Linear(input_size, hidden_sizes[0]),  # First linear layer
    nn.Dropout(0.2),  # Dropout layer with a 20% probability of dropout
    nn.ReLU(),  
    nn.Linear(hidden_sizes[0], hidden_sizes[1]),  # Second linear layer
    nn.Dropout(0.2),  # Dropout layer with a 20% probability of dropout
    nn.ReLU(),  
    nn.Linear(hidden_sizes[1], hidden_sizes[2]),  # Third linear layer
    nn.Dropout(0.1),  # Dropout layer with a 10% probability of dropout
    nn.ReLU(), 
    nn.Linear(hidden_sizes[2], output_size),  # Fourth linear layer
    nn.Softmax(dim=1)  # Softmax activation for classification
)

# Print the architecture of the model with dropout layers.
print(model_dropout)


Sequential(
  (0): Linear(in_features=12, out_features=128, bias=True)
  (1): Dropout(p=0.2, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=128, out_features=64, bias=True)
  (4): Dropout(p=0.2, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=64, out_features=32, bias=True)
  (7): Dropout(p=0.1, inplace=False)
  (8): ReLU()
  (9): Linear(in_features=32, out_features=2, bias=True)
  (10): Softmax(dim=1)
)


In [45]:
# Define the loss function as Negative Log Likelihood Loss (NLLLoss).
criterion = nn.NLLLoss()

# Create an SGD optimizer for the model_dropout's parameters with a learning rate of 0.01.
optimizer = optim.SGD(model_dropout.parameters(), lr=0.01)


In [46]:
import torch
from torch.autograd import Variable

epochs = 100  # Number of training epochs
for e in range(epochs):
    running_loss = 0
    for step, (batch_x, batch_y) in enumerate(loader):

        b_x = Variable(batch_x)  # Convert batch_x to a PyTorch Variable
        b_y = Variable(batch_y.type(torch.LongTensor))  # Convert batch_y to a PyTorch Variable with the correct data type
        
        # Training pass
        optimizer.zero_grad()  # Clear the gradients

        output = model_dropout(b_x)  # Forward pass through the model_dropout
        loss = criterion(output, b_y)  # Calculate the loss
        loss.backward()  # Backpropagation to compute gradients
        optimizer.step()  # Update model_dropout's parameters using the optimizer

        running_loss += loss.item()  # Accumulate the loss

    else:
        print(f"Training loss: {running_loss / len(X_train)}")  # Print the average loss for the current epoch


Training loss: -0.007888370638226604
Training loss: -0.007894002728992038
Training loss: -0.007897055990345127
Training loss: -0.007894492431266888
Training loss: -0.007900295169414344
Training loss: -0.007897352134143268
Training loss: -0.00790988518534838
Training loss: -0.007908739809606885
Training loss: -0.007911512052064185
Training loss: -0.007911378411469874
Training loss: -0.007909666836351246
Training loss: -0.007914458074279734
Training loss: -0.007920038089290395
Training loss: -0.007917347131996183
Training loss: -0.007925467123766919
Training loss: -0.007936260005733272
Training loss: -0.00793001128433345
Training loss: -0.007932183766239756
Training loss: -0.007931865561563332
Training loss: -0.007945480390085472
Training loss: -0.00794787975671413
Training loss: -0.007954389740665396
Training loss: -0.007954380276414368
Training loss: -0.007964967327701437
Training loss: -0.007968773444493612
Training loss: -0.007963658688036172
Training loss: -0.007969736437629294
Trai

In [47]:
from sklearn.metrics import accuracy_score

# Convert the test data 'X_test' to PyTorch Tensor
X_test_tensor = Tensor(X_test.values)

# Convert the test target variable 'y_test' to a PyTorch Tensor
y_test = Tensor(np.array(y_test))

# Perform inference (forward pass) on the test data using the model with dropout layers ('model_dropout')
z = model_dropout(X_test_tensor)

# Convert the model's predictions 'z' to a list of predicted class labels
yhat = list(z.argmax(1))

# Convert the test target variable 'y_test' to a list
y_test = list(y_test)

# Calculate the accuracy score by comparing the predicted labels with the true labels and print the result
print("Accuracy Score of Test Data:", accuracy_score(y_test, yhat) * 100)


Accuracy Score of Test Data  66.96696696696696


### Regularization

In [48]:
# Define the architecture of a feed-forward neural network with dropout layers.
hidden_sizes = [128, 64]  # Define the number of neurons in each hidden layer.

model_reg = nn.Sequential(
    nn.Linear(input_size, hidden_sizes[0]),  # First linear layer
    nn.Dropout(0.2),  # Dropout layer with a 20% probability of dropout
    nn.ReLU(),  
    nn.Linear(hidden_sizes[0], hidden_sizes[1]),  # Second linear layer
    nn.Dropout(0.2),  # Dropout layer with a 20% probability of dropout
    nn.ReLU(),  
    nn.Linear(hidden_sizes[1], hidden_sizes[1]),  # Third linear layer (hidden_sizes[1] repeated)
    nn.Dropout(0.1),  # Dropout layer with a 10% probability of dropout
    nn.ReLU(),  
    nn.Linear(hidden_sizes[1], output_size),  # Fourth linear layer
    nn.Softmax(dim=1)  # Softmax activation for classification
)

# Print the architecture of the model with dropout layers.
print(model_reg)


Sequential(
  (0): Linear(in_features=12, out_features=128, bias=True)
  (1): Dropout(p=0.2, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=128, out_features=64, bias=True)
  (4): Dropout(p=0.2, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=64, out_features=64, bias=True)
  (7): Dropout(p=0.1, inplace=False)
  (8): ReLU()
  (9): Linear(in_features=64, out_features=2, bias=True)
  (10): Softmax(dim=1)
)


In [49]:
# Define the loss function as Negative Log Likelihood Loss (NLLLoss).
criterion = nn.NLLLoss()

# Import the optimizer module and create an Adam optimizer for the 'model_reg' parameters.
# Set the learning rate to 1e-4 and add L2 regularization by specifying the 'weight_decay' parameter.
optimizer = optim.Adam(model_reg.parameters(), lr=1e-4, weight_decay=1e-5)


In [50]:
import torch
from torch.autograd import Variable

epochs = 100  # Number of training epochs
for e in range(epochs):
    running_loss = 0
    for step, (batch_x, batch_y) in enumerate(loader):

        b_x = Variable(batch_x)  # Convert batch_x to a PyTorch Variable
        b_y = Variable(batch_y.type(torch.LongTensor))  # Convert batch_y to a PyTorch Variable with the correct data type
        
        # Training pass
        optimizer.zero_grad()  # Clear the gradients

        output = model_reg(b_x)  # Forward pass through the model_reg
        loss = criterion(output, b_y)  # Calculate the loss
        loss.backward()  # Backpropagation to compute gradients
        optimizer.step()  # Update model_reg's parameters using the optimizer

        running_loss += loss.item()  # Accumulate the loss

    else:
        print(f"Training loss: {running_loss / len(X_train)}")  # Print the average loss for the current epoch


Training loss: -0.007889933548532091
Training loss: -0.00792435002890793
Training loss: -0.00796551549354115
Training loss: -0.008046041883237369
Training loss: -0.00816335097268537
Training loss: -0.008385614276648284
Training loss: -0.00871310897686102
Training loss: -0.009047251988996615
Training loss: -0.009459709015873459
Training loss: -0.009751390788827214
Training loss: -0.01008074426346713
Training loss: -0.010240384855785885
Training loss: -0.010439680369050653
Training loss: -0.010566812444914569
Training loss: -0.010715384092237856
Training loss: -0.01082327841101466
Training loss: -0.010916096066032444
Training loss: -0.011006011126993655
Training loss: -0.011152815174412084
Training loss: -0.011240321997407678
Training loss: -0.011230370952739372
Training loss: -0.011226870455183424
Training loss: -0.01128714668142187
Training loss: -0.011344281559412903
Training loss: -0.011517359643011121
Training loss: -0.011538142869780372
Training loss: -0.011599198296979384
Training

In [51]:
from sklearn.metrics import accuracy_score

# Convert the test data 'X_test' to PyTorch Tensor
X_test_tensor = Tensor(X_test.values)

# Convert the test target variable 'y_test' to a PyTorch Tensor
y_test = Tensor(np.array(y_test))

# Perform inference (forward pass) on the test data using the model with dropout layers and L2 regularization ('model_reg')
z = model_reg(X_test_tensor)

# Convert the model's predictions 'z' to a list of predicted class labels
yhat = list(z.argmax(1))

# Convert the test target variable 'y_test' to a list
y_test = list(y_test)

# Calculate the accuracy score by comparing the predicted labels with the true labels and print the result
print("Accuracy Score of Test Data:", accuracy_score(y_test, yhat) * 100)


Accuracy Score of Test Data  77.47747747747748


### Early Stopping

In [52]:
# Define the architecture of a feed-forward neural network with dropout layers.

hidden_sizes = [128, 64]  # Define the number of neurons in each hidden layer.

model_early_stp = nn.Sequential(
    nn.Linear(input_size, hidden_sizes[0]),  # First linear layer
    nn.Dropout(0.2),  # Dropout layer with a 20% probability of dropout
    nn.ReLU(), 
    nn.Linear(hidden_sizes[0], hidden_sizes[1]),  # Second linear layer
    nn.Dropout(0.2),  # Dropout layer with a 20% probability of dropout
    nn.ReLU(), 
    nn.Linear(hidden_sizes[1], hidden_sizes[1]),  # Third linear layer (hidden_sizes[1] repeated)
    nn.Dropout(0.1),  # Dropout layer with a 10% probability of dropout
    nn.ReLU(),  
    nn.Linear(hidden_sizes[1], output_size),  # Fourth linear layer
    nn.Softmax(dim=1)  # Softmax activation for classification
)

# Print the architecture of the model with dropout layers.
print(model_early_stp)


Sequential(
  (0): Linear(in_features=12, out_features=128, bias=True)
  (1): Dropout(p=0.2, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=128, out_features=64, bias=True)
  (4): Dropout(p=0.2, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=64, out_features=64, bias=True)
  (7): Dropout(p=0.1, inplace=False)
  (8): ReLU()
  (9): Linear(in_features=64, out_features=2, bias=True)
  (10): Softmax(dim=1)
)


In [53]:
# Define the loss function as Negative Log Likelihood Loss (NLLLoss).
criterion = nn.NLLLoss()

# Import the optimizer module and create an Adam optimizer for the 'model_early_stp' parameters.
# Set the learning rate to 1e-4 and add L2 regularization by specifying the 'weight_decay' parameter.
optimizer = optim.Adam(model_early_stp.parameters(), lr=1e-4, weight_decay=1e-5)


In [54]:
epochs = 100  # Number of training epochs
epochs_no_improve = 0  # Counter for epochs with no improvement
early_stop = False  # A flag to indicate whether early stopping criteria have been met
min_loss = np.Inf  # Initialize a variable to store the minimum loss observed
iter = 0  # Iteration counter (optional)


In [55]:
import torch
from torch.autograd import Variable

for e in range(epochs):
    running_loss = 0

    if early_stop:
        print("Stopped")
        break
    else:
        for step, (batch_x, batch_y) in enumerate(loader):
            b_x = Variable(batch_x)
            b_y = Variable(batch_y.type(torch.LongTensor))

            # Training pass
            optimizer.zero_grad()

            output = model_early_stp(b_x)
            loss = criterion(output, b_y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Check for early stopping conditions
            if abs(running_loss) < abs(min_loss):
                epochs_no_improve = 0
                min_loss = running_loss
            else:
                epochs_no_improve += 1

            iter += 1

            # Check for early stopping based on a condition (e.g., no improvement for 'epochs' epochs)
            if e > 5 and epochs_no_improve == epochs:
                print('Early stopping!')
                early_stop = True
                break
            else:
                continue

        else:
            print(f"Training loss: {running_loss/len(X_train)}")  # Print the average loss for the current epoch


Training loss: -0.007904763817339688
Training loss: -0.007935701145066155
Training loss: -0.007988138308933189
Training loss: -0.008100458496325725
Training loss: -0.008247762448615857
Training loss: -0.00849939634402593
Training loss: -0.008786714135168551
Training loss: -0.009152155291210782
Training loss: -0.009548420371773007
Training loss: -0.009894377767323732
Training loss: -0.01019603262344996
Training loss: -0.01040448259573441
Training loss: -0.01058105247335749
Training loss: -0.010690960171702388
Training loss: -0.010919621756842902
Training loss: -0.010918568092602509
Training loss: -0.010990522399141982
Training loss: -0.011169575132407225
Training loss: -0.011149736182825701
Training loss: -0.011277639136479065
Training loss: -0.011329607741610782
Training loss: -0.01143415263286224
Training loss: -0.011387877882242919
Training loss: -0.01151456985924695
Training loss: -0.011481568083032832
Training loss: -0.011505221662757633
Training loss: -0.011605710440032833
Trainin

In [56]:
from sklearn.metrics import accuracy_score

# Convert the test data 'X_test' to PyTorch Tensor
X_test_tensor = Tensor(X_test.values)

# Convert the test target variable 'y_test' to a PyTorch Tensor
y_test = Tensor(np.array(y_test))

# Perform inference (forward pass) on the test data using the model with dropout layers and early stopping ('model_early_stp')
z = model_early_stp(X_test_tensor)

# Convert the model's predictions 'z' to a list of predicted class labels
yhat = list(z.argmax(1))

# Convert the test target variable 'y_test' to a list
y_test = list(y_test)

# Calculate the accuracy score by comparing the predicted labels with the true labels and print the result
print("Accuracy Score of Test Data:", accuracy_score(y_test, yhat) * 100)


Accuracy Score of Test Data  79.72972972972973


### Checkpoint (Loading and saving model)

In [57]:
# Define the architecture of a feed-forward neural network with dropout layers.
hidden_sizes = [128, 64]  # Define the number of neurons in each hidden layer.

model_chk = nn.Sequential(
    nn.Linear(input_size, hidden_sizes[0]),  # First linear layer
    nn.Dropout(0.2),  # Dropout layer with a 20% probability of dropout
    nn.ReLU(),  
    nn.Linear(hidden_sizes[0], hidden_sizes[1]),  # Second linear layer
    nn.Dropout(0.2),  # Dropout layer with a 20% probability of dropout
    nn.ReLU(),  
    nn.Linear(hidden_sizes[1], hidden_sizes[1]),  # Third linear layer (hidden_sizes[1] repeated)
    nn.Dropout(0.1),  # Dropout layer with a 10% probability of dropout
    nn.ReLU(),  
    nn.Linear(hidden_sizes[1], output_size),  # Fourth linear layer
    nn.Softmax(dim=1)  # Softmax activation for classification
)

# Print the architecture of the model with dropout layers.
print(model_chk)


Sequential(
  (0): Linear(in_features=12, out_features=128, bias=True)
  (1): Dropout(p=0.2, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=128, out_features=64, bias=True)
  (4): Dropout(p=0.2, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=64, out_features=64, bias=True)
  (7): Dropout(p=0.1, inplace=False)
  (8): ReLU()
  (9): Linear(in_features=64, out_features=2, bias=True)
  (10): Softmax(dim=1)
)


In [58]:
# Define the loss function as Negative Log Likelihood Loss (NLLLoss).
criterion = nn.NLLLoss()

# Import the optimizer module and create an Adam optimizer for the 'model_chk' parameters.
# Set the learning rate to 1e-4 and add L2 regularization by specifying the 'weight_decay' parameter.
optimizer = optim.Adam(model_chk.parameters(), lr=1e-4, weight_decay=1e-5)


In [61]:
epochs = 3  # Number of training epochs
path = "../model"  # File path for saving or loading the model


In [62]:
import torch
from torch.autograd import Variable

for e in range(epochs):
    running_loss = 0
    for step, (batch_x, batch_y) in enumerate(loader):

        b_x = Variable(batch_x)
        b_y = Variable(batch_y.type(torch.LongTensor))
        
        # Training pass
        optimizer.zero_grad()
        
        output = model_chk(b_x)
        loss = criterion(output, b_y)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # Save a model checkpoint with current state
        torch.save({
            'epoch': e,
            'model_state_dict': model_chk.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': running_loss,
        }, path + "model_" + str(e) + ".pt")
    else:
        print(f"Training loss: {running_loss/len(X_train)}")  # Print the average loss for the current epoch


Training loss: -0.007902685004669625
Training loss: -0.007933632423122366
Training loss: -0.007970469637407555


In [63]:
model_load = model_chk  # Create a new instance of the model (assuming 'model_chk' is the same architecture as the pre-trained model)
optimizer = optim.Adam(model_reg.parameters(), lr=1e-4, weight_decay=1e-5)  # Create a new optimizer for the model

# Load a pre-trained model checkpoint (epoch 2 in this case)
checkpoint = torch.load(path + "model_2.pt")

# Load the state dictionary of the pre-trained model and optimizer
model_load.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

epoch = checkpoint['epoch']  # Retrieve the epoch number from the checkpoint
loss = checkpoint['loss']  # Retrieve the loss from the checkpoint

model_load.eval()  # Set the model to evaluation mode


Sequential(
  (0): Linear(in_features=12, out_features=128, bias=True)
  (1): Dropout(p=0.2, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=128, out_features=64, bias=True)
  (4): Dropout(p=0.2, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=64, out_features=64, bias=True)
  (7): Dropout(p=0.1, inplace=False)
  (8): ReLU()
  (9): Linear(in_features=64, out_features=2, bias=True)
  (10): Softmax(dim=1)
)

In [64]:
from sklearn.metrics import accuracy_score

# Convert the test data 'X_test' to PyTorch Tensor
X_test_tensor = Tensor(X_test.values)

# Convert the test target variable 'y_test' to a PyTorch Tensor
y_test = Tensor(np.array(y_test))

# Perform inference (forward pass) on the test data using the pre-trained model ('model_load')
z = model_load(X_test_tensor)

# Convert the model's predictions 'z' to a list of predicted class labels
yhat = list(z.argmax(1))

# Convert the test target variable 'y_test' to a list
y_test = list(y_test)

# Calculate the accuracy score by comparing the predicted labels with the true labels and print the result
print("Accuracy Score of Test Data:", accuracy_score(y_test, yhat) * 100)


Accuracy Score of Test Data  65.16516516516516


---