## Neural Network

### Load the Processed Training Data

In [1]:
import numpy as np
import pandas as pd
# torch is a powerful deep learning framework for building and training neural networks.
import torch
# torch.nn provides modules and classes to help create neural network layers.
import torch.nn as nn
# contains optimization algorithms like SGD, Adam, etc., for training models.
import torch.optim as optim
import random
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# CUDA (Compute Unified Device Architecture) is a parallel computing platform and application programming interface
# (API) created by NVIDIA. It allows developers to use the GPU (Graphics Processing Unit) for general-purpose computing tasks,
# accelerating computations significantly compared to a CPU.

# A tensor is a multi-dimensional array, similar to numpy arrays, but with additional capabilities designed for deep learning and GPU acceleration.

In [3]:
# Report whether PyTorch can see a CUDA device and, when it can, which one
cuda_ready = torch.cuda.is_available()
print("CUDA available:", cuda_ready)

if cuda_ready:
    # Device index 0 is the first GPU visible to this process
    print("GPU Name:", torch.cuda.get_device_name(0))

CUDA available: True
GPU Name: Tesla T4


In [4]:
# Load the processed data for training

In [5]:
df=pd.read_csv('/kaggle/input/ml-assignment/Processed_train.csv')

In [6]:
df.head(5)

Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner,Default
0,18,137576,209136,846,26,2,10.47,60,0.81,1,3,2,1,0,0,0,0
1,47,57194,5970,748,30,2,19.72,36,0.73,1,1,1,0,1,1,0,0
2,26,84328,95065,453,7,2,24.25,12,0.45,3,3,3,0,0,3,1,0
3,53,49795,229582,533,107,3,14.44,60,0.17,2,3,2,1,0,2,1,1
4,49,115450,22072,840,0,4,24.48,12,0.11,2,2,2,0,1,1,1,0


In [7]:
print(df.shape)

(204277, 17)


### Train/Test Split

In [8]:
# Seed every random number generator in play so the split and the weight
# initialisation can be repeated
SEED = 42
random.seed(SEED)                 # Python's built-in RNG
np.random.seed(SEED)              # NumPy's global RNG
torch.manual_seed(SEED)           # PyTorch CPU generator
torch.cuda.manual_seed_all(SEED)  # PyTorch generators on every GPU

# Separate the label column from the predictor columns
TARGET = 'Default'
Y = df[TARGET]
X = df.drop(columns=[TARGET])

# Hold out 20% of the rows for evaluation
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=SEED
)

### Scale the features

In [9]:
# Standardise the features. The scaler's statistics are learned from the
# training split only and then applied, unchanged, to the held-out split.
scaler = StandardScaler().fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [10]:
print(type(Y_train))

<class 'pandas.core.series.Series'>


In [11]:
print(type(scaled_X_train))

<class 'numpy.ndarray'>


### Convert to tensors

In [12]:
# Build float32 PyTorch tensors from the scaled feature arrays and the label Series
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (scaled_X_train, scaled_X_test)
)
Y_train_tensor, Y_test_tensor = (
    torch.tensor(labels.to_numpy(), dtype=torch.float32)
    for labels in (Y_train, Y_test)
)

In [13]:
print(X_train_tensor.shape)

torch.Size([163421, 16])


In [14]:
# Give the labels an explicit column dimension, (N,) -> (N, 1), so that they
# line up with the single-output column produced by the models.
# reshape(-1, 1) is used instead of unsqueeze(1) so that re-running this cell is
# harmless: unsqueeze would add another axis on every run, yielding (N, 1, 1).
Y_train_tensor = Y_train_tensor.reshape(-1, 1)
Y_test_tensor = Y_test_tensor.reshape(-1, 1)

In [15]:
print(Y_train_tensor.shape)

torch.Size([163421, 1])


In [16]:
# define the model
# Note, Dropout is a regularization technique that is used to prevent overfitting. During training, Dropout randomly sets a fraction of input units to 0.

### Defining the first model

In [45]:
# First model: a fully connected binary classifier with two hidden layers
# (16 -> 128 -> 64 -> 1). Hidden layers use ReLU; the output goes through a
# Sigmoid so it can be read as a probability.
model = nn.Sequential(*[
    # 16 input features -> 128 hidden units
    nn.Linear(16, 128), nn.ReLU(),
    # 128 -> 64 hidden units
    nn.Linear(128, 64), nn.ReLU(),
    # 64 -> 1 output unit, squashed into (0, 1)
    nn.Linear(64, 1), nn.Sigmoid(),
])

In [46]:
print('Model:',model)

Model: Sequential(
  (0): Linear(in_features=16, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=1, bias=True)
  (5): Sigmoid()
)


### Define the loss function and optimizer

In [23]:
# Since it is a binary classification problem, we will use the binary cross entropy loss function

In [47]:
loss_function = nn.BCELoss()

In [48]:
# Need to define optimizer as well. An optimizer is an algorithm you use to adjust model weights progressively.
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [49]:
# Use the GPU when one is present, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The model and every tensor fed to it have to live on the same device
model = model.to(device)
X_train_tensor, Y_train_tensor = X_train_tensor.to(device), Y_train_tensor.to(device)
X_test_tensor, Y_test_tensor = X_test_tensor.to(device), Y_test_tensor.to(device)


### Training the Model

In [42]:
# Full-batch training of the first model: the whole training set goes through
# the network once per epoch, so each epoch is exactly one optimizer step.
num_epochs = 5000

model.train()  # nothing inside the loop leaves training mode, so set it once
for epoch in range(num_epochs):
    # Start from clean gradients
    optimizer.zero_grad()

    # Forward pass and loss against the training labels
    outputs = model(X_train_tensor)
    loss = loss_function(outputs, Y_train_tensor)

    # Backward pass followed by the weight update
    loss.backward()
    optimizer.step()

    # Report the loss on every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/5000], Loss: 0.3305
Epoch [20/5000], Loss: 0.3289
Epoch [30/5000], Loss: 0.3232
Epoch [40/5000], Loss: 0.3195
Epoch [50/5000], Loss: 0.3181
Epoch [60/5000], Loss: 0.3174
Epoch [70/5000], Loss: 0.3167
Epoch [80/5000], Loss: 0.3159
Epoch [90/5000], Loss: 0.3152
Epoch [100/5000], Loss: 0.3145
Epoch [110/5000], Loss: 0.3139
Epoch [120/5000], Loss: 0.3133
Epoch [130/5000], Loss: 0.3128
Epoch [140/5000], Loss: 0.3124
Epoch [150/5000], Loss: 0.3121
Epoch [160/5000], Loss: 0.3118
Epoch [170/5000], Loss: 0.3115
Epoch [180/5000], Loss: 0.3112
Epoch [190/5000], Loss: 0.3110
Epoch [200/5000], Loss: 0.3107
Epoch [210/5000], Loss: 0.3104
Epoch [220/5000], Loss: 0.3101
Epoch [230/5000], Loss: 0.3098
Epoch [240/5000], Loss: 0.3095
Epoch [250/5000], Loss: 0.3092
Epoch [260/5000], Loss: 0.3089
Epoch [270/5000], Loss: 0.3086
Epoch [280/5000], Loss: 0.3083
Epoch [290/5000], Loss: 0.3080
Epoch [300/5000], Loss: 0.3077
Epoch [310/5000], Loss: 0.3073
Epoch [320/5000], Loss: 0.3070
Epoch [330/5000],

### Evaluating the Model

In [43]:
# Switch the model to evaluation mode. eval() selects inference behaviour for
# layers such as Dropout/BatchNorm; it does not by itself disable gradients.
model.eval()

with torch.no_grad():  # Disable gradient tracking during inference
    # Forward pass on the held-out split
    outputs = model(X_test_tensor)

    # Convert probabilities to binary labels (0 or 1) with a 0.5 threshold
    predicted = (outputs >= 0.5).float()

# Move tensors to CPU and convert to NumPy arrays for use with scikit-learn
y_pred = predicted.cpu().numpy()
y_test_cpu = Y_test_tensor.cpu().numpy()

# accuracy_score's documented signature is (y_true, y_pred): ground truth first
accuracy = accuracy_score(y_test_cpu, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 88.11%


### Trying Model 2

In [136]:
# Second model: one more hidden layer than the first (16 -> 128 -> 64 -> 32 -> 1)
# and a Dropout layer after every hidden activation. The output still goes
# through a Sigmoid because this is binary classification.
model1 = nn.Sequential(*[
    # 16 input features -> 128 hidden units, half of them dropped during training
    nn.Linear(16, 128), nn.ReLU(), nn.Dropout(0.5),
    # 128 -> 64 hidden units
    nn.Linear(128, 64), nn.ReLU(), nn.Dropout(0.4),
    # 64 -> 32 hidden units
    nn.Linear(64, 32), nn.ReLU(), nn.Dropout(0.2),
    # 32 -> 1 output unit, squashed into (0, 1)
    nn.Linear(32, 1), nn.Sigmoid(),
])

In [122]:
print(model1)

Sequential(
  (0): Linear(in_features=16, out_features=128, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=128, out_features=64, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.4, inplace=False)
  (6): Linear(in_features=64, out_features=32, bias=True)
  (7): ReLU()
  (8): Dropout(p=0.2, inplace=False)
  (9): Linear(in_features=32, out_features=1, bias=True)
  (10): Sigmoid()
)


In [123]:
# Move the second model to the selected device and create an Adam optimizer
# over its parameters.
# NOTE: this rebinds `optimizer`, which earlier pointed at the first model's
# optimizer; from here on `optimizer.step()` updates model1 only.
model1=model1.to(device)
optimizer=optim.Adam(model1.parameters(),lr=0.015)

In [124]:
# Full-batch training of the second model: one optimizer step per epoch
num_epochs = 3000

model1.train()  # training mode keeps the Dropout layers active; set once up front
for epoch in range(num_epochs):
    # Start from clean gradients
    optimizer.zero_grad()

    # Forward pass and loss against the training labels
    outputs = model1(X_train_tensor)
    loss = loss_function(outputs, Y_train_tensor)

    # Backward pass followed by the weight update
    loss.backward()
    optimizer.step()

    # Report the loss on every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/3000], Loss: 0.3420
Epoch [20/3000], Loss: 0.3376
Epoch [30/3000], Loss: 0.3297
Epoch [40/3000], Loss: 0.3287
Epoch [50/3000], Loss: 0.3275
Epoch [60/3000], Loss: 0.3264
Epoch [70/3000], Loss: 0.3261
Epoch [80/3000], Loss: 0.3253
Epoch [90/3000], Loss: 0.3244
Epoch [100/3000], Loss: 0.3247
Epoch [110/3000], Loss: 0.3237
Epoch [120/3000], Loss: 0.3234
Epoch [130/3000], Loss: 0.3225
Epoch [140/3000], Loss: 0.3224
Epoch [150/3000], Loss: 0.3208
Epoch [160/3000], Loss: 0.3205
Epoch [170/3000], Loss: 0.3205
Epoch [180/3000], Loss: 0.3205
Epoch [190/3000], Loss: 0.3198
Epoch [200/3000], Loss: 0.3190
Epoch [210/3000], Loss: 0.3189
Epoch [220/3000], Loss: 0.3188
Epoch [230/3000], Loss: 0.3187
Epoch [240/3000], Loss: 0.3191
Epoch [250/3000], Loss: 0.3184
Epoch [260/3000], Loss: 0.3180
Epoch [270/3000], Loss: 0.3188
Epoch [280/3000], Loss: 0.3184
Epoch [290/3000], Loss: 0.3176
Epoch [300/3000], Loss: 0.3178
Epoch [310/3000], Loss: 0.3177
Epoch [320/3000], Loss: 0.3180
Epoch [330/3000],

In [125]:
# Switch model1 to evaluation mode so its Dropout layers are disabled.
# eval() does not by itself disable gradients; torch.no_grad() below does that.
model1.eval()

with torch.no_grad():  # Disable gradient tracking during inference
    # Forward pass on the held-out split
    outputs = model1(X_test_tensor)

    # Convert probabilities to binary labels (0 or 1) with a 0.5 threshold
    predicted = (outputs >= 0.5).float()

# Move tensors to CPU and convert to NumPy arrays for use with scikit-learn
y_pred = predicted.cpu().numpy()
y_test_cpu = Y_test_tensor.cpu().numpy()

# accuracy_score's documented signature is (y_true, y_pred): ground truth first
accuracy = accuracy_score(y_test_cpu, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 88.46%


### Making predictions with the second model

In [25]:
new_X_test=pd.read_csv('/kaggle/input/ml-assignment/Processed_test.csv')

In [26]:
print(new_X_test.shape)

(51070, 17)


In [36]:
new_X_test.head()

Unnamed: 0,LoanID,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner
0,CKV34LU7V7,55,112656,92393,581,113,2,23.54,36,0.15,4,3,2,1,1,4,0
1,62KTYNH93J,56,91569,131575,641,54,1,15.19,12,0.43,1,2,1,1,1,1,1
2,JGFUSOIUH7,26,78169,75417,569,105,3,18.02,12,0.29,3,2,3,1,1,1,1
3,4538THBHOX,26,63033,10804,326,118,1,14.71,24,0.41,1,2,2,0,0,0,1
4,DXLNA06JHR,24,29665,21182,662,102,3,15.02,60,0.69,4,1,2,0,1,0,1


In [37]:
# need to scale it first

In [27]:
# Keep the loan identifiers for the submission file, then remove them from the
# feature frame so only the feature columns are passed to the scaler.
# NOTE: this cell overwrites `new_X_test`, so running it a second time raises a
# KeyError because 'LoanID' is no longer present.
ids=new_X_test['LoanID']
new_X_test=new_X_test.drop(columns=['LoanID'])

In [8]:
new_X_test.head()

Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner
0,55,112656,92393,581,113,2,23.54,36,0.15,4,3,2,1,1,4,0
1,56,91569,131575,641,54,1,15.19,12,0.43,1,2,1,1,1,1,1
2,26,78169,75417,569,105,3,18.02,12,0.29,3,2,3,1,1,1,1
3,26,63033,10804,326,118,1,14.71,24,0.41,1,2,2,0,0,0,1
4,24,29665,21182,662,102,3,15.02,60,0.69,4,1,2,0,1,0,1


In [28]:
scaled_new_X_test=scaler.transform(new_X_test)

In [33]:
# Turn the scaled submission features into a float32 tensor on the same device as the models
new_x_test_tensor = torch.tensor(scaled_new_X_test, dtype=torch.float32).to(device)

In [126]:
# Put model1 in evaluation mode so its Dropout layers are disabled at inference time
model1.eval()

with torch.no_grad():  # No gradient tracking during inference
    # Forward pass on the submission features
    outputs = model1(new_x_test_tensor)

    # Convert probabilities to binary labels (0 or 1) with a 0.5 threshold
    predicted = (outputs >= 0.5).float()

# No accuracy is computed here: this cell has no labels to compare against.
# Move the predictions to CPU so they can be handed to pandas
y_pred = predicted.cpu().numpy()  # Convert to NumPy array

In [127]:
y_pred=pd.DataFrame(y_pred)

In [128]:
y_pred.value_counts()

0  
0.0    50352
1.0      718
Name: count, dtype: int64

In [71]:
print(type(ids))

<class 'pandas.core.series.Series'>


In [72]:
print(type(y_pred))

<class 'pandas.core.frame.DataFrame'>


In [129]:
# Combine them into a single DataFrame
merged_df = pd.concat([ids, y_pred], axis=1)

In [130]:
merged_df.head()

Unnamed: 0,LoanID,0
0,CKV34LU7V7,0.0
1,62KTYNH93J,0.0
2,JGFUSOIUH7,0.0
3,4538THBHOX,0.0
4,DXLNA06JHR,0.0


In [131]:
merged_df = merged_df.rename(columns={0: 'Default'})

In [132]:
merged_df.head()

Unnamed: 0,LoanID,Default
0,CKV34LU7V7,0.0
1,62KTYNH93J,0.0
2,JGFUSOIUH7,0.0
3,4538THBHOX,0.0
4,DXLNA06JHR,0.0


In [133]:
merged_df['Default']=merged_df['Default'].astype(int)

In [134]:
merged_df.to_csv('predictions.csv',index=False)

In [135]:
# this on submission gave 88.576 % accuracy

### Trying a third model to improve accuracy

In [88]:
# Third model: same layer widths as the second (16 -> 128 -> 64 -> 32 -> 1), with
# BatchNorm1d between each hidden Linear layer and its ReLU, and lighter Dropout
# in the deeper layers. The output goes through a Sigmoid (binary classification).
model2 = nn.Sequential(*[
    # 16 input features -> 128 hidden units
    nn.Linear(16, 128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.5),
    # 128 -> 64 hidden units
    nn.Linear(128, 64), nn.BatchNorm1d(64), nn.ReLU(), nn.Dropout(0.2),
    # 64 -> 32 hidden units
    nn.Linear(64, 32), nn.BatchNorm1d(32), nn.ReLU(), nn.Dropout(0.1),
    # 32 -> 1 output unit, squashed into (0, 1)
    nn.Linear(32, 1), nn.Sigmoid(),
])

In [89]:
# Move the third model to the selected device and create an Adam optimizer
# over its parameters.
# NOTE: this rebinds `optimizer`; from here on `optimizer.step()` updates model2 only.
model2=model2.to(device)
optimizer=optim.Adam(model2.parameters(),lr=0.01)

In [90]:
# Full-batch training of the third model: one optimizer step per epoch
num_epochs = 3000

# Training mode keeps Dropout active and makes BatchNorm use batch statistics;
# nothing inside the loop changes the mode, so it is set once up front.
model2.train()
for epoch in range(num_epochs):
    # Start from clean gradients
    optimizer.zero_grad()

    # Forward pass and loss against the training labels
    outputs = model2(X_train_tensor)
    loss = loss_function(outputs, Y_train_tensor)

    # Backward pass followed by the weight update
    loss.backward()
    optimizer.step()

    # Report the loss on every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/3000], Loss: 0.3334
Epoch [20/3000], Loss: 0.3264
Epoch [30/3000], Loss: 0.3232
Epoch [40/3000], Loss: 0.3186
Epoch [50/3000], Loss: 0.3179
Epoch [60/3000], Loss: 0.3167
Epoch [70/3000], Loss: 0.3162
Epoch [80/3000], Loss: 0.3159
Epoch [90/3000], Loss: 0.3150
Epoch [100/3000], Loss: 0.3143
Epoch [110/3000], Loss: 0.3140
Epoch [120/3000], Loss: 0.3136
Epoch [130/3000], Loss: 0.3134
Epoch [140/3000], Loss: 0.3132
Epoch [150/3000], Loss: 0.3130
Epoch [160/3000], Loss: 0.3125
Epoch [170/3000], Loss: 0.3123
Epoch [180/3000], Loss: 0.3126
Epoch [190/3000], Loss: 0.3121
Epoch [200/3000], Loss: 0.3119
Epoch [210/3000], Loss: 0.3118
Epoch [220/3000], Loss: 0.3116
Epoch [230/3000], Loss: 0.3116
Epoch [240/3000], Loss: 0.3113
Epoch [250/3000], Loss: 0.3109
Epoch [260/3000], Loss: 0.3111
Epoch [270/3000], Loss: 0.3111
Epoch [280/3000], Loss: 0.3109
Epoch [290/3000], Loss: 0.3104
Epoch [300/3000], Loss: 0.3105
Epoch [310/3000], Loss: 0.3106
Epoch [320/3000], Loss: 0.3106
Epoch [330/3000],

In [91]:
# Switch model2 to evaluation mode: Dropout is disabled and BatchNorm uses its
# running statistics. eval() does not by itself disable gradients.
model2.eval()

with torch.no_grad():  # Disable gradient tracking during inference
    # Forward pass on the held-out split
    outputs = model2(X_test_tensor)

    # Convert probabilities to binary labels (0 or 1) with a 0.5 threshold
    predicted = (outputs >= 0.5).float()

# Move tensors to CPU and convert to NumPy arrays for use with scikit-learn
y_pred = predicted.cpu().numpy()
y_test_cpu = Y_test_tensor.cpu().numpy()

# accuracy_score's documented signature is (y_true, y_pred): ground truth first
accuracy = accuracy_score(y_test_cpu, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 88.54%


In [92]:
# Put model2 in evaluation mode (Dropout off, BatchNorm on running statistics)
model2.eval()

with torch.no_grad():  # No gradient tracking during inference
    # Forward pass on the submission features
    outputs = model2(new_x_test_tensor)

    # Convert probabilities to binary labels (0 or 1) with a 0.5 threshold
    predicted = (outputs >= 0.5).float()

# No accuracy is computed here: this cell has no labels to compare against.
# Move the predictions to CPU so they can be handed to pandas
y_pred = predicted.cpu().numpy()  # Convert to NumPy array

In [93]:
y_pred=pd.DataFrame(y_pred)
print(y_pred.value_counts())

0  
0.0    50589
1.0      481
Name: count, dtype: int64


In [94]:
# Build the submission table: loan IDs next to the predictions, with the
# prediction column named 'Default' and stored as integers
merged_df = (
    pd.concat([ids, y_pred], axis=1)
    .rename(columns={0: 'Default'})
    .astype({'Default': int})
)
merged_df.to_csv('predictions1.csv', index=False)

In [95]:
heyo=pd.read_csv('predictions1.csv')
heyo['Default'].value_counts()

Default
0    50589
1      481
Name: count, dtype: int64

In [96]:
# this gave an accuracy of 88.746 % on kaggle submission which is a significant improvement

## Logistic Regression

In [97]:
# Logistic regression written as a network: a single linear unit over the 16
# input features whose output is passed through a sigmoid
logistic_regression = nn.Sequential(*[
    nn.Linear(16, 1),  # weighted sum of the 16 features plus a bias
    nn.Sigmoid(),      # squashes the sum into a probability
])

In [98]:
# Move the logistic-regression model to the selected device and create an Adam
# optimizer over its parameters.
# NOTE: this rebinds `optimizer`; from here on `optimizer.step()` updates
# logistic_regression only.
logistic_regression=logistic_regression.to(device)
optimizer=optim.Adam(logistic_regression.parameters(),lr=0.01)

In [100]:
# Full-batch training of the logistic-regression model: one optimizer step per epoch
num_epochs = 1000

logistic_regression.train()  # nothing inside the loop changes the mode, so set it once
for epoch in range(num_epochs):
    # Start from clean gradients
    optimizer.zero_grad()

    # Forward pass and loss against the training labels
    outputs = logistic_regression(X_train_tensor)
    loss = loss_function(outputs, Y_train_tensor)

    # Backward pass followed by the weight update
    loss.backward()
    optimizer.step()

    # Report the loss on every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/1000], Loss: 0.3163
Epoch [20/1000], Loss: 0.3163
Epoch [30/1000], Loss: 0.3163
Epoch [40/1000], Loss: 0.3163
Epoch [50/1000], Loss: 0.3163
Epoch [60/1000], Loss: 0.3163
Epoch [70/1000], Loss: 0.3163
Epoch [80/1000], Loss: 0.3163
Epoch [90/1000], Loss: 0.3163
Epoch [100/1000], Loss: 0.3163
Epoch [110/1000], Loss: 0.3163
Epoch [120/1000], Loss: 0.3163
Epoch [130/1000], Loss: 0.3163
Epoch [140/1000], Loss: 0.3163
Epoch [150/1000], Loss: 0.3163
Epoch [160/1000], Loss: 0.3163
Epoch [170/1000], Loss: 0.3163
Epoch [180/1000], Loss: 0.3163
Epoch [190/1000], Loss: 0.3163
Epoch [200/1000], Loss: 0.3163
Epoch [210/1000], Loss: 0.3163
Epoch [220/1000], Loss: 0.3163
Epoch [230/1000], Loss: 0.3163
Epoch [240/1000], Loss: 0.3163
Epoch [250/1000], Loss: 0.3163
Epoch [260/1000], Loss: 0.3163
Epoch [270/1000], Loss: 0.3163
Epoch [280/1000], Loss: 0.3163
Epoch [290/1000], Loss: 0.3163
Epoch [300/1000], Loss: 0.3163
Epoch [310/1000], Loss: 0.3163
Epoch [320/1000], Loss: 0.3163
Epoch [330/1000],

In [101]:
# Switch the model to evaluation mode. eval() selects inference behaviour for
# layers such as Dropout/BatchNorm; it does not by itself disable gradients.
logistic_regression.eval()

with torch.no_grad():  # Disable gradient tracking during inference
    # Forward pass on the held-out split
    outputs = logistic_regression(X_test_tensor)

    # Convert probabilities to binary labels (0 or 1) with a 0.5 threshold
    predicted = (outputs >= 0.5).float()

# Move tensors to CPU and convert to NumPy arrays for use with scikit-learn
y_pred = predicted.cpu().numpy()
y_test_cpu = Y_test_tensor.cpu().numpy()

# accuracy_score's documented signature is (y_true, y_pred): ground truth first
accuracy = accuracy_score(y_test_cpu, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 88.43%


In [102]:
# Put the logistic-regression model in evaluation mode before inference
logistic_regression.eval()

with torch.no_grad():  # No gradient tracking during inference
    # Forward pass on the submission features
    outputs = logistic_regression(new_x_test_tensor)

    # Convert probabilities to binary labels (0 or 1) with a 0.5 threshold
    predicted = (outputs >= 0.5).float()

# No accuracy is computed here: this cell has no labels to compare against.
# Move the predictions to CPU so they can be handed to pandas
y_pred = predicted.cpu().numpy()  # Convert to NumPy array

In [103]:
y_pred=pd.DataFrame(y_pred)
print(y_pred.value_counts())

0  
0.0    50749
1.0      321
Name: count, dtype: int64


In [105]:
# Build the submission table: loan IDs next to the predictions, with the
# prediction column named 'Default' and stored as integers
merged_df = (
    pd.concat([ids, y_pred], axis=1)
    .rename(columns={0: 'Default'})
    .astype({'Default': int})
)
merged_df.to_csv('predictions_lr.csv', index=False)

# Read the file back and show how many rows fall in each predicted class
heyo = pd.read_csv('predictions_lr.csv')
heyo['Default'].value_counts()

Default
0    50749
1      321
Name: count, dtype: int64

In [None]:
# gave 88.586% accuracy on kaggle submission .

### Scikit Learn's Logistic Regression

In [2]:
from sklearn.linear_model import LogisticRegression

In [3]:
lr=LogisticRegression()

In [12]:
lr.fit(scaled_X_train,Y_train)

In [13]:
y_pred=lr.predict(scaled_X_test)

In [14]:
# accuracy_score's documented signature is (y_true, y_pred): ground truth first
print(accuracy_score(Y_test, y_pred))

0.8843254356765224


In [15]:
y_pred_sub=lr.predict(scaled_new_X_test)
y_pred_sub=pd.DataFrame(y_pred_sub)

In [16]:
# Build the submission table: loan IDs next to scikit-learn's predictions, with
# the prediction column named 'Default' and stored as integers
merged_df = (
    pd.concat([ids, y_pred_sub], axis=1)
    .rename(columns={0: 'Default'})
    .astype({'Default': int})
)
merged_df.to_csv('predictions_lr_sklearn.csv', index=False)

In [17]:
merged_df['Default'].value_counts()

Default
0    50749
1      321
Name: count, dtype: int64

In [19]:
# You can see that sklearn's logistic regression and our logistic regression implemented as a single-neuron neural network produced the same predicted class counts (50749 / 321) on the submission set

## Support Vector Machines

In [17]:
from sklearn.svm import SVC

In [18]:
classifier=SVC()
# Sklearn's SVM was not able to run due to the large input size, so the cells below use cuML's GPU implementation of SVC instead.

In [19]:
import cuml
print("cuML is available!")


cuML is available!


#### RBF kernel

In [35]:
# NOTE: this import rebinds `SVC` to cuML's implementation, replacing the
# scikit-learn `SVC` imported in an earlier cell.
from cuml.svm import SVC
import cudf
# Wrap the scaled training features and the training labels in cuDF containers
X_gpu = cudf.DataFrame(scaled_X_train)
y_gpu = cudf.Series(Y_train)

# Fit cuML's RBF-kernel SVM (prediction happens in later cells).
# NOTE: `model` earlier referred to the first neural network; it is rebound here.
model = SVC(kernel='rbf', C=1.4, gamma='scale')
model.fit(X_gpu, y_gpu)

In [36]:
X_gpu_test=cudf.DataFrame(scaled_X_test)
preds=model.predict(X_gpu_test)

In [37]:
preds=preds.to_numpy()

In [38]:
# accuracy_score's documented signature is (y_true, y_pred): ground truth first
print(accuracy_score(Y_test, preds))

0.8829792441746622


In [39]:
y_pred=model.predict(scaled_new_X_test)
y_pred=pd.DataFrame(y_pred)

In [40]:
# Build the submission table: loan IDs next to the SVM predictions, with the
# prediction column named 'Default' and stored as integers
merged_df = (
    pd.concat([ids, y_pred], axis=1)
    .rename(columns={0: 'Default'})
    .astype({'Default': int})
)
merged_df.to_csv('predictions_svm.csv', index=False)

# Read the file back and show how many rows fall in each predicted class
heyo = pd.read_csv('predictions_svm.csv')
heyo['Default'].value_counts()

Default
0    51067
1        3
Name: count, dtype: int64

In [19]:
# This model gave a relatively low accuracy of 88.453% on submission

In [18]:
# NOTE: this import binds `SVC` to cuML's implementation (not scikit-learn's).
from cuml.svm import SVC
import cudf
# Wrap the scaled training features and the training labels in cuDF containers
X_gpu = cudf.DataFrame(scaled_X_train)
y_gpu = cudf.Series(Y_train)

# Fit cuML's RBF-kernel SVM again, this time with C=0.2 (prediction happens in
# later cells). NOTE: this rebinds `model`.
model = SVC(kernel='rbf', C=0.2, gamma='scale')
model.fit(X_gpu, y_gpu)

In [20]:
# Score the currently fitted SVM on the held-out split
preds = model.predict(X_gpu_test)
preds = preds.to_numpy()  # convert to NumPy for scikit-learn
# accuracy_score's documented signature is (y_true, y_pred): ground truth first
print(accuracy_score(Y_test, preds))

0.8829302917564128


In [21]:
# Predict on the submission features and wrap the result in a one-column DataFrame
y_pred = pd.DataFrame(model.predict(scaled_new_X_test))

# Build the submission table: loan IDs next to the predictions, with the
# prediction column named 'Default' and stored as integers
merged_df = (
    pd.concat([ids, y_pred], axis=1)
    .rename(columns={0: 'Default'})
    .astype({'Default': int})
)
merged_df.to_csv('predictions_svm1.csv', index=False)

# Read the file back and show how many rows fall in each predicted class
heyo = pd.read_csv('predictions_svm1.csv')
heyo['Default'].value_counts()

Default
0    51070
Name: count, dtype: int64

#### Polynomial Kernel

In [41]:
# Train and predict using cuML's SVM
model = SVC(kernel='poly', C=1.0, gamma='scale',coef0=1,degree=3)
model.fit(X_gpu, y_gpu)

In [42]:
# Score the currently fitted SVM on the held-out split
preds = model.predict(X_gpu_test)
preds = preds.to_numpy()  # convert to NumPy for scikit-learn
# accuracy_score's documented signature is (y_true, y_pred): ground truth first
print(accuracy_score(Y_test, preds))

0.8829302917564128


In [43]:
# Predict on the submission features and wrap the result in a one-column DataFrame
y_pred = pd.DataFrame(model.predict(scaled_new_X_test))

# Build the submission table: loan IDs next to the predictions, with the
# prediction column named 'Default' and stored as integers
merged_df = (
    pd.concat([ids, y_pred], axis=1)
    .rename(columns={0: 'Default'})
    .astype({'Default': int})
)
merged_df.to_csv('predictions_svm2.csv', index=False)

# Read the file back and show how many rows fall in each predicted class
heyo = pd.read_csv('predictions_svm2.csv')
heyo['Default'].value_counts()

Default
0    51070
Name: count, dtype: int64

In [44]:
# Even polynomial Kernel didn't give the best results. Gave 88.447% accuracy on Kaggle submission

In [26]:
# Train and predict using cuML's SVM
model = SVC(kernel='poly', C=1.8, gamma='scale',coef0=1,degree=4)
model.fit(X_gpu, y_gpu)

In [27]:
# Score the currently fitted SVM on the held-out split
preds = model.predict(X_gpu_test)
preds = preds.to_numpy()  # convert to NumPy for scikit-learn
# accuracy_score's documented signature is (y_true, y_pred): ground truth first
print(accuracy_score(Y_test, preds))

0.8829302917564128
