<a href="https://colab.research.google.com/github/Ganesh-Kdt/Assignment-2-NN/blob/main/Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from google.colab import drive
drive.mount('/content/drive')

df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/dataset.csv')
df.head()
print(df.shape)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
(766, 8)


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch

# Step 1: Analyze the dataset, returning main statistics
dataset_statistics = df.describe()

# Step 1: Check for invalid entries in each column
for column in df.columns:
    unique_values = df[column].unique()
    invalid_entries = [value for value in unique_values if not isinstance(value, (int, float))]
    print(f"Invalid entries in column '{column}': {invalid_entries}")

# Replacing invalid characters with NaN
df_cleaned = df.replace({'f1': {'c': np.nan}, 'f2': {'f': np.nan}, 'f4': {'a': np.nan}, 'f6': {'e': np.nan, 'd': np.nan}})

# Converting columns to numeric, any remaining invalid values will become NaN
df_cleaned = df_cleaned.apply(pd.to_numeric, errors='coerce')

# Replacing NaN values with the median of their respective columns
for column in df_cleaned.columns:
    if df_cleaned[column].isna().any():
        df_cleaned[column] = df_cleaned[column].fillna(df_cleaned[column].median())

# Display the cleaned dataset to verify that invalid values are replaced
print(df_cleaned.head())

# Step 2: Preprocessing (Scaling and Splitting)

# Separating features and target
X = df_cleaned.drop(columns=['target'])  # Drop the target column
y = df_cleaned['target']  # Store the target column

# Scaling the features using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the dataset into 70% training, 15% validation, 15% testing
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.30, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)

# Step 3: Converting the datasets into PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Display the shape of the tensors to verify the splits
print("Training data shape:", X_train_tensor.shape, y_train_tensor.shape)
print("Validation data shape:", X_val_tensor.shape, y_val_tensor.shape)
print("Test data shape:", X_test_tensor.shape, y_test_tensor.shape)


Invalid entries in column 'f1': ['6', '1', '8', '0', '5', '3', '10', '2', '4', '7', '9', '11', '13', '15', '17', '12', '14', 'c']
Invalid entries in column 'f2': ['148', '85', '183', '89', '137', '116', '78', '115', '197', '125', '110', '168', '139', '189', '166', '100', '118', '107', '103', '126', '99', '196', '119', '143', '147', '97', '145', '117', '109', '158', '88', '92', '122', '138', '102', '90', '111', '180', '133', '106', '171', '159', '146', '71', '105', '101', '176', '150', '73', '187', '84', '44', '141', '114', '95', '129', '79', '0', '62', '131', '112', '113', '74', '83', '136', '80', '123', '81', '134', '142', '144', '93', '163', '151', '96', '155', '76', '160', '124', '162', '132', '120', '173', '170', '128', '108', '154', '57', '156', '153', '188', '152', '104', '87', '75', '179', '130', '194', '181', '135', '184', '140', '177', '164', '91', '165', '86', '193', '191', '161', '167', '77', '182', '157', '178', '61', '98', '127', '82', '72', '172', '94', '175', '195', '68'

In [3]:
import torch
import time
import torch.nn as nn
import torch.optim as optim

# Step 1: Define the Neural Network architecture
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        # Input layer
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        # Hidden layer
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        # Output layer
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()  # Binary classification output

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        out = self.sigmoid(out)
        return out

# Step 2: Initialize the model, loss function, and optimizer
input_size = 7  # Number of input features (based on your dataset)
hidden_size = 64  # Hidden layer size
output_size = 1  # Binary classification

model = SimpleNN(input_size, hidden_size, output_size)
criterion = nn.BCELoss()  # Binary Cross Entropy Loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 3: Train the model
epochs = 50  # Number of epochs
batch_size = 100  # Batch size

# DataLoader for batching
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

start_time = time.time()

for epoch in range(epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    for inputs, labels in train_loader:
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs.squeeze(), labels)  # Compute loss

        optimizer.zero_grad()  # Clear gradients
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        running_loss += loss.item()

    # Validation phase
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    with torch.no_grad():
        for val_inputs, val_labels in val_loader:
            val_outputs = model(val_inputs)
            val_loss += criterion(val_outputs.squeeze(), val_labels).item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

end_time = time.time()  # Record the end time
training_time = end_time - start_time  # Calculate elapsed time
print(f"Total Training Time: {training_time:.2f} seconds")

# Step 4: Evaluate the model on the test set
test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for test_inputs, test_labels in test_loader:
        test_outputs = model(test_inputs)
        predicted = (test_outputs.squeeze() >= 0.5).float()  # Binarize predictions
        total += test_labels.size(0)
        correct += (predicted == test_labels).sum().item()

accuracy = correct / total * 100
print(f"Test Accuracy: {accuracy:.2f}%")

Epoch [1/50], Loss: 0.6661, Val Loss: 0.6562
Epoch [2/50], Loss: 0.6410, Val Loss: 0.6307
Epoch [3/50], Loss: 0.6109, Val Loss: 0.6083
Epoch [4/50], Loss: 0.5884, Val Loss: 0.5862
Epoch [5/50], Loss: 0.5658, Val Loss: 0.5636
Epoch [6/50], Loss: 0.5513, Val Loss: 0.5413
Epoch [7/50], Loss: 0.5286, Val Loss: 0.5243
Epoch [8/50], Loss: 0.5154, Val Loss: 0.5145
Epoch [9/50], Loss: 0.4719, Val Loss: 0.5125
Epoch [10/50], Loss: 0.4831, Val Loss: 0.5176
Epoch [11/50], Loss: 0.4807, Val Loss: 0.5212
Epoch [12/50], Loss: 0.4763, Val Loss: 0.5228
Epoch [13/50], Loss: 0.4709, Val Loss: 0.5251
Epoch [14/50], Loss: 0.4590, Val Loss: 0.5254
Epoch [15/50], Loss: 0.4668, Val Loss: 0.5261
Epoch [16/50], Loss: 0.4660, Val Loss: 0.5248
Epoch [17/50], Loss: 0.4630, Val Loss: 0.5270
Epoch [18/50], Loss: 0.4549, Val Loss: 0.5281
Epoch [19/50], Loss: 0.4655, Val Loss: 0.5320
Epoch [20/50], Loss: 0.4514, Val Loss: 0.5265
Epoch [21/50], Loss: 0.4547, Val Loss: 0.5245
Epoch [22/50], Loss: 0.4688, Val Loss: 0.52