In [23]:
import pandas as pd
import torch

train_data_path = '/kaggle/input/movie-ratings/movie-ratings/user_train.csv'
df_train = pd.read_csv(train_data_path)

df_train_features = df_train.drop(columns=['user_id', 'item_id'])

X_train = df_train_features.drop(columns=['interaction']).values
Y_train = df_train_features['interaction'].values


test_data_path = '/kaggle/input/movie-ratings/movie-ratings/user_test.csv'
df_test = pd.read_csv(test_data_path)

df_test_features = df_test.drop(columns=['user_id', 'item_id'])

X_test = df_test_features.drop(columns=['interaction']).values
Y_test = df_test_features['interaction'].values

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).view(-1, 1)

print(f"X_train tensor shape: {X_train_tensor.shape}")
print(f"Y_train tensor shape: {Y_train_tensor.shape}")
print(f"X_test tensor shape: {X_test_tensor.shape}")
print(f"Y_test tensor shape: {Y_test_tensor.shape}")


X_train tensor shape: torch.Size([90570, 40])
Y_train tensor shape: torch.Size([90570, 1])
X_test tensor shape: torch.Size([9430, 40])
Y_test tensor shape: torch.Size([9430, 1])


In [24]:
features = len(X_train[0])
genres = 19

print('Total Features are', features)
print('Movie/Item features are: ',genres)
print('User-specific features are: ',features - genres)

Total Features are 40
Movie/Item features are:  19
User-specific features are:  21


In [25]:
from torch.utils.data import Dataset, DataLoader



class MovieDataset(Dataset):
    def __init__(self, X, Y):
        self.x = X
        self.y = Y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, index):
        return self.x[index], self.y[index]

In [26]:
train_dataset = MovieDataset(X_train, Y_train)

test_dataset = MovieDataset(X_test, Y_test)

In [27]:
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [28]:
import torch

import torch.nn as nn



class DNNClassifier(nn.Module):

    def __init__(self, input_dim, dropout_prob=0.2, device = 'cpu'):

        super(DNNClassifier, self).__init__()

        self.input_dim = input_dim
        # Define the initial LayerNorm
        self.layer_norm_1 = nn.LayerNorm(input_dim, device=device)

        # Define layers in a single Sequential block inside ModuleList

        self.layers = nn.ModuleList([

            nn.Sequential(

                nn.Linear(input_dim, input_dim * 2),
                nn.ReLU(),
                nn.Linear(input_dim * 2, input_dim * 2),
                nn.ReLU(),
                nn.Dropout(p=dropout_prob)

            ),

            nn.Sequential(
                nn.Linear(input_dim * 2, input_dim * 2),
                nn.ReLU(),
                nn.Linear(input_dim * 2, input_dim),
                nn.ReLU()

            )

        ])
        # Define the final classifier layer
        self.classifier = nn.Linear(input_dim, 1)
        
    def forward(self, x):
        # Apply LayerNorm first
        x = self.layer_norm_1(x)
        for layer in self.layers:
            x = layer(x)

        x = self.classifier(x)

        return x



# Example usage

model = DNNClassifier(input_dim=16, dropout_prob=0.2)

x = torch.randn(8, 16)  # Example input: (batch_size, input_dim)

output = model(x)

print("Output shape:", output.shape)


Output shape: torch.Size([8, 1])


In [29]:
params = 0

for parameters in model.parameters():
    params += sum([p.numel() for p in parameters])

print('Total parameters are: ', params)


Total parameters are:  3233


In [30]:
import torch
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm  # Importing tqdm

# Hyperparameters
input_dim = features  # Define 'features' as the number of input features (e.g., number of columns in your data)
learning_rate = 0.001
num_epochs = 20
dropout_prob = 0.2

# Device configuration (use GPU if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model, loss function, and optimizer
model = DNNClassifier(input_dim=input_dim, dropout_prob=dropout_prob).to(device)
criterion = nn.BCEWithLogitsLoss()  # Combines sigmoid with binary cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_list = []

# Training loop with tqdm progress bar
for epoch in range(num_epochs):
    # Wrapping the training loop with tqdm to display a progress bar
    with tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{num_epochs}", unit='batch') as pbar:
        train_loss = 0.0
        model.train()
        
        # Iterate over the batches in the train_loader
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device).to(torch.float32), labels.to(device).to(torch.float32)
            
            # Forward pass
            outputs = model(inputs).to(torch.float32)
            loss = criterion(outputs, labels.view(-1, 1))
            
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            # Track loss
            train_loss += loss.item() * inputs.size(0)
            
            # Update the progress bar
            pbar.set_postfix(loss=train_loss / (pbar.n + 1))
            pbar.update(1)  # Increment the progress bar by one batch
    
    avg_train_loss = train_loss / len(train_loader.dataset)
    print(f"\nEpoch [{epoch+1}/{num_epochs}] - Train Loss: {avg_train_loss:.4f}")
    print('-' * 50)
    loss_list.append(avg_train_loss)

# Saving the trained model
torch.save(model.state_dict(), 'dnn_classifier.pth')
print("Model saved successfully.")


Epoch 1/20: 100%|██████████| 2831/2831 [00:11<00:00, 250.55batch/s, loss=15]  



Epoch [1/20] - Train Loss: 0.4674
--------------------------------------------------


Epoch 2/20: 100%|██████████| 2831/2831 [00:11<00:00, 250.75batch/s, loss=14.8]



Epoch [2/20] - Train Loss: 0.4616
--------------------------------------------------


Epoch 3/20: 100%|██████████| 2831/2831 [00:11<00:00, 244.65batch/s, loss=14.7]



Epoch [3/20] - Train Loss: 0.4597
--------------------------------------------------


Epoch 4/20: 100%|██████████| 2831/2831 [00:11<00:00, 252.60batch/s, loss=14.6]



Epoch [4/20] - Train Loss: 0.4577
--------------------------------------------------


Epoch 5/20: 100%|██████████| 2831/2831 [00:11<00:00, 249.42batch/s, loss=14.6]



Epoch [5/20] - Train Loss: 0.4559
--------------------------------------------------


Epoch 6/20: 100%|██████████| 2831/2831 [00:11<00:00, 248.34batch/s, loss=14.6]



Epoch [6/20] - Train Loss: 0.4548
--------------------------------------------------


Epoch 7/20: 100%|██████████| 2831/2831 [00:11<00:00, 249.66batch/s, loss=14.5]



Epoch [7/20] - Train Loss: 0.4534
--------------------------------------------------


Epoch 8/20: 100%|██████████| 2831/2831 [00:11<00:00, 250.69batch/s, loss=14.5]



Epoch [8/20] - Train Loss: 0.4529
--------------------------------------------------


Epoch 9/20: 100%|██████████| 2831/2831 [00:11<00:00, 247.11batch/s, loss=14.5]



Epoch [9/20] - Train Loss: 0.4521
--------------------------------------------------


Epoch 10/20: 100%|██████████| 2831/2831 [00:11<00:00, 251.92batch/s, loss=14.4]



Epoch [10/20] - Train Loss: 0.4509
--------------------------------------------------


Epoch 11/20: 100%|██████████| 2831/2831 [00:11<00:00, 251.92batch/s, loss=14.4]



Epoch [11/20] - Train Loss: 0.4495
--------------------------------------------------


Epoch 12/20: 100%|██████████| 2831/2831 [00:11<00:00, 248.50batch/s, loss=14.4]



Epoch [12/20] - Train Loss: 0.4490
--------------------------------------------------


Epoch 13/20: 100%|██████████| 2831/2831 [00:11<00:00, 251.57batch/s, loss=14.3]



Epoch [13/20] - Train Loss: 0.4478
--------------------------------------------------


Epoch 14/20: 100%|██████████| 2831/2831 [00:11<00:00, 251.01batch/s, loss=14.3]



Epoch [14/20] - Train Loss: 0.4472
--------------------------------------------------


Epoch 15/20: 100%|██████████| 2831/2831 [00:11<00:00, 247.02batch/s, loss=14.2]



Epoch [15/20] - Train Loss: 0.4454
--------------------------------------------------


Epoch 16/20: 100%|██████████| 2831/2831 [00:11<00:00, 252.50batch/s, loss=14.2]



Epoch [16/20] - Train Loss: 0.4446
--------------------------------------------------


Epoch 17/20: 100%|██████████| 2831/2831 [00:11<00:00, 247.33batch/s, loss=14.2]



Epoch [17/20] - Train Loss: 0.4433
--------------------------------------------------


Epoch 18/20: 100%|██████████| 2831/2831 [00:11<00:00, 251.29batch/s, loss=14.2]



Epoch [18/20] - Train Loss: 0.4423
--------------------------------------------------


Epoch 19/20: 100%|██████████| 2831/2831 [00:11<00:00, 251.62batch/s, loss=14.1]



Epoch [19/20] - Train Loss: 0.4410
--------------------------------------------------


Epoch 20/20: 100%|██████████| 2831/2831 [00:11<00:00, 247.53batch/s, loss=14.1]



Epoch [20/20] - Train Loss: 0.4396
--------------------------------------------------
Model saved successfully.


In [31]:
def evaluate(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():

        for inputs, labels in test_loader:

            inputs, labels = inputs.to(device).to(torch.float32), labels.to(device).to(torch.float32)            
            # Forward pass

            outputs = model(inputs).to(torch.float32)
            loss = criterion(outputs, labels.view(-1, 1))

            # Track loss and accuracy
            running_loss += loss.item() * inputs.size(0)
            outputs = torch.sigmoid(outputs)
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels.view(-1, 1)).sum().item()
            total += labels.size(0)

    

    avg_loss = running_loss / total
    accuracy = correct / total
    return avg_loss, accuracy


In [32]:
test_loss, test_acc = evaluate(model, test_loader, criterion, device)

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")




Test Loss: 0.4286, Test Accuracy: 0.8370
