In [1]:
# import libraray
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import pandas as pd
import numpy as np

In [2]:
# Define custom Dataset
class MyDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def __getitem__(self, index):
        features = self.X[index]
        target = self.y[index]
        return features, target

    def __len__(self):
        return len(self.X)

In [3]:
# Read Dataset
df = pd.read_csv("./movie-rating-additional.csv")
df['Year'] = df['Year'].str.split('–').str[0]
df['Year'] = pd.to_numeric(df['Year'], errors='coerce')

X = df.iloc[:, 2:-1].values
y = df.iloc[:, -1].values

In [4]:
print(X)

[[1.9950000e+03 1.0000000e+00 1.0000000e+00 ... 9.4585000e+01
  3.9440000e+08 3.0000000e+07]
 [1.9950000e+03 1.0000000e+00 0.0000000e+00 ... 1.4252000e+01
  2.6282194e+08 6.5000000e+07]
 [1.9950000e+03 0.0000000e+00 0.0000000e+00 ... 1.6951000e+01
  7.1500000e+07 2.5000000e+07]
 ...
 [2.0170000e+03 0.0000000e+00 0.0000000e+00 ... 4.1310000e+00
  0.0000000e+00 0.0000000e+00]
 [2.0180000e+03 0.0000000e+00 1.0000000e+00 ... 1.8096000e+01
  0.0000000e+00 0.0000000e+00]
 [1.9910000e+03 0.0000000e+00 0.0000000e+00 ... 1.4000000e+00
  0.0000000e+00 0.0000000e+00]]


In [5]:
print(y)

[3.92093023 3.43181818 3.25961538 ... 3.5        3.5        4.        ]


In [6]:
X = torch.from_numpy(X).type(torch.float32)
y = torch.from_numpy(y).type(torch.float32)
print(X.dtype)
print(y.dtype)

torch.float32
torch.float32


In [7]:
from sklearn.model_selection import train_test_split

# Assuming that X is your feature matrix and y are your targets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [8]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

X_train = torch.from_numpy(sc.fit_transform(X_train)).type(torch.float32)
X_test = torch.from_numpy(sc.transform(X_test)).type(torch.float32)

In [9]:
print(X_train.dtype)
print(X_test.dtype)
y_train = y_train.unsqueeze(1)
y_test = y_test.unsqueeze(1)
print(y_train.dtype, y_train.shape)
print(y_test.dtype, y_train.shape)

torch.float32
torch.float32
torch.float32 torch.Size([8750, 1])
torch.float32 torch.Size([8750, 1])


In [10]:
# Define the model

class MyRatingPredictionModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(MyRatingPredictionModel, self).__init__()
        self.linear1 = nn.Linear(input_size, 256)
        self.linear2 = nn.Linear(256, 256)
        self.linear3 = nn.Linear(256, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.linear3(x)
        return x

In [11]:
# Instantiate and training the model
input_size = X.shape[1]
print(input_size)
model = MyRatingPredictionModel(input_size = input_size, output_size = 1)
# Define a loss function
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Training Loop
num_epochs = 100
for epoch in range(num_epochs):
    # Training loop
    model.train()  # Set the model to training mode
    
    outputs = model(X_train)
#     outputs = outputs.squeeze()
    
#     print(outputs.shape)
#     print(y_train.shape)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")

27
Epoch [1/100], Loss: 12.204316139221191
Epoch [2/100], Loss: 2.5816025733947754
Epoch [3/100], Loss: 6.524923324584961
Epoch [4/100], Loss: 1.3022880554199219
Epoch [5/100], Loss: 1.9649723768234253
Epoch [6/100], Loss: 2.8281517028808594
Epoch [7/100], Loss: 2.5217602252960205
Epoch [8/100], Loss: 1.6766396760940552
Epoch [9/100], Loss: 1.2220581769943237
Epoch [10/100], Loss: 1.5516694784164429
Epoch [11/100], Loss: 1.7162654399871826
Epoch [12/100], Loss: 1.2081903219223022
Epoch [13/100], Loss: 0.7312648296356201
Epoch [14/100], Loss: 0.6970945596694946
Epoch [15/100], Loss: 0.911310076713562
Epoch [16/100], Loss: 1.0914512872695923
Epoch [17/100], Loss: 1.1221897602081299
Epoch [18/100], Loss: 1.0302703380584717
Epoch [19/100], Loss: 0.9020541906356812
Epoch [20/100], Loss: 0.8109169602394104
Epoch [21/100], Loss: 0.7683389186859131
Epoch [22/100], Loss: 0.732395350933075
Epoch [23/100], Loss: 0.6757369637489319
Epoch [24/100], Loss: 0.6268576383590698
Epoch [25/100], Loss: 0.6

In [12]:
 # Validation loop
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Disable gradient calculation
    total = 0
    correct = 0

    # Forward pass
    outputs = model(X_test)

    # Compute validation loss
    val_loss = criterion(outputs, y_test)

print(f'Validation Loss: {val_loss.item()}')

Validation Loss: 0.5913404822349548


In [13]:
def regression_accuracy(y_true, y_pred, tolerance=0.5):
    return np.mean(np.abs(y_true - y_pred) <= tolerance)

In [14]:
y_test_np = y_test.cpu().numpy()
outputs_np = outputs.cpu().numpy()
print(np.stack((y_test_np, outputs_np), axis=-1))

[[[1.        2.2813737]]

 [[3.1363637 3.1148193]]

 [[4.        3.5715854]]

 ...

 [[4.        2.8607743]]

 [[2.909091  2.7078369]]

 [[3.        3.503781 ]]]


In [15]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
# R-squared (R²): This is a statistical measure that represents the proportion of the variance for a dependent variable that’s explained by an independent variable or variables in a regression model. 
r2 = r2_score(y_test_np, outputs_np)
# Mean Squared Error (MSE): This is the average of the squared differences between the actual and predicted values. 
mse = np.mean((y_test_np - outputs_np)**2)
rmse = np.sqrt(mse)
# Mean Absolute Error (MAE): This is the average of the absolute differences between the actual and predicted values.
mae = mean_absolute_error(y_test_np, outputs_np)
# Find the maximum and minimum values in the dataset
max_value = torch.max(y_test)
min_value = torch.min(y_test)

# Calculate the Range
range = max_value - min_value
# print(range)

# Calculate the Accuracy
accuracy = 1 - (mae / range)

print(f"accuracy:{accuracy*100}% | root_mean_square:{mse} | mean_absolute_error:{mae} | r2_score: {r2}")

accuracy:88.85181427001953% | root_mean_square:0.5913404822349548 | mean_absolute_error:0.5574091672897339 | r2_score: 0.28555644673417113
