In [1]:
# import libraray
import torch
from torch import nn, optim
import pandas as pd
import numpy as np

In [2]:
# Read Dataset
df = pd.read_csv("./dataset/movie-rating-additional.csv")
df['Year'] = df['Year'].str.split('–').str[0]
df['Year'] = pd.to_numeric(df['Year'], errors='coerce')

X = df.iloc[:, 2:-1].values
y = df.iloc[:, -1].values

In [3]:
print(X)

[[1.9950000e+03 1.0000000e+00 1.0000000e+00 ... 3.9440000e+08
  3.0000000e+07 8.1000000e+01]
 [1.9950000e+03 1.0000000e+00 0.0000000e+00 ... 2.6282194e+08
  6.5000000e+07 1.0400000e+02]
 [1.9950000e+03 0.0000000e+00 0.0000000e+00 ... 7.1500000e+07
  2.5000000e+07 1.0100000e+02]
 ...
 [2.0170000e+03 0.0000000e+00 0.0000000e+00 ... 0.0000000e+00
  0.0000000e+00 9.6000000e+01]
 [2.0180000e+03 0.0000000e+00 1.0000000e+00 ... 0.0000000e+00
  0.0000000e+00 9.0000000e+01]
 [1.9910000e+03 0.0000000e+00 0.0000000e+00 ... 0.0000000e+00
  0.0000000e+00 8.5000000e+01]]


In [4]:
print(y)

[3.92093023 3.43181818 3.25961538 ... 3.5        3.5        4.        ]


In [5]:
X = torch.from_numpy(X).type(torch.float32)
y = torch.from_numpy(y).type(torch.float32)
print(X.dtype)
print(y.dtype)

torch.float32
torch.float32


In [6]:
from sklearn.model_selection import train_test_split

# Assuming that X is your feature matrix and y are your targets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [7]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

X_train = torch.from_numpy(sc.fit_transform(X_train)).type(torch.float32)
X_test = torch.from_numpy(sc.transform(X_test)).type(torch.float32)

In [8]:
print(X_train.dtype)
print(X_test.dtype)
y_train = y_train.unsqueeze(1)
y_test = y_test.unsqueeze(1)
print(y_train.dtype, y_train.shape)
print(y_test.dtype, y_train.shape)

torch.float32
torch.float32
torch.float32 torch.Size([8750, 1])
torch.float32 torch.Size([8750, 1])


In [9]:
# Define the model

class MyRatingPredictionModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(MyRatingPredictionModel, self).__init__()
        self.linear1 = nn.Linear(input_size, 256)
        self.linear2 = nn.Linear(256, 256)
        self.linear3 = nn.Linear(256, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.linear3(x)
        return x

In [10]:
# Instantiate and training the model
input_size = X.shape[1]
# print(input_size)
model = MyRatingPredictionModel(input_size = input_size, output_size = 1)
# Define a loss function
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Training Loop
num_epochs = 100
for epoch in range(num_epochs):
    # Training loop
    model.train()  # Set the model to training mode
    
    outputs = model(X_train)
#     outputs = outputs.squeeze()
    
#     print(outputs.shape)
#     print(y_train.shape)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")

Epoch [1/100], Loss: 10.83878231048584
Epoch [2/100], Loss: 2.7733380794525146
Epoch [3/100], Loss: 2.3103785514831543
Epoch [4/100], Loss: 0.9996708631515503
Epoch [5/100], Loss: 1.2502743005752563
Epoch [6/100], Loss: 1.1482031345367432
Epoch [7/100], Loss: 1.0730552673339844
Epoch [8/100], Loss: 1.0922129154205322
Epoch [9/100], Loss: 0.8524869084358215
Epoch [10/100], Loss: 0.7030393481254578
Epoch [11/100], Loss: 0.7322006821632385
Epoch [12/100], Loss: 0.7514469027519226
Epoch [13/100], Loss: 0.7643797993659973
Epoch [14/100], Loss: 0.8033482432365417
Epoch [15/100], Loss: 0.7669041156768799
Epoch [16/100], Loss: 0.6779187917709351
Epoch [17/100], Loss: 0.6441068053245544
Epoch [18/100], Loss: 0.6368420720100403
Epoch [19/100], Loss: 0.6182554364204407
Epoch [20/100], Loss: 0.6295669674873352
Epoch [21/100], Loss: 0.648989200592041
Epoch [22/100], Loss: 0.6285043358802795
Epoch [23/100], Loss: 0.6148910522460938
Epoch [24/100], Loss: 0.6126564741134644
Epoch [25/100], Loss: 0.590

In [11]:
 # Validation loop
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Disable gradient calculation
    total = 0
    correct = 0

    # Forward pass
    outputs = model(X_test)

    # Compute validation loss
    val_loss = criterion(outputs, y_test)

print(f'Validation Loss: {val_loss.item()}')

Validation Loss: 0.5134760737419128


In [12]:
# def regression_accuracy(y_true, y_pred, tolerance=0.5):
#     return np.mean(np.abs(y_true - y_pred) <= tolerance)

In [13]:
y_test_np = y_test.cpu().numpy()
outputs_np = outputs.cpu().numpy()
print(np.stack((y_test_np, outputs_np), axis=-1))

[[[2.5       3.1402633]]

 [[3.        3.1484454]]

 [[3.        3.5812724]]

 ...

 [[3.5       3.1630433]]

 [[3.55      2.9367127]]

 [[2.5       1.8058898]]]


In [15]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
# R-squared (R²): This is a statistical measure that represents the proportion of the variance for a dependent variable that’s explained by an independent variable or variables in a regression model. 
r2 = r2_score(y_test_np, outputs_np)
# Mean Squared Error (MSE): This is the average of the squared differences between the actual and predicted values. 
mse = np.mean((y_test_np - outputs_np)**2)
rmse = np.sqrt(mse)
# Mean Absolute Error (MAE): This is the average of the absolute differences between the actual and predicted values.
mae = mean_absolute_error(y_test_np, outputs_np)
# Find the maximum and minimum values in the dataset
max_value = torch.max(y_test)
min_value = torch.min(y_test)

# Calculate the Range
range = max_value - min_value
# print(range)

# Calculate the Accuracy
accuracy = 1 - (mae / range)

print(f"accuracy:{accuracy*100}% \nroot_mean_square:{mse} \nmean_absolute_error:{mae} \nr2_score: {r2}")

accuracy:89.63725280761719% 
root_mean_square:0.5134760737419128 
mean_absolute_error:0.5181372165679932 
r2_score: 0.28250340819933906
