In [46]:
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy
from collections import defaultdict
from surprise.model_selection import GridSearchCV
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
import pickle


In [47]:
ratings = pd.read_csv('../data/ratings.csv')
movies = pd.read_csv('movies_feature_engineered.csv')
ratings['rating'] = ratings['rating']*2

In [48]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB


In [49]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 28 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   movieId          9742 non-null   int64  
 1   popularity       9742 non-null   float64
 2   runtime          9742 non-null   float64
 3   vote_average     9742 non-null   float64
 4   vote_count       9742 non-null   float64
 5   Action           9742 non-null   int64  
 6   Adventure        9742 non-null   int64  
 7   Animation        9742 non-null   int64  
 8   Children         9742 non-null   int64  
 9   Comedy           9742 non-null   int64  
 10  Crime            9742 non-null   int64  
 11  Documentary      9742 non-null   int64  
 12  Drama            9742 non-null   int64  
 13  Family           9742 non-null   int64  
 14  Fantasy          9742 non-null   int64  
 15  History          9742 non-null   int64  
 16  Horror           9742 non-null   int64  
 17  IMAX          

In [50]:
merged_df = ratings.merge(movies, on='movieId', how='left')
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 31 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   userId           100836 non-null  int64  
 1   movieId          100836 non-null  int64  
 2   rating           100836 non-null  float64
 3   timestamp        100836 non-null  int64  
 4   popularity       100836 non-null  float64
 5   runtime          100836 non-null  float64
 6   vote_average     100836 non-null  float64
 7   vote_count       100836 non-null  float64
 8   Action           100836 non-null  int64  
 9   Adventure        100836 non-null  int64  
 10  Animation        100836 non-null  int64  
 11  Children         100836 non-null  int64  
 12  Comedy           100836 non-null  int64  
 13  Crime            100836 non-null  int64  
 14  Documentary      100836 non-null  int64  
 15  Drama            100836 non-null  int64  
 16  Family           100836 non-null  int6

In [51]:
user_enc = LabelEncoder()
movie_enc = LabelEncoder()

merged_df['user_idx'] = user_enc.fit_transform(ratings['userId'])
merged_df['movie_idx'] = movie_enc.fit_transform(merged_df['movieId'])

num_users = merged_df['user_idx'].nunique()
num_movies = merged_df['movie_idx'].nunique()

In [52]:
1076 in list(movies['movieId'].unique())

True

In [53]:
pickle.dump(user_enc, open("user_enc.pkl", "wb"))
pickle.dump(movie_enc, open("movie_enc.pkl", "wb"))

In [54]:
num_features = ['vote_average', 'vote_count', 'popularity', 'runtime']
scaler = StandardScaler()
merged_df[num_features] = scaler.fit_transform(merged_df[num_features])

In [55]:
merged_df.head()

Unnamed: 0,userId,movieId,rating,timestamp,popularity,runtime,vote_average,vote_count,Action,Adventure,...,Mystery,Romance,Sci-Fi,Science Fiction,TV Movie,Thriller,War,Western,user_idx,movie_idx
0,1,1,8.0,964982703,2.030276,-1.396378,1.162476,1.913321,0,1,...,0,0,0,0,0,0,0,0,0,0
1,1,3,8.0,964981247,-0.757716,-0.563646,-0.655004,-0.809518,0,0,...,0,1,0,0,0,0,0,0,0,2
2,1,6,8.0,964982224,1.170748,2.309283,1.101852,0.257766,1,0,...,0,0,0,0,0,0,0,0,0,5
3,1,47,10.0,964983815,1.913863,0.518907,1.666025,2.339322,0,0,...,1,0,0,0,0,1,0,0,0,43
4,1,50,10.0,964982931,0.291648,-0.355462,1.414869,0.710271,0,0,...,0,0,0,0,0,1,0,0,0,46


In [56]:
cat_features = ['user_idx', 'movie_idx']
cont_features = ['popularity', 'runtime',
       'vote_average', 'vote_count', 'Action', 'Adventure', 'Animation',
       'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Family',
       'Fantasy', 'History', 'Horror', 'IMAX', 'Music', 'Musical', 'Mystery',
       'Romance', 'Sci-Fi', 'Science Fiction', 'TV Movie', 'Thriller', 'War',
       'Western'] 
X_cat = merged_df[cat_features].values  # (N, 2)
X_cont = merged_df[cont_features].values.astype('float32')  # (N, C)
y = merged_df['rating'].values.astype('float32')  # (N,)

In [57]:
class RatingDataset(Dataset):
    def __init__(self, X_cat, X_cont, y):
        self.X_cat = torch.tensor(X_cat, dtype=torch.long)
        self.X_cont = torch.tensor(X_cont, dtype=torch.float)
        self.y = torch.tensor(y, dtype=torch.float)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X_cat[idx], self.X_cont[idx], self.y[idx]

dataset = RatingDataset(X_cat, X_cont, y)
dataloader = DataLoader(dataset, batch_size=512, shuffle=True)


In [58]:
class FactorizationMachineModel(nn.Module):
    def __init__(self, num_users, num_movies, num_cont_features, emb_dim=32):
        super().__init__()

        # Embedding layers
        self.user_emb = nn.Embedding(num_users, emb_dim)
        self.movie_emb = nn.Embedding(num_movies, emb_dim)

        # Linear (first-order) terms for categorical
        self.user_bias = nn.Embedding(num_users, 1)
        self.movie_bias = nn.Embedding(num_movies, 1)

        # Linear for continuous
        self.linear_cont = nn.Linear(num_cont_features, 1)

        self.dropout = nn.Dropout(0.2)
        self.output = nn.Linear(1, 1)  # optional non-linearity head

    def forward(self, X_cat, X_cont):
        user_idx, movie_idx = X_cat[:, 0], X_cat[:, 1]

        user_vec = self.user_emb(user_idx)
        movie_vec = self.movie_emb(movie_idx)

        # ----- FM interaction (dot product of embeddings) -----
        interaction = torch.sum(user_vec * movie_vec, dim=1, keepdim=True)

        # ----- Linear terms -----
        linear_cat = self.user_bias(user_idx) + self.movie_bias(movie_idx)
        linear_cont = self.linear_cont(X_cont)

        # ----- Final output -----
        out = interaction + linear_cat + linear_cont
        out = self.output(self.dropout(out))

        return out.squeeze(1)


In [59]:
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, y_pred, y_true):
        return torch.sqrt(self.mse(y_pred, y_true))


In [60]:
model = FactorizationMachineModel(
    num_users=num_users,
    num_movies=num_movies,
    num_cont_features=X_cont.shape[1],
    emb_dim=32
).to("cuda" if torch.cuda.is_available() else "cpu")

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [61]:
def train_model(model, dataloader, epochs=20):
    model.train()
    device = next(model.parameters()).device

    for epoch in range(epochs):
        total_loss = 0.0

        for X_cat, X_cont, y in tqdm(dataloader):
            X_cat = X_cat.to(device)
            X_cont = X_cont.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            preds = model(X_cat, X_cont)
            loss = criterion(preds, y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * len(y)

        avg_loss = total_loss / len(dataloader.dataset)
        print(f"Epoch {epoch+1}/{epochs} — Loss: {avg_loss:.4f}")


In [62]:
train_model(model, dataloader)


100%|██████████| 197/197 [00:04<00:00, 43.43it/s] 


Epoch 1/20 — Loss: 71.6706


100%|██████████| 197/197 [00:01<00:00, 124.09it/s]


Epoch 2/20 — Loss: 51.1824


100%|██████████| 197/197 [00:01<00:00, 109.74it/s]


Epoch 3/20 — Loss: 39.4931


100%|██████████| 197/197 [00:01<00:00, 127.72it/s]


Epoch 4/20 — Loss: 32.7154


100%|██████████| 197/197 [00:01<00:00, 115.05it/s]


Epoch 5/20 — Loss: 28.0608


100%|██████████| 197/197 [00:01<00:00, 118.50it/s]


Epoch 6/20 — Loss: 24.6590


100%|██████████| 197/197 [00:01<00:00, 114.71it/s]


Epoch 7/20 — Loss: 21.8419


100%|██████████| 197/197 [00:01<00:00, 117.08it/s]


Epoch 8/20 — Loss: 19.4919


100%|██████████| 197/197 [00:01<00:00, 110.77it/s]


Epoch 9/20 — Loss: 17.5424


100%|██████████| 197/197 [00:01<00:00, 125.15it/s]


Epoch 10/20 — Loss: 15.8921


100%|██████████| 197/197 [00:01<00:00, 118.71it/s]


Epoch 11/20 — Loss: 14.4390


100%|██████████| 197/197 [00:01<00:00, 118.35it/s]


Epoch 12/20 — Loss: 13.1480


100%|██████████| 197/197 [00:01<00:00, 108.82it/s]


Epoch 13/20 — Loss: 12.1257


100%|██████████| 197/197 [00:01<00:00, 122.34it/s]


Epoch 14/20 — Loss: 11.2430


100%|██████████| 197/197 [00:01<00:00, 120.03it/s]


Epoch 15/20 — Loss: 10.4246


100%|██████████| 197/197 [00:01<00:00, 111.20it/s]


Epoch 16/20 — Loss: 9.7921


100%|██████████| 197/197 [00:01<00:00, 122.43it/s]


Epoch 17/20 — Loss: 9.1334


100%|██████████| 197/197 [00:01<00:00, 112.84it/s]


Epoch 18/20 — Loss: 8.5426


100%|██████████| 197/197 [00:01<00:00, 120.61it/s]


Epoch 19/20 — Loss: 8.0796


100%|██████████| 197/197 [00:01<00:00, 110.50it/s]

Epoch 20/20 — Loss: 7.6134





In [63]:
from sklearn.model_selection import train_test_split

X_cat_train, X_cat_val, X_cont_train, X_cont_val, y_train, y_val = train_test_split(
    X_cat, X_cont, y, test_size=0.2, random_state=42)


In [64]:
train_dataset = RatingDataset(X_cat_train, X_cont_train, y_train)
val_dataset = RatingDataset(X_cat_val, X_cont_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)


In [65]:

def evaluate_model(model, val_loader):
    model.eval()
    device = next(model.parameters()).device

    all_preds = []
    all_targets = []
    def accuracy_within_threshold(y_true, y_pred, threshold=1.0):
        return np.mean(np.abs(y_true - y_pred) <= threshold)

    with torch.no_grad():
        for X_cat, X_cont, y in val_loader:
            X_cat = X_cat.to(device)
            X_cont = X_cont.to(device)
            y = y.to(device)

            preds = model(X_cat, X_cont)
            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(y.cpu().numpy())

    rmse = np.sqrt(mean_squared_error(all_targets, all_preds))
    mae = mean_absolute_error(all_targets, all_preds)
    acc_05 = accuracy_within_threshold(np.array(all_targets), np.array(all_preds), threshold=0.5)
    acc_1 = accuracy_within_threshold(np.array(all_targets), np.array(all_preds), threshold=1.0)
    acc_2 = accuracy_within_threshold(np.array(all_targets), np.array(all_preds), threshold=2.0)

    print(f"Accuracy within 0.5: {acc_05:.4f}")
    print(f"Accuracy within 1.0: {acc_1:.4f}")
    print(f"Accuracy within 2.0: {acc_2:.4f}")
    print(f"Validation RMSE: {rmse:.4f}")
    print(f"Validation MAE : {mae:.4f}")


In [66]:
model = FactorizationMachineModel(
    num_users=num_users,
    num_movies=num_movies,
    num_cont_features=X_cont.shape[1],
    emb_dim=64
).to("cuda" if torch.cuda.is_available() else "cpu")

criterion = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [67]:
train_model(model, train_loader, epochs=50)


100%|██████████| 158/158 [00:01<00:00, 93.87it/s] 


Epoch 1/50 — Loss: 10.7132


100%|██████████| 158/158 [00:01<00:00, 112.68it/s]


Epoch 2/50 — Loss: 9.0871


100%|██████████| 158/158 [00:01<00:00, 111.91it/s]


Epoch 3/50 — Loss: 7.8371


100%|██████████| 158/158 [00:01<00:00, 109.88it/s]


Epoch 4/50 — Loss: 6.9608


100%|██████████| 158/158 [00:01<00:00, 95.91it/s] 


Epoch 5/50 — Loss: 6.3328


100%|██████████| 158/158 [00:01<00:00, 104.61it/s]


Epoch 6/50 — Loss: 5.8856


100%|██████████| 158/158 [00:01<00:00, 111.52it/s]


Epoch 7/50 — Loss: 5.5509


100%|██████████| 158/158 [00:01<00:00, 100.66it/s]


Epoch 8/50 — Loss: 5.2402


100%|██████████| 158/158 [00:01<00:00, 105.42it/s]


Epoch 9/50 — Loss: 4.9626


100%|██████████| 158/158 [00:01<00:00, 99.72it/s] 


Epoch 10/50 — Loss: 4.6954


100%|██████████| 158/158 [00:01<00:00, 109.35it/s]


Epoch 11/50 — Loss: 4.4571


100%|██████████| 158/158 [00:01<00:00, 108.87it/s]


Epoch 12/50 — Loss: 4.2267


100%|██████████| 158/158 [00:01<00:00, 109.47it/s]


Epoch 13/50 — Loss: 4.0197


100%|██████████| 158/158 [00:01<00:00, 96.32it/s] 


Epoch 14/50 — Loss: 3.8358


100%|██████████| 158/158 [00:01<00:00, 109.52it/s]


Epoch 15/50 — Loss: 3.6656


100%|██████████| 158/158 [00:01<00:00, 111.36it/s]


Epoch 16/50 — Loss: 3.5214


100%|██████████| 158/158 [00:01<00:00, 111.74it/s]


Epoch 17/50 — Loss: 3.3625


100%|██████████| 158/158 [00:01<00:00, 99.16it/s] 


Epoch 18/50 — Loss: 3.2479


100%|██████████| 158/158 [00:01<00:00, 109.97it/s]


Epoch 19/50 — Loss: 3.1197


100%|██████████| 158/158 [00:01<00:00, 108.25it/s]


Epoch 20/50 — Loss: 3.0120


100%|██████████| 158/158 [00:01<00:00, 98.54it/s] 


Epoch 21/50 — Loss: 2.9089


100%|██████████| 158/158 [00:01<00:00, 109.82it/s]


Epoch 22/50 — Loss: 2.8232


100%|██████████| 158/158 [00:01<00:00, 109.12it/s]


Epoch 23/50 — Loss: 2.7296


100%|██████████| 158/158 [00:01<00:00, 108.78it/s]


Epoch 24/50 — Loss: 2.6520


100%|██████████| 158/158 [00:01<00:00, 99.85it/s] 


Epoch 25/50 — Loss: 2.5793


100%|██████████| 158/158 [00:01<00:00, 109.72it/s]


Epoch 26/50 — Loss: 2.4995


100%|██████████| 158/158 [00:01<00:00, 109.46it/s]


Epoch 27/50 — Loss: 2.4297


100%|██████████| 158/158 [00:01<00:00, 103.10it/s]


Epoch 28/50 — Loss: 2.3746


100%|██████████| 158/158 [00:01<00:00, 102.93it/s]


Epoch 29/50 — Loss: 2.3128


100%|██████████| 158/158 [00:01<00:00, 110.91it/s]


Epoch 30/50 — Loss: 2.2599


100%|██████████| 158/158 [00:01<00:00, 110.00it/s]


Epoch 31/50 — Loss: 2.2000


100%|██████████| 158/158 [00:01<00:00, 110.47it/s]


Epoch 32/50 — Loss: 2.1478


100%|██████████| 158/158 [00:01<00:00, 100.43it/s]


Epoch 33/50 — Loss: 2.1060


100%|██████████| 158/158 [00:01<00:00, 108.18it/s]


Epoch 34/50 — Loss: 2.0497


100%|██████████| 158/158 [00:01<00:00, 103.14it/s]


Epoch 35/50 — Loss: 2.0142


100%|██████████| 158/158 [00:01<00:00, 111.12it/s]


Epoch 36/50 — Loss: 1.9750


100%|██████████| 158/158 [00:01<00:00, 102.50it/s]


Epoch 37/50 — Loss: 1.9368


100%|██████████| 158/158 [00:01<00:00, 111.94it/s]


Epoch 38/50 — Loss: 1.8933


100%|██████████| 158/158 [00:01<00:00, 108.66it/s]


Epoch 39/50 — Loss: 1.8661


100%|██████████| 158/158 [00:01<00:00, 101.69it/s]


Epoch 40/50 — Loss: 1.8299


100%|██████████| 158/158 [00:01<00:00, 109.13it/s]


Epoch 41/50 — Loss: 1.8029


100%|██████████| 158/158 [00:01<00:00, 109.48it/s]


Epoch 42/50 — Loss: 1.7622


100%|██████████| 158/158 [00:01<00:00, 95.36it/s] 


Epoch 43/50 — Loss: 1.7369


100%|██████████| 158/158 [00:01<00:00, 109.28it/s]


Epoch 44/50 — Loss: 1.7101


100%|██████████| 158/158 [00:01<00:00, 110.26it/s]


Epoch 45/50 — Loss: 1.6780


100%|██████████| 158/158 [00:01<00:00, 100.81it/s]


Epoch 46/50 — Loss: 1.6436


100%|██████████| 158/158 [00:01<00:00, 108.03it/s]


Epoch 47/50 — Loss: 1.6229


100%|██████████| 158/158 [00:01<00:00, 110.55it/s]


Epoch 48/50 — Loss: 1.5990


100%|██████████| 158/158 [00:01<00:00, 99.83it/s] 


Epoch 49/50 — Loss: 1.5631


100%|██████████| 158/158 [00:01<00:00, 107.10it/s]

Epoch 50/50 — Loss: 1.5361





In [68]:
evaluate_model(model, val_loader)

Accuracy within 0.5: 0.1931
Accuracy within 1.0: 0.3758
Accuracy within 2.0: 0.6708
Validation RMSE: 2.1091
Validation MAE : 1.6617


In [69]:
model = FactorizationMachineModel(
    num_users=num_users,
    num_movies=num_movies,
    num_cont_features=X_cont.shape[1],
    emb_dim=64
).to("cuda" if torch.cuda.is_available() else "cpu")

criterion = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [70]:
train_model(model, dataloader, epochs=50)

100%|██████████| 197/197 [00:01<00:00, 104.62it/s]


Epoch 1/50 — Loss: 9.6338


100%|██████████| 197/197 [00:02<00:00, 97.31it/s] 


Epoch 2/50 — Loss: 8.0065


100%|██████████| 197/197 [00:01<00:00, 115.37it/s]


Epoch 3/50 — Loss: 6.9681


100%|██████████| 197/197 [00:01<00:00, 137.85it/s]


Epoch 4/50 — Loss: 6.3379


100%|██████████| 197/197 [00:01<00:00, 125.58it/s]


Epoch 5/50 — Loss: 5.9150


100%|██████████| 197/197 [00:01<00:00, 112.49it/s]


Epoch 6/50 — Loss: 5.5646


100%|██████████| 197/197 [00:02<00:00, 96.41it/s] 


Epoch 7/50 — Loss: 5.1996


100%|██████████| 197/197 [00:01<00:00, 100.66it/s]


Epoch 8/50 — Loss: 4.8571


100%|██████████| 197/197 [00:01<00:00, 105.63it/s]


Epoch 9/50 — Loss: 4.5464


100%|██████████| 197/197 [00:01<00:00, 99.36it/s] 


Epoch 10/50 — Loss: 4.2583


100%|██████████| 197/197 [00:01<00:00, 104.06it/s]


Epoch 11/50 — Loss: 4.0000


100%|██████████| 197/197 [00:01<00:00, 99.13it/s] 


Epoch 12/50 — Loss: 3.7816


100%|██████████| 197/197 [00:01<00:00, 103.98it/s]


Epoch 13/50 — Loss: 3.5716


100%|██████████| 197/197 [00:01<00:00, 102.26it/s]


Epoch 14/50 — Loss: 3.3983


100%|██████████| 197/197 [00:01<00:00, 107.38it/s]


Epoch 15/50 — Loss: 3.2339


100%|██████████| 197/197 [00:01<00:00, 106.54it/s]


Epoch 16/50 — Loss: 3.0942


100%|██████████| 197/197 [00:01<00:00, 99.89it/s] 


Epoch 17/50 — Loss: 2.9699


100%|██████████| 197/197 [00:01<00:00, 101.01it/s]


Epoch 18/50 — Loss: 2.8502


100%|██████████| 197/197 [00:01<00:00, 99.45it/s] 


Epoch 19/50 — Loss: 2.7431


100%|██████████| 197/197 [00:01<00:00, 107.36it/s]


Epoch 20/50 — Loss: 2.6371


100%|██████████| 197/197 [00:01<00:00, 108.15it/s]


Epoch 21/50 — Loss: 2.5561


100%|██████████| 197/197 [00:01<00:00, 106.39it/s]


Epoch 22/50 — Loss: 2.4705


100%|██████████| 197/197 [00:02<00:00, 97.05it/s] 


Epoch 23/50 — Loss: 2.3944


100%|██████████| 197/197 [00:01<00:00, 109.12it/s]


Epoch 24/50 — Loss: 2.3275


100%|██████████| 197/197 [00:01<00:00, 107.59it/s]


Epoch 25/50 — Loss: 2.2504


100%|██████████| 197/197 [00:01<00:00, 99.54it/s] 


Epoch 26/50 — Loss: 2.1938


100%|██████████| 197/197 [00:01<00:00, 110.25it/s]


Epoch 27/50 — Loss: 2.1402


100%|██████████| 197/197 [00:01<00:00, 138.43it/s]


Epoch 28/50 — Loss: 2.0817


100%|██████████| 197/197 [00:01<00:00, 127.64it/s]


Epoch 29/50 — Loss: 2.0366


100%|██████████| 197/197 [00:01<00:00, 120.67it/s]


Epoch 30/50 — Loss: 1.9963


100%|██████████| 197/197 [00:01<00:00, 105.77it/s]


Epoch 31/50 — Loss: 1.9422


100%|██████████| 197/197 [00:01<00:00, 102.23it/s]


Epoch 32/50 — Loss: 1.9099


100%|██████████| 197/197 [00:01<00:00, 120.12it/s]


Epoch 33/50 — Loss: 1.8715


100%|██████████| 197/197 [00:01<00:00, 139.13it/s]


Epoch 34/50 — Loss: 1.8349


100%|██████████| 197/197 [00:01<00:00, 121.99it/s]


Epoch 35/50 — Loss: 1.7992


100%|██████████| 197/197 [00:01<00:00, 105.25it/s]


Epoch 36/50 — Loss: 1.7691


100%|██████████| 197/197 [00:01<00:00, 104.29it/s]


Epoch 37/50 — Loss: 1.7404


100%|██████████| 197/197 [00:01<00:00, 102.72it/s]


Epoch 38/50 — Loss: 1.7087


100%|██████████| 197/197 [00:01<00:00, 100.91it/s]


Epoch 39/50 — Loss: 1.6726


100%|██████████| 197/197 [00:01<00:00, 107.35it/s]


Epoch 40/50 — Loss: 1.6483


100%|██████████| 197/197 [00:01<00:00, 107.17it/s]


Epoch 41/50 — Loss: 1.6237


100%|██████████| 197/197 [00:02<00:00, 95.81it/s] 


Epoch 42/50 — Loss: 1.5858


100%|██████████| 197/197 [00:01<00:00, 108.30it/s]


Epoch 43/50 — Loss: 1.5633


100%|██████████| 197/197 [00:01<00:00, 98.63it/s] 


Epoch 44/50 — Loss: 1.5355


100%|██████████| 197/197 [00:01<00:00, 106.14it/s]


Epoch 45/50 — Loss: 1.5026


100%|██████████| 197/197 [00:02<00:00, 94.44it/s] 


Epoch 46/50 — Loss: 1.4778


100%|██████████| 197/197 [00:01<00:00, 106.59it/s]


Epoch 47/50 — Loss: 1.4537


100%|██████████| 197/197 [00:01<00:00, 105.66it/s]


Epoch 48/50 — Loss: 1.4360


100%|██████████| 197/197 [00:01<00:00, 99.91it/s] 


Epoch 49/50 — Loss: 1.4095


100%|██████████| 197/197 [00:01<00:00, 103.73it/s]

Epoch 50/50 — Loss: 1.3923





In [71]:
torch.save(model.state_dict(), "fm_model.pt")
pickle.dump(user_enc, open("user_enc.pkl", "wb"))
pickle.dump(movie_enc, open("movie_enc.pkl", "wb"))

# Also export the cont_features list
import json
with open("cont_features.json", "w") as f:
    json.dump(cont_features, f)
