In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Dropout, Concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load ratings data
ratings = pd.read_csv('/content/u.data', sep='\t', header=None, names=['userId', 'itemId', 'rating', 'timestamp'])

# Load movie metadata
movies = pd.read_csv('/content/u.item', sep='|', header=None, usecols=[0, 1], encoding='latin-1', names=['itemId', 'title'])

# Encode user and item IDs
user_encoder = {user: idx for idx, user in enumerate(ratings['userId'].unique())}
item_encoder = {item: idx for idx, item in enumerate(movies['itemId'].unique())}

ratings['user'] = ratings['userId'].map(user_encoder)
ratings['item'] = ratings['itemId'].map(item_encoder)

num_users = len(user_encoder)
num_items = len(item_encoder)

# Split the data into training and testing sets
train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)

# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices(({
    'user': train_data['user'].values,
    'item': train_data['item'].values
}, train_data['rating'].values))
test_dataset = tf.data.Dataset.from_tensor_slices(({
    'user': test_data['user'].values,
    'item': test_data['item'].values
}, test_data['rating'].values))

train_dataset = train_dataset.batch(32).shuffle(10000)
test_dataset = test_dataset.batch(32)



In [3]:
# Build Neural Matrix Factorization model
def build_matrix_factorization_model(embedding_size=50, dropout_rate=0.2):
    user_input = Input(shape=(1,), name='user')
    item_input = Input(shape=(1,), name='item')

    user_embedding = Embedding(num_users, embedding_size, name='user_embedding')(user_input)
    item_embedding = Embedding(num_items, embedding_size, name='item_embedding')(item_input)

    user_vecs = Flatten()(user_embedding)
    item_vecs = Flatten()(item_embedding)

    # Combine embeddings
    combined = Concatenate()([user_vecs, item_vecs])

    # Dense layers for learning interaction
    dense = Dense(128, activation='relu')(combined)
    dropout = Dropout(dropout_rate)(dense)
    output = Dense(1)(dropout)

    model = Model(inputs=[user_input, item_input], outputs=output)
    model.compile(optimizer=Adam(), loss='mean_squared_error')
    return model

# Grid search parameters (reduced)
param_grid = {
    'embedding_size': [20, 50],
    'dropout_rate': [0.2, 0.3],
    'epochs': [5],
    'batch_size': [32]
}

best_params = {}
best_score = float('inf')



In [4]:
# Early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Perform manual grid search
for embedding_size in param_grid['embedding_size']:
    for dropout_rate in param_grid['dropout_rate']:
        for epochs in param_grid['epochs']:
            for batch_size in param_grid['batch_size']:
                print(f"Testing parameters: embedding_size={embedding_size}, dropout_rate={dropout_rate}, epochs={epochs}, batch_size={batch_size}")

                model = build_matrix_factorization_model(embedding_size, dropout_rate)

                # Train the model
                model.fit(train_dataset, epochs=epochs, validation_data=test_dataset, verbose=1, callbacks=[early_stopping])

                # Evaluate the model
                y_pred = model.predict(test_dataset)
                y_true = np.concatenate([y for x, y in test_dataset], axis=0)
                score = mean_squared_error(y_true, y_pred)

                print(f"Score: {score}")

                if score < best_score:
                    best_score = score
                    best_params = {
                        'embedding_size': embedding_size,
                        'dropout_rate': dropout_rate,
                        'epochs': epochs,
                        'batch_size': batch_size
                    }

print(f"Best parameters: {best_params}")
print(f"Best score: {best_score}")



Testing parameters: embedding_size=20, dropout_rate=0.2, epochs=5, batch_size=32
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Score: 0.8640957621497919
Testing parameters: embedding_size=20, dropout_rate=0.3, epochs=5, batch_size=32
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Score: 0.8596139141292302
Testing parameters: embedding_size=50, dropout_rate=0.2, epochs=5, batch_size=32
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Score: 0.8565713692779467
Testing parameters: embedding_size=50, dropout_rate=0.3, epochs=5, batch_size=32
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Score: 0.8658848051588955
Best parameters: {'embedding_size': 50, 'dropout_rate': 0.2, 'epochs': 5, 'batch_size': 32}
Best score: 0.8565713692779467


In [8]:
movies.head()

Unnamed: 0,itemId,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [9]:
# Train the model with the best parameters
best_model = build_matrix_factorization_model(best_params['embedding_size'], best_params['dropout_rate'])
best_model.fit(train_dataset, epochs=best_params['epochs'], validation_data=test_dataset)




Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7fe7b3306da0>

In [29]:
def get_movie_recommendations(movie_title, top_n=10):
    # Print the movie title being searched
    print(f"Searching for movie: '{movie_title}'")

    # Print available titles for debugging
    available_titles = movies['title'].unique()
    print("Available Titles:", available_titles[:20])  # Show more titles for debugging

    # Strip leading and trailing spaces from titles
    movies['title'] = movies['title'].str.strip()

    # Check if movie_title exists in the dataset with exact matching
    movie_matches = movies[movies['title'].str.lower() == movie_title.lower()]
    print(f"Movie Matches:\n{movie_matches}")  # Debugging line

    if movie_matches.empty:
        raise ValueError(f"Movie '{movie_title}' not found.")

    # Extract the movie ID
    movie_id = movie_matches['itemId'].values[0]

    # Check if movie_id is in the encoder
    movie_encoded = item_encoder.get(movie_id, None)
    if movie_encoded is None:
        raise ValueError("Movie ID not found in encoder.")

    # Get the embedding of the input movie
    movie_embedding = best_model.get_layer('item_embedding').get_weights()[0][movie_encoded]

    # Calculate the similarity between the input movie and all other movies
    all_embeddings = best_model.get_layer('item_embedding').get_weights()[0]
    similarities = np.dot(all_embeddings, movie_embedding)

    # Get the top N most similar movies
    top_indices = np.argsort(similarities)[-top_n-1:-1][::-1]  # Exclude the movie itself
    recommended_movie_ids = [list(item_encoder.keys())[list(item_encoder.values()).index(idx)] for idx in top_indices]

    # Get movie titles for the recommended movie IDs
    recommended_movies = movies[movies['itemId'].isin(recommended_movie_ids)]['title']
    return recommended_movies


In [30]:
print(movies['title'].unique())  # Print all unique movie titles to check for exact match


['Toy Story (1995)' 'GoldenEye (1995)' 'Four Rooms (1995)' ...
 'B. Monkey (1998)' 'You So Crazy (1994)'
 'Scream of Stone (Schrei aus Stein) (1991)']


In [32]:

try:
    recommended_movies = get_movie_recommendations('Four Rooms (1995)')
    print("Recommended Movies:")
    print(recommended_movies)
except ValueError as e:
    print(e)

Searching for movie: 'Four Rooms (1995)'
Available Titles: ['Toy Story (1995)' 'GoldenEye (1995)' 'Four Rooms (1995)'
 'Get Shorty (1995)' 'Copycat (1995)'
 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)'
 'Twelve Monkeys (1995)' 'Babe (1995)' 'Dead Man Walking (1995)'
 'Richard III (1995)' 'Seven (Se7en) (1995)' 'Usual Suspects, The (1995)'
 'Mighty Aphrodite (1995)' 'Postino, Il (1994)'
 "Mr. Holland's Opus (1995)" 'French Twist (Gazon maudit) (1995)'
 'From Dusk Till Dawn (1996)' 'White Balloon, The (1995)'
 "Antonia's Line (1995)" 'Angels and Insects (1995)']
Movie Matches:
   itemId              title
2       3  Four Rooms (1995)
Recommended Movies:
137          D3: The Mighty Ducks (1996)
455           Beverly Hills Ninja (1997)
679            Kull the Conqueror (1997)
719                  First Knight (1995)
925                Down Periscope (1996)
975                          Solo (1996)
1028                    Jury Duty (1995)
1029     Beverly Hillbillies, The (1993)
14