In [None]:
!pip install surprise



In [None]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [None]:
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

In [None]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset = data.build_full_trainset()
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7acec0667460>

# **Collaborative** **Filtering**

In [None]:
from surprise import KNNBasic


algo = KNNBasic()
algo.fit(trainset)

def collaborative_filtering_recommendations(movie_title, top_n=5):
    movie_id = movies[movies['title'] == movie_title]['movieId'].values[0]

    movie_inner_id = algo.trainset.to_inner_iid(movie_id)

    movie_neighbors = algo.get_neighbors(movie_inner_id, k=top_n)

    movie_neighbors = (algo.trainset.to_raw_iid(inner_id) for inner_id in movie_neighbors)

    collab_recommendation_titles = [movies[movies['movieId'] == movie]['title'].values[0] for movie in movie_neighbors]

    return collab_recommendation_titles

Computing the msd similarity matrix...
Done computing similarity matrix.


In [None]:
movie_title = "Toy Story (1995)"
collab_rating = collaborative_filtering_recommendations(movie_title)
collab_rating

['Psycho (1960)',
 'Groundhog Day (1993)',
 'Wolf of Wall Street, The (2013)',
 '2012 (2009)',
 'Dunston Checks In (1996)']

# **Content**-**Based** **Filtering**

In [None]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(movies['genres'].fillna(''))

In [None]:
def content_based_recommendations(movie_title):
    tfidf_index = movies[movies['title'] == movie_title].index[0]
    cosine_similarities = linear_kernel(tfidf_matrix[tfidf_index], tfidf_matrix).flatten()
    content_indices = cosine_similarities.argsort()[::-1][1:]
    return [(movies.iloc[idx]['title'], cosine_similarities[idx]) for idx in content_indices]

In [None]:
movie_title = "Toy Story (1995)"
content_ratings = content_based_recommendations(movie_title)

print("\nContent-Based Recommendations:")
for movie, similarity in content_ratings[:5]:
    print(f"{movie} - Similarity: {similarity:.2f}")


Content-Based Recommendations:
Toy Story 2 (1999) - Similarity: 1.00
Monsters, Inc. (2001) - Similarity: 1.00
Tale of Despereaux, The (2008) - Similarity: 1.00
Emperor's New Groove, The (2000) - Similarity: 1.00
Toy Story (1995) - Similarity: 1.00


# **Neural** **Collaborative** **Filtering**

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
dataset_path = tf.keras.utils.get_file("ml-latest-small.zip", url, extract=True)
data_dir = dataset_path.replace("ml-latest-small.zip", "ml-latest-small")

ratings = pd.read_csv(f"{data_dir}/ratings.csv")
ratings = ratings[['userId', 'movieId', 'rating']]

ratings['userId'] = ratings['userId'].astype('category').cat.codes.values
ratings['movieId'] = ratings['movieId'].astype('category').cat.codes.values

train, test = train_test_split(ratings, test_size=0.2, random_state=42)

Downloading data from https://files.grouplens.org/datasets/movielens/ml-latest-small.zip


In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate

num_users = ratings['userId'].nunique()
num_items = ratings['movieId'].nunique()
embedding_dim = 50

user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')

user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim, name='user_embedding')(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_dim, name='item_embedding')(item_input)

user_vector = Flatten()(user_embedding)
item_vector = Flatten()(item_embedding)

concat = Concatenate()([user_vector, item_vector])

dense = Dense(128, activation='relu')(concat)
dense = Dense(64, activation='relu')(dense)
output = Dense(1)(dense)

model = Model(inputs=[user_input, item_input], outputs=output)
model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 item_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 50)                30500     ['user_input[0][0]']          
                                                                                                  
 item_embedding (Embedding)  (None, 1, 50)                486200    ['item_input[0][0]']          
                                                                                              

In [None]:
train_user_data = train['userId'].values
train_item_data = train['movieId'].values
train_ratings = train['rating'].values

history = model.fit([train_user_data, train_item_data], train_ratings, epochs=10, batch_size=64, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
movies = pd.read_csv(f"{data_dir}/movies.csv")
movies = movies[['movieId', 'title']]

movie_id_to_title = dict(zip(movies['movieId'].astype('category').cat.codes.values, movies['title']))
def recommend_movie_titles(user_id, num_recommendations=5):
    all_movie_ids = ratings['movieId'].unique()

    user_array = np.array([user_id] * len(all_movie_ids))
    predictions = model.predict([user_array, all_movie_ids])

    top_indices = predictions.flatten().argsort()[-num_recommendations:][::-1]
    top_movie_ids = all_movie_ids[top_indices]

    recommended_titles = [movie_id_to_title[movie_id] for movie_id in top_movie_ids]

    return recommended_titles

user_id = 1
recommended_titles = recommend_movie_titles(user_id)
print(f"Recommended movies for user {user_id}: {recommended_titles}")

Recommended movies for user 1: ["Other People's Money (1991)", 'Clerks II (2006)', 'The D Train (2015)', 'Dick (1999)', 'Kick-Ass 2 (2013)']


# **GAN** **Recommendation** **System**

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
dataset_path = tf.keras.utils.get_file("ml-latest-small.zip", url, extract=True)
data_dir = dataset_path.replace("ml-latest-small.zip", "ml-latest-small")

ratings = pd.read_csv(f"{data_dir}/ratings.csv")
ratings = ratings[['userId', 'movieId', 'rating']]

ratings['userId'] = ratings['userId'].astype('category').cat.codes.values
ratings['movieId'] = ratings['movieId'].astype('category').cat.codes.values

ratings['rating'] = ratings['rating'] / ratings['rating'].max()

train, test = train_test_split(ratings, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Concatenate, LeakyReLU, BatchNormalization

num_users = ratings['userId'].nunique()
num_items = ratings['movieId'].nunique()
embedding_dim = 50

def build_generator():
    model = Sequential()
    model.add(Dense(128, input_dim=num_users + num_items))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(1, activation='sigmoid'))
    return model

def build_discriminator():
    model = Sequential()
    model.add(Dense(512, input_dim=num_users + num_items + 1))  # +1 for the rating
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

generator = build_generator()
discriminator = build_discriminator()
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

discriminator.trainable = False
gan_input = Input(shape=(num_users + num_items,))
generated_rating = generator(gan_input)
gan_output = discriminator(Concatenate()([gan_input, generated_rating]))
gan = Model(gan_input, gan_output)
gan.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
train_user_data = train['userId'].values
train_item_data = train['movieId'].values
train_ratings = train['rating'].values

epochs = 10000
batch_size = 64
half_batch = batch_size // 2

for epoch in range(epochs):
    idx = np.random.randint(0, train_user_data.shape[0], half_batch)
    real_samples = np.zeros((half_batch, num_users + num_items))
    real_samples[np.arange(half_batch), train_user_data[idx]] = 1
    real_samples[np.arange(half_batch), num_users + train_item_data[idx]] = 1
    real_ratings = train_ratings[idx].reshape(-1, 1)
    real_data = np.hstack([real_samples, real_ratings])
    real_labels = np.ones((half_batch, 1))

    noise = np.random.normal(0, 1, (half_batch, num_users + num_items))
    fake_ratings = generator.predict(noise)
    fake_data = np.hstack([noise, fake_ratings])
    fake_labels = np.zeros((half_batch, 1))

    d_loss_real = discriminator.train_on_batch(real_data, real_labels)
    d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    noise = np.random.normal(0, 1, (batch_size, num_users + num_items))
    valid_y = np.ones((batch_size, 1))

    g_loss = gan.train_on_batch(noise, valid_y)

    if epoch % 1000 == 0:
        print(f"{epoch} [D loss: {d_loss[0]} | D accuracy: {100*d_loss[1]}] [G loss: {g_loss}]")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
6000 [D loss: 0.0003391129139345139 | D accuracy: 100.0] [G loss: 533.5908813476562]
7000 [D loss: 4.021357744932175e-05 | D accuracy: 100.0] [G loss: 618.166748046875]
8000 [D loss: 0.0009194440208375454 | D accuracy: 100.0] [G loss: 588.8328857421875]
9000 [D loss: 1.8492275557946414e-05 | D accuracy: 100.0] [G loss: 649.6171875]


In [None]:
def generate_recommendations(user_id, num_recommendations=5):
    user_vector = np.zeros(num_users)
    user_vector[user_id] = 1

    item_vectors = np.eye(num_items)
    noise = np.random.normal(0, 1, (num_items, num_users + num_items))
    noise[:, :num_users] = user_vector

    predicted_ratings = generator.predict(noise)
    top_indices = predicted_ratings.flatten().argsort()[-num_recommendations:][::-1]

    return top_indices

movies = pd.read_csv(f"{data_dir}/movies.csv")
movies = movies[['movieId', 'title']]

movie_id_to_title = dict(zip(movies['movieId'].astype('category').cat.codes.values, movies['title']))

user_id = 1
recommended_movie_ids = generate_recommendations(user_id)
recommended_titles = [movie_id_to_title[movie_id] for movie_id in recommended_movie_ids]
print(f"Recommended movies for user {user_id}: {recommended_titles}")

Recommended movies for user 1: ['Toy Story (1995)', 'BlacKkKlansman (2018)', 'Iron Soldier (2010)', 'SuperFly (2018)', 'Mission: Impossible - Fallout (2018)']
