# Collaborative Filtering Recommendation System

## Task 1: Import Modules

In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.metrics.pairwise import cosine_similarity

## Task 2: Import the Dataset

In [2]:
# Load the rating data into a DataFrame:
column_names = ['User_ID', 'User_Names','Movie_ID','Rating','Timestamp']
movies_df = pd.read_csv('Movie_data.csv', sep = ',', names = column_names)

#Load the move information in a DataFrame:
movies_title_df = pd.read_csv("Movie_Id_Titles.csv")
movies_title_df.rename(columns = {'item_id':'Movie_ID', 'title':'Movie_Title'}, inplace = True)

#Merge the DataFrames:
movies_df = pd.merge(movies_df,movies_title_df, on='Movie_ID')

#View the DataFrame:
print(movies_df.head())

   User_ID    User_Names  Movie_ID  Rating  Timestamp  \
0        0  Shawn Wilson        50       5  881250949   
1        0  Shawn Wilson       172       5  881250949   
2        0  Shawn Wilson       133       1  881250949   
3      196  Bessie White       242       3  881250949   
4      196  Bessie White       393       4  881251863   

                       Movie_Title  
0                 Star Wars (1977)  
1  Empire Strikes Back, The (1980)  
2        Gone with the Wind (1939)  
3                     Kolya (1996)  
4            Mrs. Doubtfire (1993)  


## Task 3: Explore the Dataset

In [3]:
print(f"\n Size of the movie_df dataset is {movies_df.shape}")
movies_df.describe()
movies_df.groupby('User_ID')['Rating'].count().sort_values(ascending = True).head()
n_users = movies_df.User_ID.unique().shape[0]
n_movies = movies_df.Movie_ID.unique().shape[0]
print( str(n_users) + ' users')
print( str(n_movies) + ' movies')


 Size of the movie_df dataset is (100003, 6)
944 users
1682 movies


## Task 4: Create an Interaction Matrix

In [4]:
#This would be a 2D array matrix to display user-movie_rating relationship
#Rows represent users by IDs, columns represent movies by IDs
ratings = np.zeros((n_users, n_movies))
for row in movies_df.itertuples():
    ratings[row[1], row[3]-1] = row[4]

# View the matrix
print(ratings)

[[0. 0. 0. ... 0. 0. 0.]
 [5. 3. 4. ... 0. 0. 0.]
 [4. 0. 0. ... 0. 0. 0.]
 ...
 [5. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 5. 0. ... 0. 0. 0.]]


## Task 5: Explore the Interaction Matrix

In [5]:
sparsity = 1.0 - (float(len(ratings.nonzero()[0])) / (ratings.shape[0] * ratings.shape[1]))
sparsity *= 100
print(sparsity)

93.70182037122876


## Task 6 : Create a Similarity Matrix

In [6]:
rating_cosine_similarity = cosine_similarity(ratings)

## Task 7: Provide Recommendations

In [7]:
def movie_recommender(user_item_m, X_user, user, k=10, top_n=10):
    # Get the location of the actual user in the User-Items matrix
    # Use it to index the User similarity matrix
    user_similarities = X_user[user]
    # obtain the indices of the top k most similar users
    most_similar_users = user_item_m.index[user_similarities.argpartition(-k)[-k:]]
    # Obtain the mean ratings of those users for all movies
    rec_movies = user_item_m.loc[most_similar_users].mean(0).sort_values(ascending=False)
    # Discard already seen movies
    m_seen_movies = user_item_m.loc[user].gt(0)
    seen_movies = m_seen_movies.index[m_seen_movies].tolist()
    rec_movies = rec_movies.drop(seen_movies).head(top_n)
    # return recommendations - top similar users rated movies
    rec_movies_a=rec_movies.index.to_frame().reset_index(drop=True)
    rec_movies_a.rename(columns={rec_movies_a.columns[0]: 'Movie_ID'}, inplace=True)
    return rec_movies_a

## Task 8: View the Provided Recommendations 

In [8]:
#Converting the 2D array into a DataFrame as expected by the movie_recommender function
ratings_df=pd.DataFrame(ratings)
user_ID=12
movie_recommender(ratings_df, rating_cosine_similarity,user_ID)


  rec_movies = user_item_m.loc[most_similar_users].mean(0).sort_values(ascending=False)


Unnamed: 0,Movie_ID
0,180
1,495
2,209
3,422
4,172
5,384
6,567
7,78
8,565
9,21


## Task 9: Create Wrapper Function

In [None]:
def movie_recommender_run(user_Name):
    #Get ID from Name
    user_ID=movies_df.loc[movies_df['User_Names'] == user_Name].User_ID.values[0]
    #Call the function
    temp=movie_recommender(ratings_df, rating_cosine_similarity, user_ID)
    # Join with the movie_title_df to get the movie titles
    top_k_rec=temp.merge(movies_title_df, how='inner')
    return top_k_rec

Bad pipe message: %s [b'0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7\r\nHost: localhost:35229\r\nUs', b'-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.']
Bad pipe message: %s [b'0.0 Safari/537.36\r\nAccept-Encoding: gzip, defla']
Bad pipe message: %s [b', br, zstd\r\nAccept-Language: en-US,en;q=0.9\r\nCache-Control: max-age=0\r\nReferer: https://cuddly-barnacle-4jrq7gxpxjg', b'7g5j.github.dev/\r\nX-Request-ID: aecb473040a90721b9f8f52a70cfcc7c\r\nX-Real-IP: 106.201.250.213\r\nX-Forwarded-Port: 4']
Bad pipe message: %s [b'\r\nX-Forwarded-Scheme: https\r\nX-Original-URI: /\r\nX-S', b'eme: https\r\nsec-fetch-site: same-site\r\nsec-fetch-mode: navigate\r\nsec-fetch-dest: document\r\nsec-ch-']
