In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

# --- Load the MovieLens tables ------------------------------------------------
# ratings.csv is expected to hold the user ratings, movies.csv the movie metadata.
ratings_data = pd.read_csv('ratings.csv')
movies_data = pd.read_csv('movies.csv')

# --- Keep only users with strictly more than 10 ratings -----------------------
user_ratings_count = ratings_data['userId'].value_counts()
active_users = user_ratings_count.loc[user_ratings_count.gt(10)].index
ratings_data_filtered = ratings_data.loc[ratings_data['userId'].isin(active_users)]

# --- Build the user x movie rating matrix -------------------------------------
# Rows are userIds, columns are movieIds; a (user, movie) pair with no rating
# becomes 0 so the matrix can be fed to the clustering step.
user_item_matrix = ratings_data_filtered.pivot_table(
    values='rating',
    index='userId',
    columns='movieId',
).fillna(0)
user_item_sparse = csr_matrix(user_item_matrix.to_numpy())

# --- Cluster the users with K-Means -------------------------------------------
k = 50  # Number of clusters
kmeans = KMeans(n_init="auto", random_state=42, n_clusters=k)
# clusters[i] is the cluster label of the user in row i of user_item_matrix.
clusters = kmeans.fit_predict(user_item_sparse)

# Recommend movies for a user based on similarity to other users in the same cluster
def recommend_movies(user_id, num_recommendations=5):
    """Recommend movies to a user from the ratings of similar users in the same cluster.

    The user's K-Means cluster is looked up, the other members of that cluster
    are ranked by cosine similarity to the user's rating vector, and the mean
    rating given by the ``num_recommendations`` most similar members is used
    to score every movie. Movies the user has already rated, and movies none
    of those neighbours rated, are never recommended.

    Relies on the module-level ``user_item_matrix``, ``clusters`` and
    ``movies_data`` built earlier in the notebook.

    Parameters
    ----------
    user_id : int
        A ``userId`` label present in ``user_item_matrix.index`` (i.e. a user
        that survived the activity filter).
    num_recommendations : int, default 5
        Both the number of nearest neighbours consulted and the maximum
        number of movies returned.

    Returns
    -------
    pandas.DataFrame
        Rows of ``movies_data`` for the recommended movies, ordered from the
        highest to the lowest score. May hold fewer than
        ``num_recommendations`` rows, and is empty when the user is alone in
        the cluster or ``num_recommendations`` is not positive.

    Raises
    ------
    ValueError
        If ``user_id`` is not a row label of ``user_item_matrix``.
    """
    # Rows of user_item_matrix (and entries of clusters) are ordered by the
    # userId labels that survived filtering, so ``user_id - 1`` is only the
    # right position when ids are contiguous from 1. Resolve the label instead.
    if user_id not in user_item_matrix.index:
        raise ValueError(
            f"user_id {user_id!r} is not in the user-item matrix "
            "(unknown user or filtered out for having too few ratings)"
        )
    user_pos = user_item_matrix.index.get_loc(user_id)

    # Positions of the other members of the user's cluster.
    user_cluster = clusters[user_pos]
    neighbour_pos = np.flatnonzero(clusters == user_cluster)
    neighbour_pos = neighbour_pos[neighbour_pos != user_pos]

    # Nothing to compare against (cosine_similarity rejects an empty array),
    # or nothing requested: return an empty frame with the usual columns.
    if neighbour_pos.size == 0 or num_recommendations <= 0:
        return movies_data.iloc[0:0]

    # Cosine similarity between the user and every other cluster member.
    user_vector = user_item_matrix.iloc[[user_pos]].to_numpy()
    neighbour_vectors = user_item_matrix.iloc[neighbour_pos].to_numpy()
    similarities = cosine_similarity(user_vector, neighbour_vectors)[0]

    # Most similar neighbours first.
    top_neighbours = neighbour_pos[np.argsort(similarities)[::-1][:num_recommendations]]

    # Score each movie by its mean rating among the top neighbours
    # (unrated entries count as 0, as stored in user_item_matrix).
    scores = user_item_matrix.iloc[top_neighbours].mean(axis=0)

    # Drop movies the user already rated (non-zero entry) and movies that no
    # selected neighbour rated (score of 0).
    already_rated = user_item_matrix.iloc[user_pos].to_numpy() > 0
    candidates = scores[~already_rated & (scores.to_numpy() > 0)]
    top_movies = candidates.sort_values(ascending=False).head(num_recommendations)

    # Look up the metadata, then restore the ranking order that the ``isin``
    # filter would otherwise discard.
    recommended_movies_info = movies_data[movies_data['movieId'].isin(top_movies.index)]
    rank_of = {movie_id: rank for rank, movie_id in enumerate(top_movies.index)}
    order = recommended_movies_info['movieId'].map(rank_of).to_numpy().argsort(kind='stable')

    return recommended_movies_info.iloc[order]

# Demo: request the default number of recommendations for userId 1 and print them.
user_id = 1
recommendations = recommend_movies(user_id=user_id)
print(recommendations)


      movieId                                              title  \
520       608                                       Fargo (1996)   
898      1196  Star Wars: Episode V - The Empire Strikes Back...   
900      1198  Raiders of the Lost Ark (Indiana Jones and the...   
902      1200                                      Aliens (1986)   
1939     2571                                 Matrix, The (1999)   

                              genres  
520      Comedy|Crime|Drama|Thriller  
898          Action|Adventure|Sci-Fi  
900                 Action|Adventure  
902   Action|Adventure|Horror|Sci-Fi  
1939          Action|Sci-Fi|Thriller  


In [None]:
import pandas as pd

In [None]:
# Read links.csv from the working directory into a DataFrame.
df = pd.read_csv("links.csv")

In [None]:
# Preview the first rows of the links table to check it loaded as expected.
df.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0
