In [4]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.42.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.42.2-py2.py3-none-any.whl (9.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m92.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m87.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.42.2
Note: you may need to restart the kernel to use updated packages.


In [12]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

ratings = pd.read_csv("/kaggle/input/movielens-100k-dataset/ml-100k/u.data", 
                      sep="\t", 
                      names=["userId", "movieId", "rating", "timestamp"])
movies = pd.read_csv("/kaggle/input/movielens-100k-dataset/ml-100k/u.item", 
                     sep="|", 
                     encoding="latin-1", 
                     usecols=[0, 1], 
                     names=["movieId", "title"], 
                     header=None)

df = pd.merge(ratings, movies, on="movieId")

duplicates = df.duplicated(subset=["userId", "title"], keep=False)
if duplicates.any():
    print("Found duplicate entries:")
    print(df[duplicates].sort_values(by=["userId", "title"]))
   
    df = df.drop_duplicates(subset=["userId", "title"], keep="first")
    print("Duplicates dropped.")
else:
    print("No duplicates found.")


user_movie_matrix = df.pivot(index="userId", columns="title", values="rating").fillna(0)


matrix = csr_matrix(user_movie_matrix.values)


model = NearestNeighbors(metric="cosine", algorithm="brute")
model.fit(matrix)

# Recommend movies for a given user
def recommend_by_user(user_id,n=5):
    user_vector = matrix[user_id - 1]  
    distances, indices = model.kneighbors(user_vector, n_neighbors=n+1)
    similar_users = indices.flatten()[1:]
    recommended_movies = user_movie_matrix.iloc[similar_users].mean().sort_values(ascending=False).head(n).index
    return list(recommended_movies)

# Test the function
print("Recommendations for user 1:")
print(recommend_by_user(5))

Found duplicate entries:
       userId  movieId  rating  timestamp                      title
20470       1      246       5  874965905         Chasing Amy (1997)
34550       1      268       5  875692927         Chasing Amy (1997)
5063        2      297       4  888550871         Ulee's Gold (1997)
9201        2      303       4  888550774         Ulee's Gold (1997)
11227       3      348       4  889237455  Desperate Measures (1998)
...       ...      ...     ...        ...                        ...
91586     937      303       4  876762200         Ulee's Gold (1997)
85379     939      266       2  880260636  Kull the Conqueror (1997)
90740     939      680       2  880260636  Kull the Conqueror (1997)
70654     942      304       5  891282457       Fly Away Home (1996)
73640     942      500       5  891282816       Fly Away Home (1996)

[614 rows x 5 columns]
Duplicates dropped.
Recommendations for user 1:
['Monty Python and the Holy Grail (1974)', 'Princess Bride, The (1987)', 'S

In [7]:
!ls /kaggle/input/movielens-100k-dataset/ml-100k

allbut.pl  u1.base  u2.test  u4.base  u5.test  ub.base	u.genre  u.occupation
mku.sh	   u1.test  u3.base  u4.test  ua.base  ub.test	u.info	 u.user
README	   u2.base  u3.test  u5.base  ua.test  u.data	u.item
