In [2]:
pip install numpy pandas scikit-learn streamlit pickle-mixin

Collecting pickle-mixin
  Using cached pickle_mixin-1.0.2-py3-none-any.whl
Installing collected packages: pickle-mixin
Successfully installed pickle-mixin-1.0.2
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [3]:
# Step 1: Load the dataset from the CSV file into a DataFrame
MOVIES_CSV = 'movies.csv'
movies = pd.read_csv(MOVIES_CSV)


In [5]:
# Step 2: Data Preprocessing
# Combine important features into a single 'tags' column.
# Each column is NaN-filled *before* concatenation: `str + NaN` evaluates to NaN,
# so a movie missing only one field (e.g. its overview) would otherwise lose its
# title and genres from 'tags' as well.
movies['tags'] = (
    movies['title'].fillna('') + ' '
    + movies['genres'].fillna('') + ' '
    + movies['overview'].fillna('')
)


In [11]:
!pip install seaborn

# Step 3: Replace missing tags with '' so the vectorizer only receives strings
movies['tags'] = movies['tags'].fillna('')

# Bag-of-words vectorization: English stop words are dropped and the
# vocabulary is limited to 5000 features.
cv = CountVectorizer(stop_words='english', max_features=5000)
tag_counts = cv.fit_transform(movies['tags'])
vector = tag_counts.toarray()  # dense array, one row per movie



In [13]:
# Step 4: Cosine similarity between every pair of rows in `vector`
similarity = cosine_similarity(X=vector)


In [15]:
# Step 5: Recommendation Function
def recommend(movie, top_n=5, catalog=None, scores=None):
    """Return the titles of the movies most similar to ``movie``.

    Parameters
    ----------
    movie : str
        Exact title to look up in the ``'title'`` column.
    top_n : int, default 5
        Maximum number of titles to return.
    catalog : pandas.DataFrame, optional
        Frame with a ``'title'`` column. Defaults to the notebook-level
        ``movies`` frame.
    scores : 2-D array-like, optional
        Square similarity matrix whose row/column order matches the row order
        of ``catalog``. Defaults to the notebook-level ``similarity`` matrix.

    Returns
    -------
    list
        Up to ``top_n`` titles, most similar first. The queried row itself is
        never part of the result.

    Raises
    ------
    IndexError
        If no row has the requested title.
    """
    # Fall back to the objects built earlier in the notebook.
    if catalog is None:
        catalog = movies
    if scores is None:
        scores = similarity

    # Use the row *position*, not the index label: `scores` is addressed
    # positionally and titles are read back with `.iloc`, so a non-default
    # index would otherwise select the wrong row.
    title_matches = (catalog['title'] == movie).to_numpy().nonzero()[0]
    if len(title_matches) == 0:
        raise IndexError(f"movie {movie!r} not found in the 'title' column")
    movie_pos = int(title_matches[0])

    # Drop the queried movie explicitly instead of assuming it sorts first:
    # a movie with an all-zero tag vector has self-similarity 0, and ties can
    # place another row ahead of it. sorted() is stable, so equal scores keep
    # their original row order.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(scores[movie_pos]) if pos != movie_pos),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [catalog.iloc[pos]['title'] for pos, _ in ranked[:max(top_n, 0)]]

In [17]:
# Step 6: Save the model using Pickle
# Context managers close each file even if dump() raises; a bare open() call
# leaves the handle open until the garbage collector happens to reclaim it.
with open('movies_list.pkl', 'wb') as movies_file:
    pickle.dump(movies, movies_file)
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)