# Movie recommendation system

## Exploring Dataset

In [None]:
import pandas as pd

# Load the movie dataset from CSV into a DataFrame (path is relative to the
# working directory the notebook runs in).
df = pd.read_csv("../datasets/movies.csv")
# Preview the first rows to sanity-check the load.
df.head()

In [None]:
# Show the dataset dimensions as (rows, columns).
df.shape

In [None]:
# List the column labels available in the dataset.
df.columns

# Preprocessing

In [None]:
def combine_fields(row):
    """Merge a movie's descriptive fields into one text passage.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'title', 'director', 'genres', 'actors' and 'description'.

    Returns:
        A single string of the form
        "<title> directed by <director>. Genre: <genres>. "
        "Starring: <actors>. <description>".
    """
    sentences = [
        f"{row['title']} directed by {row['director']}.",
        f"Genre: {row['genres']}.",
        f"Starring: {row['actors']}.",
        f"{row['description']}",
    ]
    # A single space between the pieces reproduces the spacing of the
    # sentence-by-sentence layout.
    return " ".join(sentences)

# Apply combine_fields row by row (axis=1) and store the resulting text in a
# new 'full_text' column.
df['full_text'] = df.apply(combine_fields, axis=1)

# Embeddings

In [None]:
from sentence_transformers import SentenceTransformer

In [None]:
# Load the sentence-embedding model by name.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Hand encode() a plain list of strings rather than a pandas Series: integer
# indexing on a Series is a label lookup, so passing the Series only works
# while df keeps its default 0..n-1 index. The list keeps df's row order, so
# row i of desc_vector corresponds to row i of df.
desc_vector = embedding_model.encode(df['full_text'].tolist())

# Inspect the dimensions of the embedding matrix.
desc_vector.shape

# Model Training

In [None]:
from sklearn.neighbors import NearestNeighbors

# Nearest-neighbour index using cosine distance; n_neighbors=10 is only the
# default for queries that do not pass their own n_neighbors.
knn = NearestNeighbors(n_neighbors=10, metric='cosine')
# Fit on the embedding matrix computed from df['full_text'].
knn.fit(desc_vector)


In [None]:
def recommend_movies(id, no_of_movies=5):
    """Recommend movies similar to the one identified by ``id``.

    The selected movie's ``full_text`` is embedded with ``embedding_model``
    and its nearest neighbours are looked up in the fitted ``knn`` index.
    Neighbour indices are treated as row positions in ``df``.

    Args:
        id: Value matched against the ``id`` column of ``df`` (assumes the
            dataset has such a column, as the original lookup intended).
            The name shadows the builtin but is kept so that existing
            callers keep working.
        no_of_movies: Maximum number of recommendations to return.

    Returns:
        DataFrame holding the rows of ``df`` for the recommended movies,
        nearest first, with the selected movie itself excluded.

    Raises:
        KeyError: If no row of ``df`` has the given ``id``.
    """
    # Row-wise boolean mask. The previous `"id" == id` compared two scalars
    # and always produced False, so the lookup failed on every call.
    id_matches = (df['id'] == id).to_numpy()
    if not id_matches.any():
        raise KeyError(f"No movie with id {id!r}")

    # Work with the row *position* of the first match, because the indices
    # returned by kneighbors are positions as well.
    movie_pos = int(id_matches.argmax())
    movie_row = df.iloc[movie_pos]
    print(f"Selected Movie: {movie_row['title']}")

    query_vector = embedding_model.encode([movie_row['full_text']])

    # Request one extra neighbour because the movie itself is expected among
    # the results, but never more than the index actually holds.
    n_neighbors = min(no_of_movies + 1, knn.n_samples_fit_)
    _, indices = knn.kneighbors(query_vector, n_neighbors=n_neighbors)

    # Remove the selected movie by position instead of assuming it is ranked
    # first: a row with an identical embedding could be returned ahead of it.
    neighbor_positions = [pos for pos in indices[0] if pos != movie_pos]
    return df.iloc[neighbor_positions[:no_of_movies]]

# Example: request 10 recommendations for the movie with id 1726.
recommend_movies(1726, 10)

In [None]:
def recommend_movies_by_text(text, no_of_movies=5):
    """Find the movies whose combined text is closest to a free-text query.

    Args:
        text: Query string; it is embedded with ``embedding_model``.
        no_of_movies: Number of neighbours requested from ``knn``.

    Returns:
        The rows of ``df`` at the neighbour positions, in the order
        ``knn.kneighbors`` returns them.
    """
    text_embedding = embedding_model.encode([text])
    # kneighbors yields (distances, indices); only the indices are used.
    neighbor_info = knn.kneighbors(text_embedding, n_neighbors=no_of_movies)
    row_positions = neighbor_info[1][0]
    return df.iloc[row_positions]

# Example: free-text query asking for the 10 closest movies.
recommend_movies_by_text("The movie about spider", 10)

# Saving Our Model

In [None]:
import joblib

# Write the fitted nearest-neighbour index to disk.
joblib.dump(knn, "../movie_project/knn.pkl")
# Write the embedding model object to disk alongside it.
# NOTE(review): joblib files are pickle-based; only load them from trusted
# sources.
joblib.dump(embedding_model, "../movie_project/embedding_model.pkl")