# **Movie Recommendation System**

## ***Import Library***

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import re

## ***Load Data***

In [5]:
df1 = pd.read_csv("/content/drive/MyDrive/movies.csv")
df2 = pd.read_csv("/content/drive/MyDrive/ratings.csv")

#***Data Preproccessing***

### ***Clean Title***

In [6]:
def clean_title(title):
    return re.sub("[^a-zA-Z0-9 ]", "", title)

df1['genres_list'] = df1['genres'].str.replace('|', ' ')
df1['clean_title'] = df1['title'].apply(clean_title)
movies_data = df1[['movieId', 'clean_title', 'genres_list']]

### ***Merge DataFrames***

In [7]:
ratings_data = df2.drop(['timestamp'], axis=1)
combined_data = ratings_data.merge(movies_data, on='movieId')

### ***TF-IDF Vectorizer for Titles***

In [8]:
vectorizer_title = TfidfVectorizer(ngram_range=(1, 2))
tfidf_title = vectorizer_title.fit_transform(movies_data['clean_title'])

### ***TF-IDF Vectorizer for Genres***

In [9]:
vectorizer_genres = TfidfVectorizer(ngram_range=(1, 2))
tfidf_genres = vectorizer_genres.fit_transform(movies_data['genres_list'])

# ***Search Function for Titles***

In [10]:
def search_by_title(title):
    title = clean_title(title)
    query_vec = vectorizer_title.transform([title])
    similarity = cosine_similarity(query_vec, tfidf_title).flatten()
    indices = np.argpartition(similarity, -5)[-5:]
    results = movies_data.iloc[indices][::-1]
    return results

# ***Search Function for Similar Genres***

In [11]:
def search_similar_genres(genres):
    query_vec = vectorizer_genres.transform([genres])
    similarity = cosine_similarity(query_vec, tfidf_genres).flatten()
    indices = np.argpartition(similarity, -10)[-10:]
    results = movies_data.iloc[indices][::-1]
    return results

# ***Scoring System***

In [12]:
def scores_calculator(movie_id):
    similar_users = combined_data[(combined_data['movieId'] == movie_id) & (combined_data['rating'] >= 4)]['userId'].unique()
    similar_user_recs = combined_data[(combined_data['userId'].isin(similar_users)) & (combined_data['rating'] >= 4)]['movieId']
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)

    all_users = combined_data[(combined_data['movieId'].isin(similar_user_recs.index)) & (combined_data['rating'] >= 4)]
    all_users_recs = all_users['movieId'].value_counts() / all_users['userId'].nunique()

    genres_of_selected_movie = combined_data[combined_data['movieId'] == movie_id]['genres_list'].unique()
    genres_of_selected_movie = np.array2string(genres_of_selected_movie)
    movies_with_similar_genres = search_similar_genres(genres_of_selected_movie)

    indices = []
    for index in movies_with_similar_genres[(movies_with_similar_genres['movieId'].isin(similar_user_recs.index))]['movieId']:
        indices.append(index)

    similar_user_recs.loc[indices] = similar_user_recs.loc[indices] * 1.5

    indices = []
    for index in movies_with_similar_genres[(movies_with_similar_genres['movieId'].isin(all_users_recs.index))]['movieId']:
        indices.append(index)
    all_users_recs.loc[indices] = all_users_recs.loc[indices] * 0.9

    rec_percentages = pd.concat([similar_user_recs, all_users_recs], axis=1)
    rec_percentages.columns = ['similar', 'all']
    rec_percentages['score'] = rec_percentages['similar'] / rec_percentages['all']

    rec_percentages = rec_percentages.sort_values('score', ascending=False)
    return rec_percentages

# ***Recommendation System Function***

In [13]:
def recommendation_results(user_input, title=0):
    title_candidates = search_by_title(user_input)
    movie_id = title_candidates.iloc[title]['movieId']
    scores = scores_calculator(movie_id)
    results = scores.head(10).merge(movies_data, left_index=True, right_on='movieId')[['clean_title', 'score', 'genres_list']]
    results.rename(columns={'clean_title': 'title', 'genres_list': 'genres'}, inplace=True)
    return results


# ***Result***

In [19]:
def display_search_results(results):
    print("\n" + "="*70)
    print(" 🎬  SEARCH RESULTS  🎬 ".center(70, '='))
    print("="*70)

    for i, row in results.iterrows():
        print(f"🔎 {i+1}. {row['clean_title']}")
        print(f"   📚 Genres: {row['genres_list']}")
        print("—" * 70)

    print("="*70)

def display_recommendation_results(recommendations):
    print("\n" + "="*70)
    print(" 🌟  RECOMMENDATION RESULTS  🌟 ".center(70, '='))
    print("="*70)

    for i, row in recommendations.iterrows():
        print(f"🎥 {i+1}. {row['title']}")
        print(f"   📚 Genres: {row['genres']}")
        print(f"   ⭐ Score: {row['score']:.4f}\n")
        print("—" * 70)

    print("="*70)

# Example Usage
user_input = "Toy Story"
search_results = search_by_title(user_input)
recommendations = recommendation_results(user_input)

display_search_results(search_results)
display_recommendation_results(recommendations)



🔎 3022. Toy Story 2 1999
   📚 Genres: Adventure Animation Children Comedy Fantasy
——————————————————————————————————————————————————————————————————————
🔎 14814. Toy Story 3 2010
   📚 Genres: Adventure Animation Children Comedy Fantasy IMAX
——————————————————————————————————————————————————————————————————————
🔎 1. Toy Story 1995
   📚 Genres: Adventure Animation Children Comedy Fantasy
——————————————————————————————————————————————————————————————————————
🔎 59768. Toy Story 4 2019
   📚 Genres: Adventure Animation Children Comedy
——————————————————————————————————————————————————————————————————————
🔎 20498. Toy Story of Terror 2013
   📚 Genres: Animation Children Comedy
——————————————————————————————————————————————————————————————————————

🎥 22634. Toy Story Toons Hawaiian Vacation 2011
   📚 Genres: Adventure Animation Children Comedy Fantasy
   ⭐ Score: 15.4554

——————————————————————————————————————————————————————————————————————
🎥 22635. Toy Story Toons Small Fry 2011
   📚 Genres