# Importing Essential Libraries

In [54]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict


# Step1:Loading Dataset

In [56]:
movie_system = pd.read_csv("Movies Recommendation.csv")

In [57]:
movie_system.head(4)

Unnamed: 0,Movie_ID,Movie_Title,Movie_Genre,Movie_Language,Movie_Budget,Movie_Popularity,Movie_Release_Date,Movie_Revenue,Movie_Runtime,Movie_Vote,...,Movie_Homepage,Movie_Keywords,Movie_Overview,Movie_Production_House,Movie_Production_Country,Movie_Spoken_Language,Movie_Tagline,Movie_Cast,Movie_Crew,Movie_Director
0,1,Four Rooms,Crime Comedy,en,4000000,22.87623,09-12-1995,4300000,98.0,6.5,...,,hotel new year's eve witch bet hotel room,It's Ted the Bellhop's first night on the job....,"[{""name"": ""Miramax Films"", ""id"": 14}, {""name"":...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...","[{""iso_639_1"": ""en"", ""name"": ""English""}]",Twelve outrageous guests. Four scandalous requ...,Tim Roth Antonio Banderas Jennifer Beals Madon...,"[{'name': 'Allison Anders', 'gender': 1, 'depa...",Allison Anders
1,2,Star Wars,Adventure Action Science Fiction,en,11000000,126.393695,25-05-1977,775398007,121.0,8.1,...,http://www.starwars.com/films/star-wars-episod...,android galaxy hermit death star lightsaber,Princess Leia is captured and held hostage by ...,"[{""name"": ""Lucasfilm"", ""id"": 1}, {""name"": ""Twe...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...","[{""iso_639_1"": ""en"", ""name"": ""English""}]","A long time ago in a galaxy far, far away...",Mark Hamill Harrison Ford Carrie Fisher Peter ...,"[{'name': 'George Lucas', 'gender': 2, 'depart...",George Lucas
2,3,Finding Nemo,Animation Family,en,94000000,85.688789,30-05-2003,940335536,100.0,7.6,...,http://movies.disney.com/finding-nemo,father son relationship harbor underwater fish...,"Nemo, an adventurous young clownfish, is unexp...","[{""name"": ""Pixar Animation Studios"", ""id"": 3}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...","[{""iso_639_1"": ""en"", ""name"": ""English""}]","There are 3.7 trillion fish in the ocean, they...",Albert Brooks Ellen DeGeneres Alexander Gould ...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton
3,4,Forrest Gump,Comedy Drama Romance,en,55000000,138.133331,06-07-1994,677945399,142.0,8.2,...,,vietnam veteran hippie mentally disabled runni...,A man with a low IQ has accomplished great thi...,"[{""name"": ""Paramount Pictures"", ""id"": 4}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...","[{""iso_639_1"": ""en"", ""name"": ""English""}]","The world will never be the same, once you've ...",Tom Hanks Robin Wright Gary Sinise Mykelti Wil...,"[{'name': 'Alan Silvestri', 'gender': 2, 'depa...",Robert Zemeckis



# Drop unnecessary columns


In [59]:
drop_columns = ['Movie_ID', 'Movie_Runtime', 'Movie_Keywords', 'Movie_Tagline',
                'Movie_Spoken_Language', 'Movie_Homepage', 'Movie_Overview',
                'Movie_Cast', 'Movie_Crew', 'Movie_Budget', 'Movie_Production_House',
                'Movie_Production_Country']
movie_system = movie_system.drop(columns=drop_columns)



# Convert release date to year

In [61]:
movie_system['Movie_Release_Date'] = pd.to_datetime(movie_system['Movie_Release_Date'],
                                                    format='%d-%m-%Y', errors='coerce')
movie_system['Released_Year'] = movie_system['Movie_Release_Date'].dt.year
movie_system = movie_system.drop(columns=['Movie_Release_Date'])

# Fill missing directors manually

In [63]:
null_directors = movie_system[movie_system['Movie_Director'].isnull()]
director_mapping = {
    189: "Roland Emmerich", 245: "Pete Docter", 753: "John Herzfeld",
    1262: "Jon Amiel", 2673: "Brandon Camp", 2717: "Jugal Hansraj",
    2916: "R. Balki", 3266: "Brian McDonald", 3323: "A. Raven Cruz",
    3325: "John Wright", 3420: "Simon Phillips", 3422: "Finn Taylor",
    3552: "Susan Seidelman", 3981: "Michael Paul Girard",
    3992: "Martin Scorsese", 4026: "Sarah Jacobson", 4091: "Colin Trevorrow",
    4654: "Jeff Leroy", 4711: "Wajahat Rauf", 4719: "Christopher Folkens",
    4739: "Daniel Columbie", 4758: "Mike Mayhall"
}
for idx, director in director_mapping.items():
    movie_system.at[idx, 'Movie_Director'] = director




# Define known genres

In [65]:
known_genres = [
    "Action", "Adventure", "Science Fiction", "Thriller", "Animation", "Comedy",
    "Family", "Crime", "Drama", "Mystery", "Romance", "Horror", "Documentary",
    "Music", "Fantasy", "History", "War", "Western", "Foreign"
]


# Parse genres into list and assign genre IDs

In [67]:
def parse_genres(genre_string, known_genres):
    return [genre for genre in known_genres if genre in genre_string]

movie_system['Movie_Genre'] = movie_system['Movie_Genre'].apply(
    lambda x: parse_genres(x, known_genres) if isinstance(x, str) else []
)

all_genres = set(genre for row in movie_system['Movie_Genre'] for genre in row)
genre_to_id = {genre: idx for idx, genre in enumerate(sorted(all_genres))}
id_to_genre = {v: k for k, v in genre_to_id.items()}
movie_system['Movie_Genre'] = movie_system['Movie_Genre'].apply(
    lambda x: [genre_to_id[genre] for genre in x]
)

# Encode categorical data

In [69]:
label_encoder_movie_name = LabelEncoder()
label_encoder_movie_language = LabelEncoder()
label_encoder_movie_director = LabelEncoder()

movie_system['Movie_Title'] = label_encoder_movie_name.fit_transform(movie_system['Movie_Title'])
movie_system['Movie_Language'] = label_encoder_movie_language.fit_transform(movie_system['Movie_Language'])
movie_system['Movie_Director'] = label_encoder_movie_director.fit_transform(movie_system['Movie_Director'])



# Standardize numerical features

In [71]:
movie_system['Movie_Popularity'] = (movie_system['Movie_Popularity'] - movie_system['Movie_Popularity'].mean()) / movie_system['Movie_Popularity'].std()
movie_system['Movie_Revenue'] = (movie_system['Movie_Revenue'] - movie_system['Movie_Revenue'].mean()) / movie_system['Movie_Revenue'].std()
movie_system['Movie_Vote'] = (movie_system['Movie_Vote'] - movie_system['Movie_Vote'].mean()) / movie_system['Movie_Vote'].std()
movie_system['Movie_Vote_Count'] = (movie_system['Movie_Vote_Count'] - movie_system['Movie_Vote_Count'].mean()) / movie_system['Movie_Vote_Count'].std()



# Create decoded columns for display

In [73]:
movie_name_to_id = dict(zip(label_encoder_movie_name.transform(label_encoder_movie_name.classes_), label_encoder_movie_name.classes_))
movie_language_to_id = dict(zip(label_encoder_movie_language.transform(label_encoder_movie_language.classes_), label_encoder_movie_language.classes_))
movie_director_to_id = dict(zip(label_encoder_movie_director.transform(label_encoder_movie_director.classes_), label_encoder_movie_director.classes_))

movie_system['Decoded_Movie_Title'] = movie_system['Movie_Title'].map(movie_name_to_id)
movie_system['Decoded_Movie_Language'] = movie_system['Movie_Language'].map(movie_language_to_id)
movie_system['Decoded_Movie_Director'] = movie_system['Movie_Director'].map(movie_director_to_id)


# Cosine Similarity Function

In [75]:
def recommend_movies(selected_movie, filter_type, filter_value):
    # Build genre matrix
    all_genres = sorted(id_to_genre.keys())
    genre_matrix = np.zeros((len(movie_system), len(all_genres)))
    for idx, row in movie_system.iterrows():
        for genre_id in row['Movie_Genre']:
            if genre_id < len(all_genres):  # Prevent index out of range
                genre_matrix[idx, genre_id] = 1

    selected_index = selected_movie.name
    selected_vector = genre_matrix[selected_index].reshape(1, -1)
    similarities = cosine_similarity(selected_vector, genre_matrix).flatten()
    movie_system['Similarity'] = similarities

    # Apply filters
    if filter_type == "genre":
        filtered_movies = movie_system[movie_system['Movie_Genre'].apply(lambda x: any(g in x for g in selected_movie['Movie_Genre']))]
    elif filter_type == "language":
        filtered_movies = movie_system[movie_system['Movie_Language'] == filter_value]
    elif filter_type == "director":
        filtered_movies = movie_system[movie_system['Movie_Director'] == filter_value]
    else:
        print("Invalid filter type.")
        return

    recommended_movies = filtered_movies.sort_values(by=['Similarity', 'Movie_Popularity'], ascending=[False, False])
    display_recommendations(recommended_movies.head(20))


# Display Function

In [77]:
def display_recommendations(recommended_movies):
    print("\n🌟 Top Recommendations 🌟")
    print("{:<40} {:<15} {:<20} {:<30} {:<10}".format("Title", "Language", "Director", "Genres", "Popularity"))
    print("-" * 120)
    for _, row in recommended_movies.iterrows():
        decoded_genres = [id_to_genre.get(genre_id, "Unknown") for genre_id in row['Movie_Genre']]
        print("{:<40} {:<15} {:<20} {:<30} {:.2f}".format(
            row['Decoded_Movie_Title'],
            row['Decoded_Movie_Language'],
            row['Decoded_Movie_Director'],
            ', '.join(decoded_genres),
            row['Movie_Popularity']
        ))

# Input & Prediction Function

In [81]:
def get_movie_recommendations():
    print("\n🎥 Dua Fatima Presenting Movie Recommendation System 🎥")
    movie_name = input("Enter The Movie Name: ").strip()
    filter_type = input("Apply Filter (Genre, Language, Director): ").strip().lower()

    if movie_name not in movie_system['Decoded_Movie_Title'].values:
        print(f"\n❌ Movie '{movie_name}' not found in the dataset.")
        return

    selected_movie = movie_system[movie_system['Decoded_Movie_Title'] == movie_name].iloc[0]

    if filter_type == "genre":
        selected_genres = [id_to_genre[g] for g in selected_movie['Movie_Genre']]
        print(f"\n🎬 Selected Movie Genres: {', '.join(selected_genres)}")
        filter_value = selected_movie['Movie_Genre']
    elif filter_type == "language":
        filter_value = selected_movie['Movie_Language']
    elif filter_type == "director":
        filter_value = selected_movie['Movie_Director']
    else:
        print("\n⚠️ Invalid filter type. Please choose from 'Genre', 'Language', or 'Director'.")
        return

    print(f"\n🔍 Filter Applied: {filter_type.capitalize()} - {filter_value}")
    recommend_movies(selected_movie, filter_type, filter_value)

# Run the system
if __name__ == "__main__":
    while True:
        get_movie_recommendations()
        again = input("\n🔁 Do you want to search for another movie? (yes/no): ").strip().lower()
        if again != "yes":
            print("\n👋 Thank you for using Dua Fatima's Movie Recommendation System!")
            break



🎥 Dua Fatima Presenting Movie Recommendation System 🎥


Enter The Movie Name:  star wars
Apply Filter (Genre, Language, Director):  enre



❌ Movie 'star wars' not found in the dataset.



🔁 Do you want to search for another movie? (yes/no):  no



👋 Thank you for using Dua Fatima's Movie Recommendation System!
