<a href="https://colab.research.google.com/github/Hari-4501/Movie_recommendation_system/blob/main/Movie_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikit-learn pandas numpy

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity




In [None]:
# Read the movie metadata CSV into the notebook-wide DataFrame `df`.
# The path is the Colab upload location used by this notebook.
df = pd.read_csv('/content/dataset.csv')

# Show the first five rows as a quick sanity check of the columns.
df.head(n=5)


Unnamed: 0,id,title,genre,original_language,overview,popularity,release_date,vote_average,vote_count
0,278,The Shawshank Redemption,"Drama,Crime",en,Framed in the 1940s for the double murder of h...,94.075,1994-09-23,8.7,21862
1,19404,Dilwale Dulhania Le Jayenge,"Comedy,Drama,Romance",hi,"Raj is a rich, carefree, happy-go-lucky second...",25.408,1995-10-19,8.7,3731
2,238,The Godfather,"Drama,Crime",en,"Spanning the years 1945 to 1955, a chronicle o...",90.585,1972-03-14,8.7,16280
3,424,Schindler's List,"Drama,History,War",en,The true story of how businessman Oskar Schind...,44.761,1993-12-15,8.6,12959
4,240,The Godfather: Part II,"Drama,Crime",en,In the continuing saga of the Corleone crime f...,57.749,1974-12-20,8.6,9811


In [None]:
# Replace missing genre/overview entries with empty strings in one step,
# so the string concatenation below does not propagate NaN.
df[['genre', 'overview']] = df[['genre', 'overview']].fillna('')

# Build the single text field that gets vectorized:
# the genre list, a separating space, then the plot overview.
df['combined_features'] = (
    df['genre']
    + ' '
    + df['overview']
)


In [None]:
# Fit a TF-IDF model on the combined genre/overview text (English stop
# words removed) and keep the resulting document-term matrix, one row
# per movie.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['combined_features'])

# Movie-to-movie cosine similarity. With a single argument the matrix is
# compared against itself, which is what passing it twice does.
# NOTE(review): this is an N x N result and no cell visible here reads it;
# confirm it is still needed before keeping it for large datasets.
cosine_sim = cosine_similarity(tfidf_matrix)


In [None]:
# Renumber the rows 0..N-1 so index labels equal row positions.
# drop=True discards the old index instead of inserting it as a new
# 'index' column; a bare reset_index() adds another column each time the
# cell is re-run ('index', then 'level_0') and then raises ValueError.
df = df.reset_index(drop=True)

# Title -> row-position lookup.
# Series.drop_duplicates() removes duplicate *values*; the values here are
# row numbers, which are already unique, so it never removed anything and
# repeated titles stayed in the index. De-duplicate on the index instead,
# keeping the first occurrence of each title.
indices = pd.Series(df.index, index=df['title'])
indices = indices[~indices.index.duplicated(keep='first')]

def recommend_movies(user_genre_interest, user_behavior_keywords, top_n=5):
    """Return the movies whose text best matches the user's stated interests.

    The two input strings are joined into one query, transformed with the
    notebook-level ``vectorizer``, and scored by cosine similarity against
    every row of the notebook-level ``tfidf_matrix``.

    Args:
        user_genre_interest: Free-text genres, e.g. ``"drama action"``.
            ``None`` is treated as an empty string.
        user_behavior_keywords: Free-text keywords, e.g. ``"sports"``.
            ``None`` is treated as an empty string.
        top_n: Maximum number of rows to return. ``None`` returns every
            matching movie; negative values are treated as 0.

    Returns:
        A DataFrame with the ``title``, ``genre`` and ``overview`` columns of
        the notebook-level ``df``, ordered by descending similarity (ties
        keep dataset order). Movies with zero similarity to the query are
        left out, so the result can hold fewer than ``top_n`` rows and is
        empty when the query shares no terms with the corpus.
    """
    # Tolerate None so a missing field does not raise on concatenation.
    user_input = (user_genre_interest or '') + ' ' + (user_behavior_keywords or '')

    user_vec = vectorizer.transform([user_input])
    # One query row in, so take row 0 of the (1, n_movies) result.
    scores = cosine_similarity(user_vec, tfidf_matrix)[0]

    # Stable sort on the negated scores gives descending order while keeping
    # equal scores in their original row order.
    ranked = np.argsort(-scores, kind='stable')

    # A zero score means no shared terms; returning those rows would present
    # arbitrary movies (simply the first rows of the dataset) as matches.
    ranked = ranked[scores[ranked] > 0]

    # Clamp negatives: a negative slice bound would cut rows off the tail
    # instead of limiting the count.
    limit = None if top_n is None else max(top_n, 0)

    # Positional lookup: assumes row i of tfidf_matrix corresponds to row i
    # of df (true when both come from the same, unreordered frame).
    return df[['title', 'genre', 'overview']].iloc[ranked[:limit]]


In [None]:
# Example query: preferred genres plus a keyword describing viewing habits.
genre_input, behavior_input = "drama action", "sports"

# Ask for the ten closest matches and display them as the cell output.
recommendations = recommend_movies(
    user_genre_interest=genre_input,
    user_behavior_keywords=behavior_input,
    top_n=10,
)
recommendations


Unnamed: 0,title,genre,overview
8987,Playing for Keeps,"Comedy,Romance",A former sports star who's fallen on hard time...
5854,Thunderbolt,"Action,Thriller","In order to release his kidnapped sister, spor..."
8218,Point Break,Action,A young undercover FBI agent infiltrates a gan...
7588,Two for the Money,"Comedy,Crime,Drama,Thriller",A former college athlete joins forces with a s...
6933,What Men Want,"Comedy,Romance","Magically able to hear what men are thinking, ..."
4143,Bleed for This,Drama,The inspirational story of World Champion Boxe...
1502,Waves,"Romance,Drama",A controlling father’s attempts to ensure that...
9836,xXx: State of the Union,"Action,Adventure,Crime,Mystery,Thriller",Darius Stone's criminal record and extreme spo...
6373,The Program,Drama,An Irish sports journalist becomes convinced t...
3032,Jappeloup,Drama,A true sports story that utterly defies the od...
