**1. Movie Recommendation System**

In [16]:
#Required libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ast

In [17]:
#Load datasets
#The dataset folder is kept in one constant so only this line changes on another machine
DATA_DIR='/home/parallels/Documents/Machine_Learning_Python_Project_IBM_Internship/Datasets'
movies=pd.read_csv(DATA_DIR+'/tmdb_5000_movies.csv')
credits=pd.read_csv(DATA_DIR+'/tmdb_5000_credits.csv')

#Merge the datasets based on title
#NOTE(review): if a title occurs more than once in either file, this join pairs every
#same-titled row with every other one - confirm titles are unique or join on an id key
movies=movies.merge(credits, on='title')

In [18]:
#Preprocessing
#Cleaning
#Keep only the useful columns and drop rows with a missing overview.
#Done as one chained, non-inplace expression: calling dropna(inplace=True) on a frame
#that was just derived from a column selection may be flagged by pandas with a
#SettingWithCopyWarning, and the chained form returns a fresh frame instead.
movies=movies[['movie_id','title','overview','genres','cast','crew']].dropna(subset=['overview'])

In [19]:
#Parsing
def convert(obj,limit=None):
    """Extract the 'name' values from a stringified list of dicts.

    Parameters
    ----------
    obj : str
        Text holding a Python literal, e.g. "[{'id': 28, 'name': 'Action'}]".
    limit : int or None, optional
        Keep only the first `limit` entries. None (the default) keeps all.

    Returns
    -------
    list
        The 'name' values in their original order, or [] when `obj` cannot be
        parsed or does not have the expected list-of-dicts shape.
    """
    try:
        parsed=ast.literal_eval(obj)
        # A slice bound of None means "no bound", so one expression covers both
        # the limited and the unlimited case.
        return [entry['name'] for entry in parsed[:limit]]
    except (ValueError,SyntaxError,TypeError,KeyError):
        # Best effort: unparsable text, non-string input (e.g. NaN) or entries
        # without a 'name' key give an empty list. Only these errors are caught,
        # so KeyboardInterrupt/SystemExit are no longer swallowed.
        return []

def get_director(obj):
    """Return the first crew member whose job is 'Director'.

    Parameters
    ----------
    obj : str
        Text holding a Python literal list of dicts, each with 'job' and
        'name' keys.

    Returns
    -------
    list
        A one-element list with the director's name, or [] when there is no
        'Director' entry or `obj` cannot be parsed into the expected shape.
    """
    try:
        crew=ast.literal_eval(obj)
        for member in crew:
            if member['job']=='Director':
                # Only the first matching entry is used.
                return [member['name']]
        return []
    except (ValueError,SyntaxError,TypeError,KeyError):
        # Best effort: malformed input gives an empty list. Only these errors
        # are caught, so KeyboardInterrupt/SystemExit are no longer swallowed.
        return []

#Apply functions
#Genres: parse the raw text, then lower-case every genre name
movies['genres']=movies['genres'].apply(lambda raw: [name.lower() for name in convert(raw)])
#Cast: keep only the first three listed names
movies['cast']=movies['cast'].apply(convert, args=(3,))
#Crew: reduce to a list holding at most the director's name
movies['crew']=movies['crew'].apply(get_director)

In [20]:
#Combine columns into one string per movie
#Each list column is flattened to space-separated text first
genre_text=movies['genres'].apply(' '.join)
cast_text=movies['cast'].apply(' '.join)
crew_text=movies['crew'].apply(' '.join)
movies['soup']=(movies['overview'] + ' ' + genre_text + ' ' + cast_text + ' ' + crew_text)

In [21]:
#Vectorization using tf-idf
#English stop words are removed by the vectorizer before weighting
tfidf=TfidfVectorizer(stop_words='english')
soup_corpus=movies['soup']
tfidf_matrix=tfidf.fit_transform(soup_corpus)

In [22]:
#Cosine similarity matrix
#Called with one argument, cosine_similarity compares the rows of the matrix with each other
cosine_sim=cosine_similarity(tfidf_matrix)

In [23]:
#Reset the index of the 'movies' dataframe and create pandas series called indices
movies=movies.reset_index(drop=True)
indices=pd.Series(movies.index,index=movies['title'])
#Series.drop_duplicates() compares the values (row positions, which are always unique),
#so it never removed repeated titles. Filter on the index labels instead, keeping the
#first row for each title, so a title lookup always yields a single position.
indices=indices[~indices.index.duplicated(keep='first')]

#Function to recommend similar movies based on title of the movie
def recommend_title(title,num_recommendations=5):
    """Recommend the movies most similar to the given title.

    Parameters
    ----------
    title : str
        Movie title. An exact match is tried first, then a case-insensitive one.
    num_recommendations : int, optional
        Maximum number of rows to return (default 5).

    Returns
    -------
    pandas.DataFrame or str
        The 'title' and 'genres' columns of the most similar rows, ordered by
        decreasing similarity, or the string "Movie not found!" when no title
        matches. Reads the module-level `indices`, `cosine_sim` and `movies`.
    """
    idx=indices.get(title)

    if idx is None:
        # Fall back to a case-insensitive comparison against every known title.
        title_lower=title.lower()
        matched=next((t for t in indices.index if t.lower()==title_lower),None)
        if matched is None:
            return "Movie not found!"
        idx=indices[matched]

    # When a title labels several rows the lookup returns a Series of positions;
    # indexing cosine_sim with it would give a 2-D block and a meaningless
    # ranking, so the first matching row is used.
    if isinstance(idx,pd.Series):
        idx=idx.iloc[0]
    idx=int(idx)

    row=cosine_sim[idx]
    # Supports a sparse similarity matrix as well as the dense default.
    sim_scores=row.toarray().ravel() if hasattr(row,'toarray') else np.asarray(row).ravel()

    # Stable descending order keeps ties in row order (same as sorted(..., reverse=True)).
    ranked=np.argsort(-sim_scores,kind='stable')
    # Remove the query row explicitly rather than assuming it is ranked first.
    ranked=ranked[ranked!=idx][:max(num_recommendations,0)]

    return movies[['title','genres']].iloc[ranked]

#Function to recommend similar movies based on the genre of the movie
def recommend_genre(genre_string, num_recommendations=5):
    """Recommend movies closest, on average, to those having all given genres.

    Parameters
    ----------
    genre_string : str
        One or more genre names separated by commas. Case and surrounding
        whitespace are ignored, and blank entries (e.g. a trailing comma) are
        skipped.
    num_recommendations : int, optional
        Maximum number of rows to return (default 5).

    Returns
    -------
    pandas.DataFrame or str
        The 'title' and 'genres' columns of the top-ranked rows with a fresh
        0..n-1 index, or the string "No movies found with all specified
        genres!" when nothing matches. Reads the module-level `movies` and
        `cosine_sim`.

    Notes
    -----
    The ranking runs over every row of `movies`, so a returned movie is not
    guaranteed to carry all requested genres itself.
    """
    # Blank tokens can never appear in a genre list, so "action," used to
    # match nothing; drop them before filtering.
    genres=[g.strip().lower() for g in genre_string.split(',') if g.strip()]
    if not genres:
        return "No movies found with all specified genres!"

    has_all=movies['genres'].apply(lambda x: all(g in x for g in genres))
    # Row positions (not index labels) are needed to index cosine_sim.
    positions=np.flatnonzero(has_all.to_numpy(dtype=bool))

    if positions.size==0:
        return "No movies found with all specified genres!"

    # Average similarity of every movie to the matching set.
    avg_sim_scores=np.asarray(cosine_sim[positions].mean(axis=0)).ravel()

    # Stable descending order keeps ties in row order.
    top_indices=np.argsort(-avg_sim_scores,kind='stable')[:max(num_recommendations,0)]
    return movies[['title', 'genres']].iloc[top_indices].reset_index(drop=True)

In [24]:
#Strip stray whitespace from the typed title so it does not prevent a match
recommend_title(input("Enter a movie title: ").strip())

Unnamed: 0,title,genres
870,Superman II,"[action, adventure, fantasy, science fiction]"
2435,Superman IV: The Quest for Peace,"[action, adventure, science fiction]"
10,Superman Returns,"[adventure, fantasy, action, science fiction]"
1297,Superman III,"[comedy, action, adventure, fantasy, science f..."
4405,The Helix... Loaded,"[action, comedy, science fiction]"


In [25]:
#Read the genre query from the user, then display the recommendations
genre_query=input("Enter a genre: ")
recommend_genre(genre_query)

Unnamed: 0,title,genres
0,The Helix... Loaded,"[action, comedy, science fiction]"
1,Mad Max: Fury Road,"[action, adventure, science fiction, thriller]"
2,Last Action Hero,"[adventure, fantasy, action, comedy, family]"
3,X-Men: Days of Future Past,"[action, adventure, fantasy, science fiction]"
4,Man of Steel,"[action, adventure, fantasy, science fiction]"
