# **Importing the Libraries and Functions**

In [152]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# **Load the dataset into a pandas DataFrame**

In [153]:
dataset = pd.read_csv("/content/dataset.csv")

# **Displaying first 20 rows of the dataset**

In [154]:
dataset.head(20)

Unnamed: 0,id,title,genre,original_language,overview,popularity,release_date,vote_average,vote_count
0,278,The Shawshank Redemption,"Drama,Crime",en,Framed in the 1940s for the double murder of h...,94.075,1994-09-23,8.7,21862.0
1,19404,Dilwale Dulhania Le Jayenge,"Comedy,Drama,Romance",hi,"Raj is a rich, carefree, happy-go-lucky second...",25.408,1995-10-19,8.7,3731.0
2,238,The Godfather,"Drama,Crime",en,"Spanning the years 1945 to 1955, a chronicle o...",90.585,1972-03-14,8.7,16280.0
3,424,Schindler's List,"Drama,History,War",en,The true story of how businessman Oskar Schind...,44.761,1993-12-15,8.6,12959.0
4,240,The Godfather: Part II,"Drama,Crime",en,In the continuing saga of the Corleone crime f...,57.749,1974-12-20,8.6,9811.0
5,667257,Impossible Things,"Family,Drama",es,"Matilde is a woman who, after the death of her...",14.358,2021-06-17,8.6,255.0
6,129,Spirited Away,"Animation,Family,Fantasy",ja,"A young girl, Chihiro, becomes trapped in a st...",92.056,2001-07-20,8.5,13093.0
7,730154,Your Eyes Tell,"Romance,Drama",ja,"A tragic accident lead to Kaori's blindness, b...",51.345,2020-10-23,8.5,339.0
8,372754,Dou kyu sei – Classmates,"Romance,Animation",ja,"Rihito Sajo, an honor student with a perfect s...",14.285,2016-02-20,8.5,239.0
9,372058,Your Name.,"Romance,Animation,Drama",ja,High schoolers Mitsuha and Taki are complete s...,158.27,2016-08-26,8.5,8895.0


# **Displaying the column names from the dataset**

In [155]:
dataset.columns

Index(['id', 'title', 'genre', 'original_language', 'overview', 'popularity',
       'release_date', 'vote_average', 'vote_count'],
      dtype='object')

# **Forming a new column named tags by combining genre and overview column**

In [156]:
dataset["tags"] = dataset["genre"] + dataset["overview"]

In [157]:
dataset.head()

Unnamed: 0,id,title,genre,original_language,overview,popularity,release_date,vote_average,vote_count,tags
0,278,The Shawshank Redemption,"Drama,Crime",en,Framed in the 1940s for the double murder of h...,94.075,1994-09-23,8.7,21862.0,"Drama,CrimeFramed in the 1940s for the double ..."
1,19404,Dilwale Dulhania Le Jayenge,"Comedy,Drama,Romance",hi,"Raj is a rich, carefree, happy-go-lucky second...",25.408,1995-10-19,8.7,3731.0,"Comedy,Drama,RomanceRaj is a rich, carefree, h..."
2,238,The Godfather,"Drama,Crime",en,"Spanning the years 1945 to 1955, a chronicle o...",90.585,1972-03-14,8.7,16280.0,"Drama,CrimeSpanning the years 1945 to 1955, a ..."
3,424,Schindler's List,"Drama,History,War",en,The true story of how businessman Oskar Schind...,44.761,1993-12-15,8.6,12959.0,"Drama,History,WarThe true story of how busines..."
4,240,The Godfather: Part II,"Drama,Crime",en,In the continuing saga of the Corleone crime f...,57.749,1974-12-20,8.6,9811.0,"Drama,CrimeIn the continuing saga of the Corle..."


# **Creating a new dataset with required columns**

In [158]:
new_dataset = dataset[["id", "title", "genre", "tags"]]

# **Displaying the first 5 rows of the new dataset**

In [159]:
new_dataset.head()

Unnamed: 0,id,title,genre,tags
0,278,The Shawshank Redemption,"Drama,Crime","Drama,CrimeFramed in the 1940s for the double ..."
1,19404,Dilwale Dulhania Le Jayenge,"Comedy,Drama,Romance","Comedy,Drama,RomanceRaj is a rich, carefree, h..."
2,238,The Godfather,"Drama,Crime","Drama,CrimeSpanning the years 1945 to 1955, a ..."
3,424,Schindler's List,"Drama,History,War","Drama,History,WarThe true story of how busines..."
4,240,The Godfather: Part II,"Drama,Crime","Drama,CrimeIn the continuing saga of the Corle..."


# **Creating an object of converting text data into a matrix of token counts**

In [160]:
ins = CountVectorizer(max_features = 20000, stop_words="english")

In [None]:
ins

## **Transforming the text data from the tags column into a numerical feature array based on movie and key words**

In [161]:
vec = ins.fit_transform(new_dataset["tags"].values.astype("U")).toarray()

In [162]:
vec

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [163]:
vec.shape

(2988, 17108)

# **Identifying movies with high similarity scores in the array**

In [164]:
similarity = cosine_similarity(vec)

In [165]:
similarity

array([[1.        , 0.04600437, 0.05006262, ..., 0.02043798, 0.048795  ,
        0.0727393 ],
       [0.04600437, 1.        , 0.07254763, ..., 0.01974496, 0.0942809 ,
        0.03513642],
       [0.05006262, 0.07254763, 1.        , ..., 0.06446026, 0.05129892,
        0.07647191],
       ...,
       [0.02043798, 0.01974496, 0.06446026, ..., 1.        , 0.08377078,
        0.03121953],
       [0.048795  , 0.0942809 , 0.05129892, ..., 0.08377078, 1.        ,
        0.0745356 ],
       [0.0727393 , 0.03513642, 0.07647191, ..., 0.03121953, 0.0745356 ,
        1.        ]])

# **Providing recommendations based on either a movie title or genre**

In [168]:
def recommend(dataset, input_value, recommendation_type="title", num_recommendations=10):
    if recommendation_type.lower() == "title":
        idx = dataset[dataset["title"] == input_value].index
        distances = sorted(list(enumerate(similarity[idx[0]])), reverse=True, key=lambda vec: vec[1])
        return [dataset.iloc[i[0]].title for i in distances[1:num_recommendations + 1]]

    elif recommendation_type.lower() == "genre":
        genre_movies = dataset[dataset["genre"].str.contains(input_value, case=False, na=False)]
        genre_indices = genre_movies.index
        genre_similarities = similarity[genre_indices].mean(axis=0)
        sorted_indices = np.argsort(genre_similarities)[::-1]
        recommendations = [dataset.iloc[i].title for i in sorted_indices if i not in genre_indices][:num_recommendations]
        return recommendations

    else:
        return "Invalid recommendation_type. Use 'title' or 'genre'."

In [171]:
print("\nRecommendations for the movies that the user may like:")
title_recommendations = recommend(new_dataset, "Your Eyes Tell", "title")
for movies in title_recommendations:
    print(f"- {movies}")

print("\nRecommendations for the movies from the same genre:")
genre_recommendations = recommend(new_dataset, "Drama", "genre")
for movies in genre_recommendations:
    print(f"- {movies}")


Recommendations for the movies that the user may like:
- About Time
- Free Fall
- The Man Without a Past
- Synecdoche, New York
- Promising Young Woman
- Becoming Jane
- To the Bone
- The Starling
- The Age of Adaline
- The Awful Truth

Recommendations for the movies from the same genre:
- Up
- Bread and Tulips
- Delicatessen
- Arsenic and Old Lace
- Midnight in Paris
- Kiki's Delivery Service
- Big Fish & Begonia
- The Tomorrow War
- The Sea Beast
- Sexmission
