In [19]:
!pip install scikit-surprise


Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m153.6/154.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357278 sha256=417c3f0aac9af66f459e336bf998f7b533e1123bef1cf37a4bd8b79c47fef918
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
try:
    movies_df = pd.read_csv("/content/netflix_titles_nov_2019.csv")
except Exception as e:
    print("An error occurred while loading the dataset: ", str(e))

# Check the columns of the dataset
print(movies_df.columns)

# Extract useful features
def preprocess_data(df):
    # One-hot encode genres
    genre_cols = pd.get_dummies(df["listed_in"], drop_first=True)

    # Extract release year
    df["release_year"] = pd.to_datetime(df["release_year"], errors='coerce').dt.year

    # Combine features into a matrix
    feature_matrix = pd.concat([df[["release_year"]], genre_cols], axis=1).fillna(0)
    return feature_matrix

# Preprocess the dataset
feature_matrix = preprocess_data(movies_df)

# Function to recommend similar movies
def recommend_movies(movie_id, num_recs=10):
    # Get the movie's feature vector
    movie_features = feature_matrix.iloc[movie_id].values.reshape(1, -1)

    # Calculate cosine similarity between the selected movie and all others
    similarities = cosine_similarity(movie_features, feature_matrix.values).flatten()

    # Get indices of most similar movies, excluding the input movie itself
    similar_movies = similarities.argsort()[-(num_recs + 1):-1][::-1]

    # Return the top recommendations
    return movies_df.iloc[similar_movies][["title", "release_year", "listed_in"]]

# Example: Recommend movies for the first movie in the dataset
movie_id = 0  # Replace with any movie ID
recommendations = recommend_movies(movie_id, num_recs=10)

# Display recommendations
print("Recommended Movies:")
print(recommendations)


Index(['show_id', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description',
       'type'],
      dtype='object')
Recommended Movies:
                               title  release_year  \
1724                A Korean Odyssey          1970   
5226                       My Runway          1970   
382                       Second 20s          1970   
3267           Something in the Rain          1970   
378                  Another Miss Oh          1970   
4823     What in the World Happened?          1970   
1901         Romance is a bonus book          1970   
5016                   One More Time          1970   
5103                           Spark          1970   
899   Rookie Historian Goo Hae-Ryung          1970   

                                              listed_in  
1724  International TV Shows, Korean TV Shows, Roman...  
5226  International TV Shows, Korean TV Shows, Roman...  
382   International TV Shows