In [1]:
pip install dash --quite


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: --quite


In [2]:
pip install fuzzywuzzy --quite


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: --quite


In [3]:
#importing required packages
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from dash import Dash, dcc, html, Input, Output, State
from fuzzywuzzy import process



In [4]:
# Read the Netflix titles CSV (path is relative to the working directory) into a DataFrame
dataset_path = 'netflix_titles.csv'
df = pd.read_csv(dataset_path)

In [5]:
# Report how many entries are missing in each column the recommender relies on
for column_name in ('listed_in', 'title'):
    missing_count = df[column_name].isnull().sum()
    print(f"Missing Values in '{column_name}':", missing_count)

Missing Values in 'listed_in': 0
Missing Values in 'title': 0


In [6]:
# Dropping rows with missing values in 'listed_in' or 'title'.
# The index is reset afterwards because later cells take a row's index label and use
# it as a positional offset into the cosine-similarity matrix and into df.iloc; without
# the reset, any dropped row would leave labels and positions out of step.
df = df.dropna(subset=['listed_in', 'title']).reset_index(drop=True)


In [7]:
# Build one whitespace-separated genre string per title: lowercase everything, remove
# the spaces inside and around genre names, then turn the separating commas into spaces
def _to_genre_tokens(listed_in):
    """Collapse a comma-separated genre listing into space-separated lowercase tokens."""
    lowered = listed_in.lower()
    without_spaces = lowered.replace(' ', '')
    return without_spaces.replace(',', ' ')


df['genres_cleaned'] = df['listed_in'].map(_to_genre_tokens)

In [8]:
# Creating a Count Vectorizer to transform genres into feature vectors.
# 'genres_cleaned' holds one whitespace-separated token per genre, so tokens are split
# on whitespace only. The default token pattern would break a genre apart at
# punctuation (e.g. "sci-fi&fantasy" -> "sci", "fi", "fantasy").
vectorizer = CountVectorizer(token_pattern=r'\S+')
genre_vectors = vectorizer.fit_transform(df['genres_cleaned'])

In [9]:
# Pairwise cosine similarity between the genre vectors of every pair of titles
cosine_sim = cosine_similarity(genre_vectors)

# Alphabetically sorted list of the distinct genre labels found in 'listed_in'
genre_lists = df['listed_in'].str.split(',')
unique_genres = sorted({label.strip() for labels in genre_lists for label in labels})

In [10]:
# Function to recommend titles based on a given title
def recommend(title, cosine_sim=cosine_sim, df=df, top_n=5):
    """Recommend the titles whose genres are most similar to a given title.

    NOTE: a later cell in this notebook defines another ``recommend``; once that
    cell has run, it replaces this version.

    Parameters
    ----------
    title : str
        Text to look for in the 'title' column. Matching is a case-insensitive,
        literal substring search; the first matching row is the reference title.
    cosine_sim : 2-D array
        Similarity matrix indexed by row position; its row order must match the
        row order of ``df``.
    df : pandas.DataFrame
        Catalogue containing at least the 'title' and 'listed_in' columns.
    top_n : int
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or str
        The 'title' and 'listed_in' columns of the most similar rows, or an
        explanatory message string when no title matches.
    """
    # regex=False: user input is plain text, so characters such as "(" or "?" must
    # not be interpreted as regular-expression syntax
    title_hits = df['title'].str.contains(title, case=False, na=False, regex=False).to_numpy()
    if not title_hits.any():
        return f"Title '{title}' not found in the dataset. Please check the spelling or try another title."

    # Row *position* of the first match (not its index label), because both
    # cosine_sim and df.iloc below are addressed by position
    idx = int(title_hits.argmax())

    # Rank every other row by similarity. The reference row is excluded explicitly:
    # rows with identical genres tie at the top score, so the reference row is not
    # guaranteed to be first after sorting.
    ranked = sorted(
        (pair for pair in enumerate(cosine_sim[idx]) if pair[0] != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_indices = [position for position, _ in ranked[:top_n]]

    # Returning the recommended titles
    return df.iloc[top_indices][['title', 'listed_in']]


In [11]:
# Recommendation function with fuzzy matching
def recommend(title, genre_filter=None, release_year=None, content_type=None, top_n=5):
    """Recommend titles with similar genres, resolving the input title by fuzzy match.

    Parameters
    ----------
    title : str
        User-supplied title; the best fuzzy match in ``df['title']`` is used as the
        reference title.
    genre_filter : str, optional
        Keep only rows whose 'listed_in' contains this text (case-insensitive,
        literal substring).
    release_year : int or str, optional
        Keep only rows whose 'release_year' equals ``int(release_year)``.
    content_type : str, optional
        Keep only rows whose 'type' equals this value.
    top_n : int
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with the columns 'title', 'listed_in', 'release_year'
        and 'type', ordered from most to least similar. Empty when no title is
        given or nothing passes the filters.
    """
    columns = ['title', 'listed_in', 'release_year', 'type']

    # Without a title there is nothing to match against: return an empty result
    # instead of failing on the row lookup
    if not title:
        return df.iloc[0:0][columns]

    # Fuzzy match the title
    best_match = process.extractOne(title, df['title'])
    if best_match is None:
        return df.iloc[0:0][columns]
    matched_title = best_match[0]

    # Row *position* of the matched title (not its index label), because both
    # cosine_sim and df.iloc below are addressed by position
    idx = int((df['title'] == matched_title).to_numpy().argmax())

    # Rank every other row by similarity. The reference row is excluded explicitly:
    # rows with identical genres tie at the top score, so the reference row is not
    # guaranteed to be first after sorting.
    ranked = sorted(
        (pair for pair in enumerate(cosine_sim[idx]) if pair[0] != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    recommendations = df.iloc[[position for position, _ in ranked]]

    # Filter the full ranking first and truncate afterwards, so the filters select
    # the best matching rows rather than thinning out an already-truncated top_n
    if genre_filter:
        genre_hits = recommendations['listed_in'].str.contains(
            genre_filter, case=False, na=False, regex=False
        )
        recommendations = recommendations[genre_hits]
    if release_year:
        recommendations = recommendations[recommendations['release_year'] == int(release_year)]
    if content_type:
        recommendations = recommendations[recommendations['type'] == content_type]

    return recommendations[columns].head(top_n)


In [12]:
app = Dash(__name__)

# Inline styles that several components share, named once for readability
label_style = {'fontWeight': 'bold'}
text_input_style = {'width': '60%', 'padding': '10px'}
dropdown_style = {'width': '60%'}
output_panel_style = {'marginTop': '20px', 'textAlign': 'center', 'padding': '20px'}

# Page banner
header_section = html.Div([
    html.H1(
        "Netflix Recommendation Dashboard",
        style={'textAlign': 'center', 'color': '#ffffff', 'backgroundColor': '#333333', 'padding': '10px'},
    ),
])

# Input controls: title box, three optional filters and the submit button
genre_options = [{'label': genre, 'value': genre} for genre in unique_genres]
content_type_options = [{'label': kind, 'value': kind} for kind in ('Movie', 'TV Show')]

controls_section = html.Div(
    [
        html.Label("Enter a Movie or TV Show Title:", style=label_style),
        dcc.Input(id="input-title", type="text", placeholder="Enter a title", style=text_input_style),
        html.Br(),

        html.Label("Filter by Genre:", style=label_style),
        dcc.Dropdown(
            id="genre-filter",
            options=genre_options,
            placeholder="Select a genre",
            style=dropdown_style,
        ),
        html.Br(),

        html.Label("Filter by Release Year:", style=label_style),
        dcc.Input(id="release-year", type="number", placeholder="Enter release year", style=text_input_style),
        html.Br(),

        html.Label("Filter by Content Type:", style=label_style),
        dcc.Dropdown(
            id="content-type",
            options=content_type_options,
            placeholder="Select content type",
            style=dropdown_style,
        ),
        html.Br(),

        html.Button(
            "Get Recommendations",
            id="recommend-button",
            n_clicks=0,
            style={
                'backgroundColor': '#007BFF',
                'color': '#ffffff',
                'padding': '10px 20px',
                'border': 'none',
                'borderRadius': '5px',
            },
        ),
    ],
    style={'textAlign': 'center', 'padding': '20px'},
)

# App layout: banner, controls, then the two containers the callback fills in
app.layout = html.Div(
    [
        header_section,
        controls_section,
        html.Div(id="output-recommendations", style=output_panel_style),
        html.Div(id="genre-visualization", style=output_panel_style),
    ],
    style={'fontFamily': 'Arial, sans-serif', 'backgroundColor': '#F4F4F4', 'padding': '20px'},
)


In [13]:
# Callback for recommendations
# Only the button is an Input; the form fields are State, so their current values are
# read when the button is clicked instead of re-running the recommender on every edit.
@app.callback(
    [Output("output-recommendations", "children"),
     Output("genre-visualization", "children")],
    [Input("recommend-button", "n_clicks")],
    [State("input-title", "value"),
     State("genre-filter", "value"),
     State("release-year", "value"),
     State("content-type", "value")]
)
def update_recommendations(n_clicks, title, genre_filter, release_year, content_type):
    """Build the recommendations table and the genre bar chart for the dashboard.

    Parameters
    ----------
    n_clicks : int
        Click count of the "Get Recommendations" button; it only triggers the
        callback and is not otherwise used.
    title : str or None
        Text from the title input.
    genre_filter, release_year, content_type
        Optional filter values from the form, passed through to ``recommend``.

    Returns
    -------
    tuple
        ``(table_or_message, graph_or_None)``: an HTML table plus a bar chart when
        there are recommendations, otherwise a red message and ``None``.
    """
    if not title:
        return html.Div("Please enter a title to get recommendations.", style={'color': 'red'}), None

    recommendations = recommend(title, genre_filter, release_year, content_type)

    if recommendations.empty:
        return html.Div(f"No recommendations found for '{title}'. Please try another title.", style={'color': 'red'}), None

    # Recommendations Table
    header_row = html.Tr([html.Th("Title"), html.Th("Genres"), html.Th("Release Year"), html.Th("Type")])
    body_rows = [
        html.Tr([html.Td(row['title']), html.Td(row['listed_in']), html.Td(row['release_year']), html.Td(row['type'])])
        for _, row in recommendations.iterrows()
    ]
    recommendation_table = html.Table(
        [header_row] + body_rows,
        style={'margin': 'auto', 'border': '1px solid black', 'borderCollapse': 'collapse', 'width': '80%'},
    )

    # Visualization of genres.
    # Genres are separated by ", " in 'listed_in', so each piece is stripped after the
    # split; otherwise " Dramas" and "Dramas" would be counted as different genres.
    genre_counts = (
        recommendations['listed_in']
        .str.split(',')
        .explode()
        .str.strip()
        .value_counts()
        .head(5)
    )
    genre_bar = dcc.Graph(
        figure={
            'data': [{
                'x': genre_counts.index.tolist(),
                'y': genre_counts.values.tolist(),
                'type': 'bar',
                'name': 'Genres',
            }],
            'layout': {'title': 'Top Genres in Recommendations'}
        }
    )

    return recommendation_table, genre_bar

# Running the app
# `app.run` replaces `app.run_server`, which is deprecated and no longer available in
# current Dash releases (the install cell above does not pin a Dash version).
if __name__ == "__main__":
    app.run(debug=True)

<IPython.core.display.Javascript object>

Explanation:
This Netflix Recommendation System uses content-based filtering to suggest similar titles based on genres. Here's how it works:

Preprocessing:
The listed_in column (genres) is cleaned and transformed into numerical feature vectors using the CountVectorizer.

Cosine Similarity:
A similarity matrix is calculated to compare how similar the genres of any two titles are.

Fuzzy Matching:
Fuzzy string matching (via fuzzywuzzy) maps the user's input to the closest title in the dataset, so partial or misspelled titles still resolve to a match.

Filtering:
Users can filter recommendations by genre, release year, and content type (Movie/TV Show).

Interactive Dashboard:
Built with Dash, the dashboard allows users to enter a title and customize filters, then view recommendations in a neatly formatted table alongside a bar chart of the most common genres among them.

This approach provides a flexible and interactive way to explore Netflix-like recommendations based on content similarity.