## Netflix Titles (Movies and TV Shows) Dataset

In [70]:
# necessary imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import git 

In [71]:
# Load the Netflix titles CSV. The path is built from the root of the
# enclosing git working tree, found by searching parent directories of the
# current directory, so the notebook does not depend on where it is launched.
repo_path = git.Repo(
    '.',
    search_parent_directories=True,
).working_tree_dir
data_path = repo_path + '/data/netflix_titles.csv'

df = pd.read_csv(filepath_or_buffer=data_path)
df.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


### How does the number of titles change over the years?

In [72]:
# Compare how many movies and TV shows fall into each release-year bin.
# The x-axis is the `release_year` column, which is separate from the
# `date_added` column used further down.
fig = px.histogram(
    df,
    x='release_year',
    color='type',
    barmode='group',
    title='Number of titles by release year',
    labels={'release_year': 'Release year', 'type': 'Type'},
)
# px.histogram labels the y-axis "count" by default; spell out what is counted.
fig.update_layout(yaxis_title='Number of titles')
fig.show()

### Busiest release months?

In [73]:
# find the busiest release months, based on the date each title was added
# errors='coerce' turns values that cannot be parsed into NaT instead of
# raising, so the two derived month columns are missing for those rows.
df['date_added'] = pd.to_datetime(df['date_added'], format='mixed', errors='coerce')
df['month_added'] = df['date_added'].dt.month
df['month_name_added'] = df['date_added'].dt.month_name()

# Without an explicit order plotly lays categories out in the order they
# first appear in the data, which scrambles the calendar. These names match
# the (default, English) output of `.dt.month_name()`.
MONTH_ORDER = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

# plot with plotly
fig = px.histogram(
    df,
    x='month_name_added',
    color='type',
    barmode='group',
    category_orders={'month_name_added': MONTH_ORDER},
    title='Titles added by month',
    labels={'month_name_added': 'Month added', 'type': 'Type'},
)
fig.update_layout(yaxis_title='Number of titles')
fig.show()

### Genres with the most titles?

In [74]:
# Count how many titles are tagged with each genre. `listed_in` holds several
# genres separated by ', ', so split it and give every genre its own row
# before counting. Rows with a missing `listed_in` contribute nothing because
# value_counts() drops NaN by default.
df_genres = (
    df['listed_in']
    .str.split(', ')
    .explode()
    .value_counts()
    .rename_axis('genre')
    .reset_index(name='count')
)

# plot with plotly (value_counts already ordered the genres, most common first)
fig = px.bar(
    df_genres,
    x='genre',
    y='count',
    title='Number of titles per genre',
    labels={'genre': 'Genre', 'count': 'Number of titles'},
)
fig.show()

In [75]:
# NOTE: disabled alternative genre breakdown (split by type), kept for
# reference only. Re-enabling it as written would reassign `df` to the
# exploded frame (one row per title/genre pair), so every later cell that
# reads `df` would count a title once per genre it is listed in.
# df['genres'] = df['listed_in'].str.split(', ')
# # now we need to explode the genres column
# df = df.explode('genres')

# # plot with plotly, sort by value counts
# fig = px.histogram(df, x='genres', color='type', barmode='group')
# fig.show()

In [76]:
# Quick look at the first two rows of the current frame.
df.head(n=2)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,month_added,month_name_added
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,2021-09-25,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",9.0,September
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",9.0,September


### Rating Analysis

In [79]:
# Distribution of titles across the values of the `rating` column.
# No barmode is set: with a single trace (no `color` split) there is nothing
# to group.
fig = px.histogram(
    df,
    x='rating',
    title='Number of titles per rating',
    labels={'rating': 'Rating'},
)
# Order the bars from most to least common so the ranking can be read
# directly off the chart instead of following first-appearance order.
fig.update_xaxes(categoryorder='total descending')
fig.update_layout(yaxis_title='Number of titles')
fig.show()