<a href="https://colab.research.google.com/github/Nithin46/PB-Project/blob/main/PB_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import datetime
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from PIL import Image
import warnings
warnings.filterwarnings("ignore")

In [2]:
data = pd.read_csv("/content/drive/MyDrive/Project.csv")
print(data.shape)
data.head()

(22916, 12)


Unnamed: 0,Platform,Type,Title,Director,Cast,Country,Date_added,Release_year,Rating,Duration,Listed_in,Description
0,Netflix,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,25-Sep-21,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,Netflix,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,24-Sep-21,2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,Netflix,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,24-Sep-21,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,Netflix,TV Show,Jailbirds New Orleans,,,,24-Sep-21,2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,Netflix,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,24-Sep-21,2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [3]:
data.nunique() # To find unique content on each row

Platform            4
Type                2
Title           22034
Director        10083
Cast            16687
Country           885
Date_added       2051
Release_year      101
Rating            105
Duration          253
Listed_in        1677
Description     22588
dtype: int64

In [4]:
data.isnull().sum() # It will give the Count of each column where NaN is present

Platform            0
Type                0
Title               0
Director         8226
Cast             5305
Country         11455
Date_added       9554
Release_year        0
Rating            863
Duration          482
Listed_in           0
Description         4
dtype: int64

**Cleaning the Data**

From the above result, we can see that director,cast,country,date_added and rating columns have missing values. First, we are handling those missing values.

In [5]:
data.Director.fillna("No Director", inplace=True)
data.Cast.fillna("No Cast", inplace=True)
data.Country.fillna("Country Unavailable", inplace=True)

In [6]:
data.isnull().sum()

Platform           0
Type               0
Title              0
Director           0
Cast               0
Country            0
Date_added      9554
Release_year       0
Rating           863
Duration         482
Listed_in          0
Description        4
dtype: int64

In [7]:
Visualization = px.pie(values=data['Platform'].value_counts(), names=data['Platform'].value_counts().index,
              title='Contribution of each platform')

Visualization.show()

**2) Displaying the content type based on the selected Country**

Certain films and television shows have many country names. We only took into account the first country name that appeared in the country column.

In [8]:
data['Country'] = [Countries[0] for Countries in data['Country'].str.split(',')]

In [9]:
def visualise_country(Country):
    if (Country == ALL):
        data_vis = data
    
    else:
        data_vis = data[data.Country == Country]
        
    Visualization = px.pie(values=data_vis['Type'].value_counts(), 
             names=data_vis['Type'].value_counts().index, 
             title=f'Total number of TV-Shows and Movies from {Country}.')
    Visualization.show()

In [10]:
import ipywidgets as widgets
from ipywidgets.widgets.interaction import show_inline_matplotlib_plots

ALL = 'ALL'
def total_unique_country_names(array):
    unique = array.unique().tolist()
    unique.sort()
    unique.insert(0, ALL)
    return unique

dropdown_country = widgets.Dropdown(options = total_unique_country_names(data.Country))
output_country = widgets.Output()

def dropdown_country_eventhandler(change):
    output_country.clear_output()
    with output_country:
        display(visualise_country(change.new))
        
dropdown_country.observe(dropdown_country_eventhandler, names='value')
display(dropdown_country)

Dropdown(options=('ALL', '', 'Afghanistan', 'Argentina', 'Australia', 'Austria', 'Bangladesh', 'Belarus', 'Bel…

In [11]:
display(output_country)

Output()

**3. Top 15 Countries producing the content to Netflix**

In [12]:
data_country = data['Country'].value_counts().sort_values(ascending=False)
top15countries = data_country.head(15)
top15countries

Country Unavailable    11455
United States           5584
India                   1248
United Kingdom           889
Japan                    536
Canada                   378
France                   239
South Korea              224
Spain                    193
Australia                152
Mexico                   139
Germany                  125
Egypt                    113
Turkey                   111
China                    105
Name: Country, dtype: int64

In [13]:
Visualization = px.pie(values=top15countries, 
                       names=top15countries.index,title='Top 15 Countries producing the content')

Visualization.show()

In [14]:
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go

In [15]:
df = data['Country'].value_counts()

iplot([go.Choropleth(
    locationmode='country names',
    locations=df.index.values,
    text=df.index,
    z=df.values,
)])

In [20]:
Rating = data[data['Platform'] == 'Netflix']
Ratings = Rating['Rating'].value_counts()
Ratings

TV-MA       3207
TV-14       2160
TV-PG        863
R            799
PG-13        490
TV-Y7        334
TV-Y         307
PG           287
TV-G         220
NR            80
G             41
TV-Y7-FV       6
UR             3
NC-17          3
84 min         1
74 min         1
66 min         1
Name: Rating, dtype: int64

In [21]:
Visualization = px.funnel(Ratings,title='Types of Rating on Netflix')

Visualization.show()