# **Netflix Content Strategy Analysis**

In [None]:
# Import the libraries used throughout this notebook
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

In [None]:
netflix_data = "/content/drive/MyDrive/Data_Sets/netflix_content_2023.csv"

In [None]:
netflix_data = pd.read_csv(netflix_data)

In [None]:
# Preview the first five rows of the loaded data.
netflix_data.head(5)

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000,Korean,Show
3,Wednesday: Season 1,Yes,2022-11-23,507700000,English,Show
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000,English,Movie


In [None]:
# Prepare the 'Hours Viewed' column for analysis:
# remove any comma separators, then cast the values to float.

hours_viewed_no_commas = netflix_data['Hours Viewed'].replace(',', '', regex=True)
netflix_data['Hours Viewed'] = hours_viewed_no_commas.astype(float)

In [None]:
# Spot-check the converted column alongside the titles.
netflix_data.loc[:, ['Title', 'Hours Viewed']].head()

Unnamed: 0,Title,Hours Viewed
0,The Night Agent: Season 1,812100000.0
1,Ginny & Georgia: Season 2,665100000.0
2,The Glory: Season 1 // 더 글로리: 시즌 1,622800000.0
3,Wednesday: Season 1,507700000.0
4,Queen Charlotte: A Bridgerton Story,503000000.0


###  **Visualize the distribution of total viewership hours between Shows and Movies:**

In [None]:
# Aggregate viewership hours by content type.

Content_type_viewership = netflix_data.groupby('Content Type')['Hours Viewed'].sum()

# The y-axis is labelled "in billions", so plot the totals scaled to
# billions of hours instead of raw hours.
content_type_hours_billions = Content_type_viewership / 1e9

fig = go.Figure(data=[
    go.Bar(
        x=content_type_hours_billions.index,
        y=content_type_hours_billions.values,
        # One colour per bar, applied in index order.
        marker_color=['skyblue', 'salmon']
    )
])

fig.update_layout(
    title='Total viewership Hours by content type (2023)',
    xaxis_title='content type',
    yaxis_title='Total Hours Viewed (In billions)',
    xaxis_tickangle=0,
    height=500,
    width=500
)
fig.show()

###  **Analyze the distribution of viewership across different languages**

In [None]:
# Aggregate viewership hours by language, largest total first.

language_viewership = (
    netflix_data.groupby('Language Indicator')['Hours Viewed']
    .sum()
    .sort_values(ascending=False)
)

# The y-axis is labelled "in billions", so plot the totals scaled to
# billions of hours instead of raw hours.
language_hours_billions = language_viewership / 1e9

fig = go.Figure(data=[
    go.Bar(
        x=language_hours_billions.index,
        y=language_hours_billions.values,
        marker_color='lightcoral'
    )
])

fig.update_layout(
    title='Total viewership Hours by language(2023)',
    xaxis_title='Language',
    yaxis_title='Total hours viewed (in billions)',
    xaxis_tickangle=45,
    height=600,
    width=1000
)
fig.show()

### **Analyze how viewership varies based on release dates to identify any trends over time**

In [None]:
# Convert "Release Date" to datetime and extract the release month.
# The month is stored as 'Release Month', the column name the later cells
# use, so the frame does not end up with a second, differently-cased
# 'Release month' column.

netflix_data['Release Date'] = pd.to_datetime(netflix_data['Release Date'])
netflix_data['Release Month'] = netflix_data['Release Date'].dt.month

# Aggregate viewership hours by release month.
monthly_viewership = netflix_data.groupby('Release Month')['Hours Viewed'].sum()

# The y-axis is labelled "in billions", so plot the totals scaled to
# billions of hours instead of raw hours.
monthly_hours_billions = monthly_viewership / 1e9

fig = go.Figure(data=[
    go.Scatter(
        x=monthly_hours_billions.index,
        y=monthly_hours_billions.values,
        mode='lines+markers',
        marker=dict(color='blue'),
        line=dict(color='blue')
    )
])

fig.update_layout(
    title='Total Viewership Hours By Release Month (2023)',
    yaxis_title='Total Hours Viewed (In billions)',
    xaxis=dict(
        title='Month',
        # Show month abbreviations instead of the numeric month index.
        tickmode='array',
        tickvals=list(range(1, 13)),
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    ),
    height=400,
    width=1000
)
fig.show()


### **Viewership trends by content type:**

In [None]:
# Aggregate viewership hours by content type and release month.
netflix_data['Release Date'] = pd.to_datetime(netflix_data['Release Date'])
netflix_data['Release Month'] = netflix_data['Release Date'].dt.month

# Rows are release months, columns are content types, cells are summed hours.
monthly_viewership_by_type = netflix_data.pivot_table(
    index='Release Month',
    columns='Content Type',
    values='Hours Viewed',
    aggfunc='sum'
)

# The y-axis is labelled "in billions", so plot the totals scaled to
# billions of hours instead of raw hours.
monthly_hours_billions_by_type = monthly_viewership_by_type / 1e9

# One line per content type.
fig = go.Figure()
for content_type in monthly_hours_billions_by_type.columns:
    fig.add_trace(
        go.Scatter(
            x=monthly_hours_billions_by_type.index,
            y=monthly_hours_billions_by_type[content_type],
            mode='lines+markers',
            name=content_type
        )
    )

fig.update_layout(
    title='Viewership Trends by content Type and Release Month(2023)',
    yaxis_title='Total Hours Viewed (in billions)',
    xaxis=dict(
        title='Month',
        # Show month abbreviations instead of the numeric month index.
        tickmode='array',
        tickvals=list(range(1, 13)),
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    ),
    height=500,
    width=1000,
    legend_title='Content Type'
)
fig.show()

### **Explore the total viewership hours distributed across different release seasons:**



In [None]:
# Define seasons based on release months
def get_season(month):
    """Return the season name for a calendar month number.

    Months 12, 1, 2 map to 'Winter'; 3-5 to 'Spring'; 6-8 to 'Summer';
    9-11 to 'Fall'.

    Args:
        month: Month number 1-12. Integer-valued floats (e.g. 3.0) are
            accepted, since a month column containing missing values is
            stored as floats.

    Returns:
        The season name as a string, or None when `month` is missing (NaN /
        None) or not a valid month number. Previously every such value fell
        through to 'Fall', which counted rows with no release date as Fall
        releases.
    """
    season_by_month = {
        12: 'Winter', 1: 'Winter', 2: 'Winter',
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
        9: 'Fall', 10: 'Fall', 11: 'Fall',
    }
    # NaN never compares equal to a key, so a missing month yields None.
    return season_by_month.get(month)

# Ensure 'Release Date' is a datetime column (if not already)
netflix_data['Release Date'] = pd.to_datetime(netflix_data['Release Date'])

# Extract the release month and apply season categorization.
# na_action='ignore' keeps a missing month as NaN instead of passing it to
# get_season, so any title without a release date is not assigned a season.
netflix_data['Release Month'] = netflix_data['Release Date'].dt.month
netflix_data['Release Season'] = netflix_data['Release Month'].map(
    get_season, na_action='ignore'
)

# Aggregate viewership hours by release season
# (groupby drops rows whose season is NaN).
seasonal_viewership = netflix_data.groupby('Release Season')['Hours Viewed'].sum()

# Order the seasons and handle missing data
seasons_order = ['Winter', 'Spring', 'Summer', 'Fall']
seasonal_viewership = seasonal_viewership.reindex(seasons_order, fill_value=0)

# The y-axis is labelled "in billions", so plot the totals scaled to
# billions of hours instead of raw hours.
seasonal_hours_billions = seasonal_viewership / 1e9

# Create the bar chart
fig = go.Figure(data=[
    go.Bar(
        x=seasonal_hours_billions.index,
        y=seasonal_hours_billions.values,
        marker_color='coral'
    )
])

# Update layout for the plot
fig.update_layout(
    title='Total Viewership Hours by Release Season (2023)',
    yaxis_title='Total Hours Viewed (in billions)',
    height=500,
    width=800,
    xaxis=dict(
        title='Season',
        tickangle=0,
        # Keep the bars in calendar order rather than alphabetical order.
        categoryorder='array',
        categoryarray=seasons_order
    )
)
fig.show()

###  **Analyze the number of content releases and their viewership hours across months:**

In [None]:
# Number of titles released in each month, in calendar order.
# Relies on the 'Release Month' column created by the earlier cells.
monthly_release = netflix_data['Release Month'].value_counts().sort_index()

# Total viewership hours per release month.
monthly_viewership = netflix_data.groupby('Release Month')['Hours Viewed'].sum()

# The secondary axis is labelled "in billions", so plot the totals scaled
# to billions of hours instead of raw hours.
monthly_hours_billions = monthly_viewership / 1e9

fig = go.Figure()

# Bars: release counts on the primary (left) y-axis.
fig.add_trace(
    go.Bar(
        x=monthly_release.index,
        y=monthly_release.values,
        name='Number of Releases',
        marker_color='goldenrod',
        opacity=0.7,
        yaxis='y'
    )
)

# Line: viewership hours on the secondary (right) y-axis.
fig.add_trace(
    go.Scatter(
        x=monthly_hours_billions.index,
        y=monthly_hours_billions.values,
        name='Viewership Hours',
        mode='lines+markers',
        marker=dict(color='red'),
        line=dict(color='red'),
        yaxis='y2'
    )
)

fig.update_layout(
    title='Monthly Release Patterns and Viewership Hours(2023)',
    xaxis=dict(
        title='Month',
        tickmode='array',
        tickvals=list(range(1, 13)),
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    ),
    yaxis=dict(
        title='Number of Releases',
        showgrid=True,
        side='left'
    ),
    yaxis2=dict(
        title='Total Hours Viewed(in billions)',
        # Draw on top of the primary axis so both traces share one plot area.
        overlaying='y',
        side='right',
        showgrid=False
    ),
    # Place the legend outside the plot so it does not cover the right axis.
    legend=dict(
        x=1.05,
        y=1,
        orientation='v',
        xanchor='left'
    ),
    height=600,
    width=1000
)
fig.show()



### **Analyze the weekly release pattern and viewership**

In [None]:
# Derive the weekday name of each release date.
netflix_data['Release Date'] = pd.to_datetime(netflix_data['Release Date'])
netflix_data['Release Day'] = netflix_data['Release Date'].dt.day_name()

# Single source of truth for the Monday-to-Sunday ordering used below.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Number of releases per weekday; a weekday with no releases counts as 0
# instead of NaN.
weekday_releases = netflix_data['Release Day'].value_counts().reindex(
    weekday_order, fill_value=0
)

# Aggregate viewership hours by day of the week.
weekday_viewership = netflix_data.groupby('Release Day')['Hours Viewed'].sum().reindex(
    weekday_order, fill_value=0
)

# The secondary axis is labelled "in billions", so plot the totals scaled
# to billions of hours instead of raw hours.
weekday_hours_billions = weekday_viewership / 1e9

fig = go.Figure()

# Bars: release counts on the primary (left) y-axis.
fig.add_trace(
    go.Bar(
        x=weekday_releases.index,
        y=weekday_releases.values,
        name='Number of Releases',
        marker_color='blue',
        opacity=0.6,
        yaxis='y'
    )
)

# Line: viewership hours on the secondary (right) y-axis.
fig.add_trace(
    go.Scatter(
        x=weekday_hours_billions.index,
        y=weekday_hours_billions.values,
        name='Viewership Hours',
        mode='lines+markers',
        marker=dict(color='red'),
        line=dict(color='red'),
        yaxis='y2'
    )
)

fig.update_layout(
    title='Weekly Release Patterns and Viewership Hours (2023)',
    xaxis=dict(
        title='Day of the Week',
        # Keep the weekdays in calendar order rather than alphabetical order.
        categoryorder='array',
        categoryarray=weekday_order
    ),
    yaxis=dict(
        title='Number of Releases',
        showgrid=False,
        side='left'
    ),
    yaxis2=dict(
        title='Total Hours Viewed (in billions)',
        # Draw on top of the primary axis so both traces share one plot area.
        overlaying='y',
        side='right',
        showgrid=False
    ),
    # Place the legend outside the plot so it does not cover the right axis.
    legend=dict(
        x=1.05,
        y=1,
        orientation='v',
        xanchor='left'
    ),
    height=600,
    width=1000
)

fig.show()

**Dataset Link**

https://drive.google.com/file/d/1YDMmxWQ3dKU_P35JsVZzOrW_HGUInrz_/view?usp=drive_link