# VISUALISATION- Netflix OTT Revenue and Subscribers


<b>Import Necessary Libraries </b>


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from warnings import filterwarnings

filterwarnings("ignore")
%matplotlib inline

<b>Load the dataset</b>

In [2]:
# Read the raw CSV (path is relative to the notebook's working directory).
df = pd.read_csv('Netflix Revenue.csv')

In [3]:
# Report (rows, columns) instead of dumping the whole frame; the next cell
# previews the first rows with head().
df.shape

In [4]:
# Preview the first rows to sanity-check that the file loaded as expected.
df.head()

The columns in the dataset include:

Date: The date corresponding to the metrics recorded.

Global Revenue: The total revenue generated by Netflix globally.

UACN Revenue: The revenue generated by Netflix from the UACN (United States and Canada) region.

EMEA Revenue: The revenue generated by Netflix from the EMEA (Europe, Middle East, and Africa) region.

LATM Revenue: The revenue generated by Netflix from the LATM (Latin America) region.

APAC Revenue: The revenue generated by Netflix from the APAC (Asia-Pacific) region.

UACN Members: The number of Netflix members in the UACN region.

EMEA Members: The number of Netflix members in the EMEA region.

LATM Members: The number of Netflix members in the LATM region.

APAC Members: The number of Netflix members in the APAC region.

UACN RPU: Revenue per user (RPU) in the UACN region.

EMEA RPU: Revenue per user (RPU) in the EMEA region.

LATM RPU: Revenue per user (RPU) in the LATM region.

APAC RPU: Revenue per user (RPU) in the APAC region.

Domestic Members: The number of Netflix members in the domestic market.

Domestic Revenue: The revenue generated by Netflix in the domestic market.

International Members: The number of Netflix members in the international market.

International Revenue: The revenue generated by Netflix in the international market.

Domestic Free Trialers: The number of Netflix free trial users in the domestic market.

International Free Trialers: The number of Netflix free trial users in the international market.

Netflix Global Users: The total number of Netflix users worldwide.

In [5]:
# Summarise each column's dtype and non-null count.
df.info()

In [6]:
# Cap how many columns/rows pandas renders when a frame is displayed.
pd.options.display.max_columns = 25
pd.options.display.max_rows = 30

<b>Data types</b>

In [7]:
# Show the dtype pandas inferred for every column.
df.dtypes

In [8]:
# List the exact column labels; later cells index some columns with unusual
# spellings (e.g. 'EMEA  Members', 'LATM  RPU', 'Interntaional Free Trialers').
df.columns

<b>Missing values</b>

In [9]:
# Count the missing entries in each column
df.isna().sum()

In [10]:
# Count fully duplicated rows. The raw boolean mask from duplicated() has one
# entry per row and is hard to read; a single total mirrors the missing-value
# check above.
df.duplicated().sum()


In [11]:
# Remove commas and convert 'Netflix Global Users' column to numeric.
# Casting to str first keeps this cell re-runnable: once the column has been
# converted to float, calling the .str accessor on it directly would raise
# AttributeError.
df['Netflix Global Users'] = (
    df['Netflix Global Users']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)

# Extract the year from the 'Date' column
df['Year'] = pd.to_datetime(df['Date']).dt.year

In [12]:
# Summary statistics for the numeric columns.
df.describe()

In [13]:
# Store global revenue as integers.
df['Global Revenue'] = df['Global Revenue'].astype(int)

## --------------------------------------------------------------

<b>VISUALISATION using "matplotlib"</b>

<b>Netflix Global Revenue Over Time</b>

In [14]:
# Parse the 'Date' column into datetime values
df = df.assign(Date=pd.to_datetime(df['Date']))

<b>Date vs Global Revenue </b>

In [15]:
# Order the rows chronologically
df = df.sort_values(by='Date')

In [16]:
# Line chart of global revenue against date, using the explicit Axes interface.
fig, ax = plt.subplots()
ax.plot(df['Date'], df['Global Revenue'], label='Global Revenue')
ax.set_xlabel('Year of Date')
ax.set_ylabel('revenue in US dollars (in billions, 10^9)')
ax.set_title('Netflix : Global Revenue in billion Dollars')
plt.show()

<b> Average Netflix Global Users by Year </b>

In [17]:
# Mean of 'Netflix Global Users' within each calendar year.
average_users = df.groupby('Year')['Netflix Global Users'].mean()

# Draw the yearly averages as a line chart.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(average_users.index, average_users.values)
ax.set_title('Average Netflix Global Users by Year')
ax.set_xlabel('Year')
ax.set_ylabel('Average Global Users')
plt.xticks(rotation=45)

# Render y tick labels with thousands separators and two decimals.
ax.yaxis.set_major_formatter(
    plt.FuncFormatter(lambda value, _pos: f"{value:,.2f}")
)

plt.show()

<b>Netflix Global Users Trend</b>

In [18]:
# Keep the rows in chronological order before plotting.
df = df.sort_values('Date')

# Global user count over time.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['Date'], df['Netflix Global Users'], color='green')
ax.set_title('Netflix Global Users Trend', fontsize=16)
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Global Users', fontsize=12)
plt.xticks(rotation=45)
ax.grid(True)

# Render y tick labels with thousands separators and two decimals.
ax.yaxis.set_major_formatter(
    plt.FuncFormatter(lambda value, _pos: f"{value:,.2f}")
)

plt.show()

<b>Revenue by Region</b>

In [19]:
# Keep only the rows where all four regional revenue figures are present.
regional_revenue_cols = ['UACN Revenue', 'EMEA Revenue', 'LATM Revenue', 'APAC Revenue']
df_clean = df.dropna(subset=regional_revenue_cols)

In [20]:
# Total revenue per region, summed over the cleaned rows.
revenue_by_region = df_clean.loc[
    :, ['UACN Revenue', 'EMEA Revenue', 'LATM Revenue', 'APAC Revenue']
].sum(axis=0)

In [21]:
# Bar chart of total revenue per region.
fig, ax = plt.subplots(figsize=(10, 6))
revenue_by_region.plot.bar(ax=ax)
ax.set_title('Revenue by Region', fontsize=16)
ax.set_xlabel('Region', fontsize=12)
ax.set_ylabel('Revenue', fontsize=12)
plt.xticks(rotation=0)

# Render y tick labels with thousands separators and two decimals.
ax.yaxis.set_major_formatter(
    plt.FuncFormatter(lambda value, _pos: f"{value:,.2f}")
)

plt.show()


In [22]:
# Display the per-region totals plotted above.
revenue_by_region

<b>Full forms</b> of the region abbreviations used in the following visualisations:

UACN : United States and Canada
    
EMEA : Europe, Middle East and Africa
    
APAC : Asia-Pacific
    
LATM : Latin America

In [23]:
# Build the series used for the region-wise revenue chart.
# Rows before position 28 predate 2019, where region-wise data is not available.
region_rows = slice(28, None)

# Each slice is re-indexed from 0 so all series share the same index.
x_time = df['Date'].iloc[region_rows].reset_index(drop=True)
UACN = df['UACN Revenue'].iloc[region_rows].reset_index(drop=True)
EMEA = df['EMEA Revenue'].iloc[region_rows].reset_index(drop=True)
LATM = df['LATM Revenue'].iloc[region_rows].reset_index(drop=True)
APAC = df['APAC Revenue'].iloc[region_rows].reset_index(drop=True)

In [24]:
#Plotting Date vs Region Wise Revenue
plt.plot(x_time,UACN,color='b',label='UACN')
plt.plot(x_time,EMEA,color='r',label='EMEA')
plt.plot(x_time,LATM,color='g',label='LATM')
plt.plot(x_time,APAC,color='m',label='APAC')
plt.legend()
# plt.margins(0.15) #Default margin around data limits is 5%(0.05)
plt.xlabel('Year(Month)')
plt.ylabel('Revenue in Billion Dollars (10^9)')
plt.title('Netflix : Region Wise Revenue in Billion Dollars')
# tight_layout only accounts for artists that already exist, so it must run
# after the axis labels and title are set; otherwise they are left out of the
# layout calculation.
plt.tight_layout()
plt.show()

In [25]:
# Build the series used for the region-wise membership chart.
# Same starting row (position 28) as the other region-wise series; each slice
# is re-indexed from 0.
member_rows = slice(28, None)
uacn_members = df['UACN Members'].iloc[member_rows].reset_index(drop=True)
emea_members = df['EMEA  Members'].iloc[member_rows].reset_index(drop=True)
apac_members = df['APAC Members'].iloc[member_rows].reset_index(drop=True)
latm_members = df['LATM Members'].iloc[member_rows].reset_index(drop=True)

In [26]:
# Date vs region-wise membership, one line per region.
fig, ax = plt.subplots()
member_lines = [
    (uacn_members, 'b', 'UACN'),
    (emea_members, 'r', 'EMEA'),
    (latm_members, 'g', 'LATM'),
    (apac_members, 'm', 'APAC'),
]
for series, colour, region in member_lines:
    ax.plot(x_time, series, color=colour, label=region)
ax.legend()
ax.set_xlabel('Year(Month)')
ax.set_ylabel('Region wise Members (in Ten Millions, 10^7)')
ax.set_title('Region Wise Paid Membership in Ten Millions')
fig.tight_layout()

In [27]:
# Build the series used for the region-wise RPU chart.
# Same starting row (position 28) as the other region-wise series; each slice
# is re-indexed from 0.
rpu_rows = slice(28, None)
uacn_rpu = df['UACN RPU'].iloc[rpu_rows].reset_index(drop=True)
emea_rpu = df['EMEA RPU'].iloc[rpu_rows].reset_index(drop=True)
latm_rpu = df['LATM  RPU'].iloc[rpu_rows].reset_index(drop=True)
apac_rpu = df['APAC  RPU'].iloc[rpu_rows].reset_index(drop=True)

In [28]:
# Date vs region-wise revenue per user (RPU), one line per region.
plt.plot(x_time,uacn_rpu,color='b',label='UACN RPU')
plt.plot(x_time,emea_rpu,color='r',label='EMEA RPU')
plt.plot(x_time,latm_rpu,color='g',label='LATM RPU')
plt.plot(x_time,apac_rpu,color='m',label='APAC RPU')
plt.legend()
plt.title('Netflix : Region wise Revenue per User in dollars')
plt.xlabel('Year(Month)')
plt.ylabel('Revenue in Dollars')
# tight_layout only accounts for artists that already exist, so it runs after
# the title and axis labels have been set.
plt.tight_layout()
plt.show()


In [29]:
# 2x2 grid with one regional revenue series per panel (row-major order).
revenue_panels = [
    ('UACN Revenue', "yellow"),
    ('EMEA Revenue', "blue"),
    ('LATM Revenue', "red"),
    ('APAC Revenue', "magenta"),
]

fig, axs = plt.subplots(2,2)
fig.suptitle('UACN vs EMEA vs LATM vs APAC Revenue Trends', fontsize =15)

for ax, (column, colour) in zip(axs.flat, revenue_panels):
    ax.plot(df['Date'], df[column], color=colour)
    ax.set_title(column, fontdict={'color':'black','size':12})
    ax.set(xlabel='Time', ylabel='Revenue')
    # Blank out the x tick labels, then keep axis labels on outer panels only.
    ax.set_xticklabels([])
    ax.label_outer()

<b>UACN vs EMEA vs LATM vs APAC Members Trends</b>

In [30]:
# 2x2 grid with one regional membership series per panel.
fig, axs = plt.subplots(2,2)
fig.suptitle('UACN vs EMEA vs LATM vs APAC Members Trends',fontsize=15 )
axs[0,0].plot(df['Date'],df['UACN Members'],color= "yellow")
axs[0,1].plot(df['Date'],df['EMEA  Members'],color= "red")
axs[1,0].plot(df['Date'],df['LATM Members'],color= "blue")
axs[1,1].plot(df['Date'],df['APAC Members'],color= "magenta")

axs[0,0].set_title('UACN Members', fontdict= {'color':'black','size':12})
axs[0,1].set_title('EMEA Members',fontdict= {'color':'black','size':12})
axs[1,0].set_title('LATM Members', fontdict= {'color':'black','size':12})
# Title typo fixed ('Mmebers' -> 'Members').
axs[1,1].set_title('APAC Members', fontdict= {'color':'black','size':12})

for ax in axs.flat:
    # These panels show member counts, so the y-axis is labelled 'Members'
    # (it was previously 'Revenue', copied from the revenue grid).
    ax.set(xlabel='Time', ylabel='Members' )
for ax in axs.flat:
    # Blank out the x tick labels, then keep axis labels on outer panels only.
    ax.set_xticklabels([])
    ax.label_outer()


<b>UACN vs EMEA vs LATM vs APAC RPU (revenue per user) Trends</b>

In [31]:
# 2x2 grid with one regional RPU series per panel (row-major order).
# Each entry is (column label in df, panel title, line colour).
rpu_panels = [
    ('UACN RPU', 'UACN RPU', "yellow"),
    ('EMEA RPU', 'EMEA RPU', "red"),
    ('LATM  RPU', 'LATM RPU', "blue"),
    ('APAC  RPU', 'APAC RPU', "magenta"),
]

fig, axs = plt.subplots(2,2)
fig.suptitle('UACN vs EMEA vs LATM vs APAC RPU Trends',fontsize=15 )

for ax, (column, panel_title, colour) in zip(axs.flat, rpu_panels):
    ax.plot(df['Date'], df[column], color=colour)
    ax.set_title(panel_title, fontdict={'color':'black','size':12})
    ax.set(xlabel='Time', ylabel='RPU')
    # Blank out the x tick labels, then keep axis labels on outer panels only.
    ax.set_xticklabels([])
    ax.label_outer()

In [32]:
# Change in global revenue between each pair of consecutive rows of df.
# np.diff yields len(df) - 1 values: the first row has no predecessor to
# subtract from, so it contributes no entry. tolist() keeps the result a plain
# Python list, as the plotting cell expects.
change_revenue = np.diff(df['Global Revenue'].to_numpy()).tolist()
change_revenue

In [33]:
# Dates for the revenue changes: every row except the first, re-indexed from 0.
change_revenue_time = df['Date'].iloc[1:].reset_index(drop=True)

In [34]:
#Plotting Date vs Change in Netflix's Revenue
plt.plot(change_revenue_time,change_revenue)
plt.xlabel('Year')
# Spelling fixed in the label and title ('Hundread' -> 'Hundred').
plt.ylabel('Change in Revenue in Hundred Millions (in Dollars)')
plt.title('Netflix : Change in Revenue in Hundred Millions (in Dollars)')
plt.show()

<b>Domestic vs International Revenue Trends</b>

In [35]:
# Two stacked panels: domestic revenue on top, international revenue below.
fig, (ax_domestic, ax_international) = plt.subplots(2)
fig.suptitle('Domestic vs International Revenue Trends')

ax_domestic.plot(df['Date'], df["Domestic Revenue"], color="green")
ax_domestic.set_title('Domestic Revenue vs Time', fontdict={'color':'black','size':10})

ax_international.plot(df['Date'], df["International Revenue"], color="brown")
ax_international.set_title('International Revenue vs Time', fontdict={'color':'black','size':10})

# pyplot applies this to the current axes only.
plt.xticks(rotation=90, ha='right')

for ax in (ax_domestic, ax_international):
    ax.set(xlabel='Date', ylabel='Revenue')
    ax.label_outer()

<b>Domestic vs International Free Trialers Trends</b>

In [36]:
# Two stacked panels: domestic free trialers on top, international below.
fig, (ax_domestic, ax_international) = plt.subplots(2)
fig.suptitle('Domestic vs International Free Trialers Trends')

ax_domestic.plot(df['Date'], df["Domestic Free Trialers"], color="green")
ax_domestic.set_title('Domestic Free Trialers vs Time', fontdict={'color':'black','size':10})

# The column label is spelled 'Interntaional' in this lookup; it is kept as-is.
ax_international.plot(df['Date'], df['Interntaional Free Trialers'], color="brown")
ax_international.set_title('International Free Trialers vs Time', fontdict={'color':'black','size':10})

# pyplot applies this to the current axes only.
plt.xticks(rotation=90, ha='right')

for ax in (ax_domestic, ax_international):
    ax.set(xlabel='Date', ylabel='Free Trialers')
    ax.label_outer()

# THANK YOU 