In [None]:
# Install kaleido for Plotly image export
!pip install -U kaleido

Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
Successfully installed kaleido-0.2.1


In [None]:
 pip install -U kaleido



In [None]:
# pandas is imported in this cell because the notebook's main import cell sits
# further down; without it this cell raises NameError on a fresh kernel.
import pandas as pd

# Read the Netflix user-base CSV from the Colab working directory.
file_path = '/content/Netflix Userbase.csv'
netflix_userbase = pd.read_csv(file_path)

In [10]:
# Show the final 50 rows of the user-base table (last expression -> rich display).
netflix_userbase.iloc[-50:]

Unnamed: 0,User ID,Subscription Type,Monthly Revenue,Join Date,Last Payment Date,Country,Age,Gender,Device,Plan Duration
2450,2451,Premium,15,31-10-22,12-07-23,Spain,48,Female,Tablet,1 Month
2451,2452,Basic,14,04-11-22,13-07-23,Spain,31,Female,Tablet,1 Month
2452,2453,Standard,15,06-11-22,11-07-23,United States,48,Female,Smartphone,1 Month
2453,2454,Standard,10,08-11-22,13-07-23,Canada,36,Male,Smartphone,1 Month
2454,2455,Basic,10,09-11-22,11-07-23,United States,38,Female,Tablet,1 Month
2455,2456,Premium,13,11-11-22,13-07-23,United States,39,Male,Smartphone,1 Month
2456,2457,Basic,11,12-11-22,10-07-23,Canada,48,Male,Tablet,1 Month
2457,2458,Standard,12,12-11-22,12-07-23,United Kingdom,30,Female,Smart TV,1 Month
2458,2459,Premium,12,12-11-22,12-07-23,Australia,30,Male,Smart TV,1 Month
2459,2460,Basic,15,12-11-22,13-07-23,Germany,35,Female,Smart TV,1 Month


In [None]:
# Print a structural summary of the user-base frame: index range, column names,
# non-null counts, dtypes and memory usage.
netflix_userbase.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2500 entries, 0 to 2499
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   User ID            2500 non-null   int64 
 1   Subscription Type  2500 non-null   object
 2   Monthly Revenue    2500 non-null   int64 
 3   Join Date          2500 non-null   object
 4   Last Payment Date  2500 non-null   object
 5   Country            2500 non-null   object
 6   Age                2500 non-null   int64 
 7   Gender             2500 non-null   object
 8   Device             2500 non-null   object
 9   Plan Duration      2500 non-null   object
dtypes: int64(3), object(7)
memory usage: 195.4+ KB


In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from wordcloud import WordCloud
from google.colab import files
import imageio
from dateutil import parser

# --- Load the two source tables ---------------------------------------------
# Netflix user-base table.
userbase_file_path = '/content/Netflix Userbase.csv'
userbase_data = pd.read_csv(userbase_file_path)

# Netflix titles table.
file_path = '/content/netflix_titles.csv'
netflix_data = pd.read_csv(file_path)

# Both user-base date columns are parsed with the same explicit
# day-month-two-digit-year format, so handle them in one loop.
for date_column in ('Join Date', 'Last Payment Date'):
    userbase_data[date_column] = pd.to_datetime(
        userbase_data[date_column],
        format='%d-%m-%y',
    )

# Function to parse dates with varying formats
def parse_dates(date):
    """Parse a free-form date value, returning ``pd.NaT`` when it cannot be parsed.

    Parameters
    ----------
    date : object
        Value handed unchanged to ``parser.parse`` (``dateutil``'s parser).

    Returns
    -------
    object
        Whatever ``parser.parse`` returns for ``date``, or ``pd.NaT`` if the
        parser rejects the value.

    Notes
    -----
    Only the failures the parser is documented to raise for bad input are
    converted to ``pd.NaT``. Anything else (for example ``KeyboardInterrupt``)
    propagates instead of being silently swallowed.
    """
    try:
        return parser.parse(date)
    except (ValueError, TypeError, OverflowError):
        # ValueError    -> text that is not a recognisable date
        # TypeError     -> non-string input (e.g. a float NaN from an empty cell)
        # OverflowError -> a numeric component too large to represent
        return pd.NaT

# Normalise the titles table's 'date_added' column with parse_dates, then
# derive the calendar year each title was added from the parsed values.
parsed_date_added = netflix_data['date_added'].apply(parse_dates)
netflix_data['date_added'] = parsed_date_added
netflix_data['year_added'] = parsed_date_added.dt.year

# 1. Distribution of Movies and TV Shows (Bar Chart)
# Count titles per 'type' and draw one bar per type on an explicit figure/axes
# pair; the figure is saved to PNG and closed rather than displayed.
type_counts = netflix_data['type'].value_counts()
type_fig, type_ax = plt.subplots(figsize=(8, 6))
sns.barplot(
    x=type_counts.index,
    y=type_counts.values,
    hue=type_counts.index,
    palette="pastel",
    dodge=False,
    legend=False,
    ax=type_ax,
)
type_ax.set_title('Distribution of Movies and TV Shows on Netflix')
type_ax.set_xlabel('Type')
type_ax.set_ylabel('Count')
type_fig.savefig('distribution_movies_tvshows.png')
plt.close(type_fig)

# 2. Trend of Content Added Over the Years (Line Chart)
# One row per 'year_added' value with the number of titles in that year.
content_over_years = (
    netflix_data
    .groupby('year_added')
    .size()
    .reset_index(name='count')
)
fig1 = px.line(
    content_over_years,
    x='year_added',
    y='count',
    title='Growth of Netflix Content Over the Years',
    color_discrete_sequence=px.colors.sequential.Bluyl,
)
# Axis titles are set on the layout in a single call.
fig1.update_layout(
    xaxis_title_text='Year',
    yaxis_title_text='Number of Titles',
)
# Source note anchored to the top-right corner of the figure ("paper" coordinates).
fig1.add_annotation(
    text="Source: Netflix Dataset",
    xref="paper",
    yref="paper",
    x=1,
    y=1,
    showarrow=False,
)
fig1.write_html("trend_content_added.html")

# 3. Top 10 Countries Producing Netflix Content (Choropleth Map)
# The ten most frequent values of the 'country' column and their title counts.
country_counts = netflix_data['country'].value_counts().nlargest(10)

# Plot one row per country instead of one row per title. The map then carries
# ten locations rather than thousands of duplicates, and shading by the count
# on a continuous scale shows how much each country produces.
top_country_counts = country_counts.rename_axis('country').reset_index(name='count')
fig2 = px.choropleth(
    top_country_counts,
    locations='country',
    locationmode='country names',
    color='count',
    color_continuous_scale=px.colors.sequential.Bluyl,
    hover_name='country',
    title='Top 10 Countries Producing Netflix Content',
)
fig2.write_html("top_countries.html")

# 4. Distribution of Age Ratings (Pie Chart)
# One slice per distinct value of the 'rating' column, sized by its frequency.
rating_counts = netflix_data['rating'].value_counts()
fig3 = px.pie(
    names=rating_counts.index,
    values=rating_counts.values,
    title='Distribution of Age Ratings on Netflix',
    color_discrete_sequence=px.colors.sequential.Bluyl,
)
fig3.write_html("distribution_ratings.html")

# 5. Word Cloud of Titles
# Join every non-missing title into one space-separated string for the cloud.
titles = ' '.join(netflix_data['title'].dropna())
wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
).generate(titles)

# Render the cloud as an image on its own axes, hide the axes, save and close.
cloud_fig, cloud_ax = plt.subplots(figsize=(10, 5))
cloud_ax.imshow(wordcloud, interpolation='bilinear')
cloud_ax.axis('off')
cloud_ax.set_title('Word Cloud of Netflix Titles')
cloud_fig.savefig('wordcloud_titles.png')
plt.close(cloud_fig)

# 6. Bubble Map of Netflix Subscribers by Country (Scatter Geo)
# Each bubble is sized by the summed 'Monthly Revenue' of the country's rows.
country_revenue = userbase_data.groupby('Country', as_index=False)['Monthly Revenue'].sum()
fig4 = px.scatter_geo(
    country_revenue,
    locations='Country',
    locationmode='country names',
    size='Monthly Revenue',
    size_max=60,
    hover_name='Country',
    title='Bubble Map of Netflix Subscribers by Country',
    color_discrete_sequence=px.colors.sequential.Bluyl,
)
fig4.write_html("bubble_map_subscribers.html")

# 7. Subscription Type Distribution (Pie Chart)
# Slices are labelled by 'Subscription Type' and weighted by 'Monthly Revenue'.
subscription_pie_options = dict(
    names='Subscription Type',
    values='Monthly Revenue',
    title='Subscription Type Distribution',
    color_discrete_sequence=px.colors.sequential.Bluyl,
)
fig5 = px.pie(userbase_data, **subscription_pie_options)
fig5.write_html("subscription_type_distribution.html")

# 8. Device Usage by Monthly Revenue (Bar Chart)
# Sum revenue per device first. Handing the raw table to px.bar draws every
# user row as its own stacked segment: the bar totals are the same, but the
# figure and the exported HTML carry one mark per user.
# sort=False keeps devices in order of first appearance, so the bar order and
# colour assignment match what the row-level chart produced.
device_revenue = userbase_data.groupby('Device', sort=False, as_index=False)['Monthly Revenue'].sum()
fig6 = px.bar(
    device_revenue,
    x='Device',
    y='Monthly Revenue',
    color='Device',
    title='Device Usage by Monthly Revenue',
    color_discrete_sequence=px.colors.sequential.Bluyl,
)
fig6.write_html("device_usage.html")

# 9. Age Distribution of Users (Histogram)
# Histogram of the 'Age' column with a KDE curve overlaid, drawn on an explicit
# axes object; the figure is written to PNG and then closed.
age_fig, age_ax = plt.subplots(figsize=(10, 6))
sns.histplot(userbase_data['Age'], kde=True, color='skyblue', ax=age_ax)
age_ax.set_title('Age Distribution of Netflix Users')
age_ax.set_xlabel('Age')
age_ax.set_ylabel('Frequency')
age_fig.savefig('age_distribution_users.png')
plt.close(age_fig)

# 10. Sunburst Chart of Genres by Country
# 'listed_in' holds a ', '-separated genre list; split it into a list column
# and explode so that each (title, genre) pair becomes its own row.
netflix_data['genres'] = netflix_data['listed_in'].str.split(', ')
netflix_genres_data = netflix_data.explode('genres')

# Number of exploded rows for every (country, genre) combination.
genre_country_counts = (
    netflix_genres_data
    .groupby(['country', 'genres'])
    .size()
    .reset_index(name='count')
)

# Inner ring: country; outer ring: genre; wedge size: row count.
fig7 = px.sunburst(
    genre_country_counts,
    path=['country', 'genres'],
    values='count',
    title='Sunburst Chart of Genres by Country',
    color_discrete_sequence=px.colors.sequential.Bluyl,
)
fig7.write_html("sunburst_genres_country.html")

# Generate GIF
# The three static PNG charts saved earlier in this cell, in frame order.
graph_files = [
    'distribution_movies_tvshows.png',
    'wordcloud_titles.png',
    'age_distribution_users.png',
]

# Read each PNG lazily and append it as the next frame of the animation.
# NOTE(review): the unit of duration=2 depends on the installed imageio
# version/plugin — confirm the intended per-frame delay.
with imageio.get_writer('netflix_analysis.gif', mode='I', duration=2) as writer:
    for frame in map(imageio.imread, graph_files):
        writer.append_data(frame)

# Download the HTML files
# Interactive Plotly exports written earlier in this cell.
html_files = [
    "trend_content_added.html",
    "top_countries.html",
    "distribution_ratings.html",
    "bubble_map_subscribers.html",
    "subscription_type_distribution.html",
    "device_usage.html",
    "sunburst_genres_country.html",
]

# Download the files
# Trigger one Colab download per artefact: the PNG charts first, then the
# assembled GIF, then the HTML exports.
for filename in (*graph_files, 'netflix_analysis.gif', *html_files):
    files.download(filename)






<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>