In [1]:
!pip install streamlit -q
!pip install pandas -q
!npm install -g localtunnel

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m61.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m67.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.9/82.9 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[K[?25h
added 22 packages, and audited 23 packages in 3s

3 packages are looking for funding
  run `npm fund` for details

1 [33m[1mmoderate[22m[39m severity vulnerability

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.


In [2]:
%%writefile app.py
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from wordcloud import WordCloud

# Set page configuration
st.set_page_config(page_title="Netflix Analysis", page_icon=":clapper:", layout="wide")

# Custom CSS for a more striking visual appearance and logo alignment
st.markdown(
    """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Bebas+Neue&display=swap');

    body {
        background-color: #141414;
        color: white;
    }
    .main {
        background-color: #141414;
    }
    h1, h2, h3, h4 {
        color: #E50914;
        font-family: 'Bebas Neue', sans-serif;
    }
    .stButton>button {
        background-color: #E50914;
        color: white;
        border-radius: 10px;
        font-size: 16px;
    }
    .stSelectbox, .stSlider, .stMultiSelect, .stDownloadButton {
        background-color: #333333;
        color: white;
    }
    .stDownloadButton>button {
        background-color: #E50914;
        color: white;
        border-radius: 10px;
        font-size: 16px;
    }
    .css-18ni7ap {
        text-align: center;
    }
    .custom-filter-box {
        background-color: #1f1f1f;
        padding: 20px;
        border-radius: 10px;
    }
    .logo-container {
        display: flex;
        justify-content: center;
    }
    </style>
    """,
    unsafe_allow_html=True
)

# Load Data
netflix_data = pd.read_csv('netflix.csv', sep='\t')

# Split values in relevant columns
def split_values(df, column):
    return df[column].str.split(',').apply(lambda x: [item.strip() for item in x] if isinstance(x, list) else [])

netflix_data['genre'] = split_values(netflix_data, 'genre')
netflix_data['cast_media'] = split_values(netflix_data, 'cast_media')
netflix_data['country'] = split_values(netflix_data, 'country')

# Flatten list columns for filters
all_genres = sorted(set([genre for sublist in netflix_data['genre'] for genre in sublist]))
all_countries = sorted(set([country for sublist in netflix_data['country'] for country in sublist]))

# Display Netflix logo at the top and center-align it
netflix_logo_url = 'https://upload.wikimedia.org/wikipedia/commons/0/08/Netflix_2015_logo.svg'
st.markdown('<div class="logo-container"><img src="{}" width="200"></div>'.format(netflix_logo_url), unsafe_allow_html=True)

st.markdown("<h1 style='text-align: center;'>Netflix Content Analysis Dashboard</h1>", unsafe_allow_html=True)

# Static Metrics Section
total_titles = len(netflix_data)
total_movies = len(netflix_data[netflix_data['type'] == 'Movie'])
total_tv_shows = len(netflix_data[netflix_data['type'] == 'TV Show'])
most_common_genre = netflix_data.explode('genre')['genre'].value_counts().idxmax()

st.markdown("<h3 class='custom-header'>Key Metrics</h3>", unsafe_allow_html=True)
st.markdown(f"""
<div style="display: flex; justify-content: space-around; text-align: center;">
    <div>
        <h4>Total Titles</h4>
        <h2 style="color: #ffffff;">{total_titles}</h2>
    </div>
    <div>
        <h4>Total Movies</h4>
        <h2 style="color: #ffffff;">{total_movies}</h2>
    </div>
    <div>
        <h4>Total TV Shows</h4>
        <h2 style="color: #ffffff;">{total_tv_shows}</h2>
    </div>
    <div>
        <h4>Most Common Genre</h4>
        <h2 style="color: #ffffff;">{most_common_genre}</h2>
    </div>
</div>
""", unsafe_allow_html=True)

# Filters Section
st.markdown("<h3 class='custom-header'>Filters</h3>", unsafe_allow_html=True)
with st.expander("Refine your search", expanded=True):
    st.markdown("<div class='custom-filter-box'>", unsafe_allow_html=True)

    st.subheader("Content Type")
    content_type = st.checkbox("Include Movies", value=True), st.checkbox("Include TV Shows", value=True)
    selected_content_types = [ctype for ctype, selected in zip(netflix_data['type'].unique(), content_type) if selected]

    # Release Year as a slider
    year_range = st.slider("Select Release Year Range", int(netflix_data['release_year'].min()), int(netflix_data['release_year'].max()), (2010, 2021))

    genres = st.multiselect("Select Genre(s)", options=all_genres, default=None)
    countries = st.multiselect("Select Country(s)", options=all_countries, default=None)
    st.markdown("</div>", unsafe_allow_html=True)

# Filter Data
filtered_data = netflix_data[(netflix_data['type'].isin(selected_content_types)) &
                             (netflix_data['release_year'] >= year_range[0]) &
                             (netflix_data['release_year'] <= year_range[1])]

if genres:
    filtered_data = filtered_data[filtered_data['genre'].apply(lambda x: any(g in x for g in genres))]

if countries:
    filtered_data = filtered_data[filtered_data['country'].apply(lambda x: any(c in x for c in countries))]

# Tabs for Different Analysis Sections
st.markdown("<h3 class='custom-header'>Detailed Analysis</h3>", unsafe_allow_html=True)

tab1, tab2, tab3, tab4, tab5 = st.tabs(["Content Type & Ratings", "Genre & Directors", "Country of Origin", "Duration", "Keywords"])

with tab1:
    st.header("Breakdown by Content Type")
    content_type_distribution = filtered_data['type'].value_counts()

    fig1 = px.pie(filtered_data, names=content_type_distribution.index, values=content_type_distribution.values,
                  color_discrete_sequence=['#E50914', '#333333'], title="Content Type Distribution on Netflix")
    st.plotly_chart(fig1, use_container_width=True)

    st.header("Content Ratings Overview")
    rating_counts = filtered_data['rating'].value_counts()

    fig8 = px.bar(rating_counts, x=rating_counts.index, y=rating_counts.values,
                  color_discrete_sequence=['#E50914'], title="Distribution of Content Ratings")
    fig8.update_layout(xaxis_title="Rating", yaxis_title="Number of Titles", template="plotly_dark")
    st.plotly_chart(fig8, use_container_width=True)

with tab2:
    st.header("Genre Popularity Insights")
    genres = filtered_data['genre'].explode()
    genre_counts = genres.value_counts().head(10)

    fig2 = px.bar(genre_counts, x=genre_counts.values, y=genre_counts.index, orientation='h',
                  color_discrete_sequence=['#E50914'], title="Top 10 Most Popular Genres on Netflix")
    fig2.update_layout(xaxis_title="Number of Titles", yaxis_title="Genre", template="plotly_dark")
    st.plotly_chart(fig2, use_container_width=True)

    st.header("Leading Directors by Content Volume")
    director_counts = filtered_data['director'].value_counts().head(10)

    fig3 = px.bar(director_counts, x=director_counts.values, y=director_counts.index, orientation='h',
                  color_discrete_sequence=['#E50914'], title="Top 10 Most Frequent Directors on Netflix")
    fig3.update_layout(xaxis_title="Number of Titles Directed", yaxis_title="Director", template="plotly_dark")
    st.plotly_chart(fig3, use_container_width=True)

with tab3:
    st.header("Content Origin by Country")
    country_counts = filtered_data['country'].explode().value_counts().head(10)

    fig4 = px.bar(country_counts, x=country_counts.values, y=country_counts.index, orientation='h',
                  color_discrete_sequence=['#E50914'], title="Top 10 Countries of Origin for Netflix Content")
    fig4.update_layout(xaxis_title="Number of Titles", yaxis_title="Country", template="plotly_dark")
    st.plotly_chart(fig4, use_container_width=True)

    st.header("Global Content Sourcing")

    country_title_counts = filtered_data.explode('country')['country'].value_counts().reset_index()
    country_title_counts.columns = ['Country', 'Title Count']

    total_titles = country_title_counts['Title Count'].sum()
    country_title_counts['Percentage of Total'] = (country_title_counts['Title Count'] / total_titles * 100).round(2)

    fig7 = px.choropleth(
        country_title_counts,
        locations="Country",
        locationmode="country names",
        color="Title Count",
        hover_name="Country",
        hover_data={"Title Count": True, "Percentage of Total": True},
        color_continuous_scale=px.colors.sequential.Reds,
        title="Heatmap of Titles by Country of Origin",
        labels={'Title Count': 'Number of Titles'},
        projection="natural earth"
    )

    fig7.update_geos(
        showcoastlines=True, coastlinecolor="Black",
        showland=True, landcolor="#1f1f1f",
        showocean=True, oceancolor="#141414",
        showlakes=False,
        showrivers=False,
        showcountries=True, countrycolor="Black"
    )

    fig7.update_layout(
        geo=dict(
            bgcolor='#141414'
        ),
        font=dict(
            color='#F5F5F1',
            family='Netflix Sans'
        ),
        title_font=dict(
            size=24,
            color='#E50914',
            family='Netflix Sans'
        ),
        margin={"r":0,"t":40,"l":0,"b":0},
        coloraxis_colorbar=dict(
            title="Number of Titles",
            tickvals=[10, 50, 100, 500],
            ticks="outside",
            lenmode="pixels", len=300,
            yanchor="middle", y=0.5
        )
    )

    fig7.update_traces(
        hovertemplate="<b>%{hovertext}</b><br>" +
                      "Titles: %{z}<br>" +
                      "Percentage: %{customdata[1]}%<extra></extra>"
    )

    st.plotly_chart(fig7, use_container_width=True)

with tab4:
    st.header("Duration Analysis of Content")

    ## Separate Movies and TV Shows for duration analysis
    movies_duration = filtered_data[filtered_data['type'] == 'Movie']
    tv_shows_seasons = filtered_data[filtered_data['type'] == 'TV Show']

    ## Clean duration data
    movies_duration['duration_mins'] = movies_duration['duration'].str.extract('(\d+)').astype(float)
    tv_shows_seasons['seasons'] = tv_shows_seasons['duration'].str.extract('(\d+)').astype(float)

    ## Plot Movies Duration
    fig5 = px.histogram(movies_duration, x='duration_mins', nbins=30, color_discrete_sequence=['#E50914'],
                        title="Distribution of Movie Durations (Minutes)")
    fig5.update_layout(xaxis_title="Duration (Minutes)", yaxis_title="Number of Movies", template="plotly_dark")
    st.plotly_chart(fig5, use_container_width=True)

    ## Plot TV Shows Seasons
    fig6 = px.histogram(tv_shows_seasons, x='seasons', nbins=10, color_discrete_sequence=['#E50914'],
                        title="Distribution of TV Show Seasons")
    fig6.update_layout(xaxis_title="Number of Seasons", yaxis_title="Number of TV Shows", template="plotly_dark")
    st.plotly_chart(fig6, use_container_width=True)

with tab5:
    st.header("Themes and Keywords in Content Descriptions")

    text = " ".join(description for description in filtered_data['description'])
    wordcloud = WordCloud(width=800, height=400, background_color="black", colormap="Reds").generate(text)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    st.pyplot(plt)



Writing app.py


In [None]:
!wget -q -O - ipv4.icanhazip.com

In [4]:
!streamlit run app.py & npx localtunnel --port 8501



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.19.117.135:8501[0m
[0m
your url is: https://slimy-parks-punch.loca.lt


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

[34m  Stopping...[0m
^C
