In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import webbrowser
import os
from datetime import datetime, timedelta
import pytz
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [2]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\dhruv\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
apps_df=pd.read_csv('Play Store Data.csv')
reviews_df=pd.read_csv('User Reviews.csv')

In [4]:
apps_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [5]:
reviews_df.head()

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462
2,10 Best Foods for You,,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3


In [6]:
#Step 2 : Data Cleaning
apps_df = apps_df.dropna(subset=['Rating'])
for column in apps_df.columns :
    apps_df[column].fillna(apps_df[column].mode()[0],inplace=True)
apps_df.drop_duplicates(inplace=True)
apps_df=apps_df=apps_df[apps_df['Rating']<=5]
reviews_df.dropna(subset=['Translated_Review'],inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  apps_df[column].fillna(apps_df[column].mode()[0],inplace=True)


In [7]:
apps_df.dtypes

App                object
Category           object
Rating            float64
Reviews            object
Size               object
Installs           object
Type               object
Price              object
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
dtype: object

In [8]:
#Convert the Installs columns to numeric by removing commas and +
apps_df['Installs']=apps_df['Installs'].str.replace(',','').str.replace('+','').astype(int)

#Convert Price column to numeric after removing $
apps_df['Price']=apps_df['Price'].str.replace('$','').astype(float)

In [9]:
apps_df.dtypes

App                object
Category           object
Rating            float64
Reviews            object
Size               object
Installs            int64
Type               object
Price             float64
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
dtype: object

In [10]:
merged_df=pd.merge(apps_df,reviews_df,on='App',how='inner')

In [11]:
merged_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,A kid's excessive ads. The types ads allowed a...,Negative,-0.25,1.0
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,It bad >:(,Negative,-0.725,0.833333
2,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,like,Neutral,0.0,0.0
3,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I love colors inspyering,Positive,0.5,0.6
4,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I hate,Negative,-0.8,0.9


In [12]:
def convert_size(size):
    if 'M' in size:
        return float(size.replace('M',''))
    elif 'k' in size:
        return float(size.replace('k',''))/1024
    else:
        return np.nan
apps_df['Size']=apps_df['Size'].apply(convert_size)

In [13]:
apps_df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10834,FR Calculator,FAMILY,4.0,7,2.6,500,Free,0.0,Everyone,Education,"June 18, 2017",1.0.0,4.1 and up
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53.0,5000,Free,0.0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6,100,Free,0.0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,,1000,Free,0.0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [14]:
#Lograrithmic
apps_df['Log_Installs']=np.log(apps_df['Installs'])

In [15]:
apps_df['Reviews']=apps_df['Reviews'].astype(int)

In [16]:
apps_df['Log_Reviews']=np.log(apps_df['Reviews'])

In [17]:
apps_df.dtypes

App                object
Category           object
Rating            float64
Reviews             int64
Size              float64
Installs            int64
Type               object
Price             float64
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
Log_Installs      float64
Log_Reviews       float64
dtype: object

In [18]:
def rating_group(rating):
    if rating >= 4:
        return 'Top rated app'
    elif rating >=3:
        return 'Above average'
    elif rating >=2:
        return 'Average'
    else:
        return 'Below Average'
apps_df['Rating_Group']=apps_df['Rating'].apply(rating_group)

In [19]:
#Revenue column
apps_df['Revenue']=apps_df['Price']*apps_df['Installs']

In [20]:
sia = SentimentIntensityAnalyzer()

In [21]:
#Polarity Scores in SIA
#Positive, Negative, Neutral and Compound: -1 - Very negative ; +1 - Very positive

In [22]:
review = "This app is amazing! I love the new features."
sentiment_score= sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.0, 'neu': 0.42, 'pos': 0.58, 'compound': 0.8516}


In [23]:
review = "This app is very bad! I hate the new features."
sentiment_score= sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.535, 'neu': 0.465, 'pos': 0.0, 'compound': -0.8427}


In [24]:
review = "This app is okay."
sentiment_score= sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.0, 'neu': 0.612, 'pos': 0.388, 'compound': 0.2263}


In [25]:
reviews_df['Sentiment_Score']=reviews_df['Translated_Review'].apply(lambda x: sia.polarity_scores(str(x))['compound'])

In [26]:
reviews_df.head()

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity,Sentiment_Score
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333,0.9531
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462,0.6597
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875,0.6249
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3,0.6369
5,10 Best Foods for You,Best way,Positive,1.0,0.3,0.6369


In [27]:
apps_df['Last Updated']=pd.to_datetime(apps_df['Last Updated'],errors='coerce')

In [28]:
apps_df['Year']=apps_df['Last Updated'].dt.year

In [29]:
apps_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,2018-01-07,1.0.0,4.0.3 and up,9.21034,5.068904,Top rated app,0.0,2018
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,Above average,0.0,2018
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,2018-08-01,1.2.4,4.0.3 and up,15.424948,11.379508,Top rated app,0.0,2018
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,2018-06-08,Varies with device,4.2 and up,17.727534,12.281384,Top rated app,0.0,2018
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,2018-06-20,1.1,4.4 and up,11.512925,6.874198,Top rated app,0.0,2018


In [30]:
html_files_path="./"
if not os.path.exists(html_files_path):
    os.makedirs(html_files_path)

In [31]:
plot_containers=""

In [32]:
# Update the save_plot_as_html function to include time_slots
def save_plot_as_html(fig, filename, insight, time_slots=None):
    global plot_containers
    filepath = os.path.join(html_files_path, filename)
    html_content = pio.to_html(fig, full_html=False, include_plotlyjs='inline')
    # Prepare time slots attribute
    time_attr = ''
    if time_slots:
        time_str = ','.join(time_slots)
        time_attr = f' data-time-slots="{time_str}"'
    plot_containers += f"""
    <div class="plot-container" id="{filename}" onclick="openPlot('{filename}')"{time_attr}>
        <div class="plot">{html_content}</div>
        <div class="insights">{insight}</div>
    </div>
    """
    fig.write_html(filepath, full_html=False, include_plotlyjs='inline')

In [33]:
plot_width=800
plot_height=600
plot_bg_color='black'
text_color='white'
title_font={'size':16}
axis_font={'size':12}

In [34]:
#Figure 1
category_counts=apps_df['Category'].value_counts().nlargest(10)
fig1=px.bar(
    x=category_counts.index,
    y=category_counts.values,
    labels={'x':'Category','y':'Count'},
    title='Top Categories on Play Store',
    color=category_counts.index,
    color_discrete_sequence=px.colors.sequential.Plasma,
    width=plot_width,
    height=plot_height
)
fig1.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)

fig1.show()
save_plot_as_html(fig1,"Category Graph 1.html","The top categories on the Play Store are dominated by tools, entertainment, and productivity apps")
            

In [35]:
#Figure 2
type_counts=apps_df['Type'].value_counts()
fig2=px.pie(
    values=type_counts.values,
    names=type_counts.index,
    title='App Type Distribution',
    color_discrete_sequence=px.colors.sequential.RdBu,
    width=plot_width,
    height=plot_height,
)
fig2.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    margin=dict(l=10,r=10,t=30,b=10)
)
fig2.show()
save_plot_as_html(fig2,"Type Graph 2.html","Most apps on the Playstore are free, indicating a strategy to attract users first and monetize through ads or in app purchases")

In [36]:
#Figure 3
fig3=px.histogram(
    apps_df,
    x='Rating',
    nbins=20,
    title='Rating Distribution',
    color_discrete_sequence=['#636EFA'],
    width=plot_width,
    height=plot_height,

)
fig3.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
fig3.show()
save_plot_as_html(fig3,"Rating Graph 3.html","Ratings are skewed towards higher values, suggesting that most apps are rated favorably by users")

In [37]:
#Figure 4
sentiment_counts=reviews_df['Sentiment_Score'].value_counts()
fig4=px.bar(
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    labels={'x':'Sentiment Score','y':'Count'},
    title='Sentiment Distribution',
    color=sentiment_counts.index,
    color_discrete_sequence=px.colors.sequential.RdPu,
    width=plot_width,
    height=plot_height,
)
fig4.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
fig4.show()
save_plot_as_html(fig4,"Sentiment Graph 4.html","Sentiments in reviews show a mix of positive and negative feedback, with a slight lean towards positive sentiments")

In [38]:
#Figure 5
installs_by_category=apps_df.groupby('Category')['Installs'].sum().nlargest(10)
fig5=px.bar(
    x=installs_by_category.index,
    y=installs_by_category.values,
    orientation='h',
    labels={'x':'Installs','y':'Category'},
    title='Installs by Category',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.Blues,
    width=plot_width,
    height=plot_height,
)
fig5.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
fig5.show()
save_plot_as_html(fig5,"Installs Graph 5.html","The categories with the most installs are social and communication apps, reflecting their broad appeal and daily usage")

In [39]:
# Updates Per Year Plot
updates_per_year = apps_df['Last Updated'].dt.year.value_counts().sort_index()
fig6 = px.line(
    x=updates_per_year.index,
    y=updates_per_year.values,
    labels={'x': 'Year', 'y': 'Number of Updates'},
    title='Number of Updates Over the Years',
    color_discrete_sequence=['#AB63FA'],
    width=plot_width,
    height=plot_height
)
fig6.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=10)
)
fig6.show()
save_plot_as_html(fig6, "Updates Graph 6.html", "Updates have been increasing over the years, showing that developers are actively maintaining and improving their apps.")

In [40]:
#Figure 7
revenue_by_category=apps_df.groupby('Category')['Revenue'].sum().nlargest(10)
fig7=px.bar(
    x=installs_by_category.index,
    y=installs_by_category.values,
    labels={'x':'Category','y':'Revenue'},
    title='Revenue by Category',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.Greens,
    width=plot_width,
    height=plot_height
)
fig7.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
fig7.show()
save_plot_as_html(fig7,"Revenue Graph 7.html","Categories such as Business and Productivity lead in revenue generation, indicating their monetization potential")

In [41]:
#Figure 8
genre_counts=apps_df['Genres'].str.split(';',expand=True).stack().value_counts().nlargest(10)
fig8=px.bar(
    x=genre_counts.index,
    y=genre_counts.values,
    labels={'x':'Genre','y':'Count'},
    title='Top Genres',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.OrRd,
    width=plot_width,
    height=plot_height
)
fig8.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
fig8.show()
save_plot_as_html(fig8,"Genre Graph 8.html","Action and Casual genres are the most common, reflecting users' preference for engaging and easy-to-play games")

In [42]:
#Figure 9
fig9=px.scatter(
    apps_df,
    x='Last Updated',
    y='Rating',
    color='Type',
    title='Impact of Last Update on Rating',
    color_discrete_sequence=px.colors.qualitative.Vivid,
    width=plot_width,
    height=plot_height
)
fig9.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
fig9.show()
save_plot_as_html(fig9,"Update Graph 9.html","The Scatter Plot shows a weak correlation between the last update and ratings, suggesting that more frequent updates dont always result in better ratings.")

In [43]:
#Figure 10
fig10=px.box(
    apps_df,
    x='Type',
    y='Rating',
    color='Type',
    title='Rating for Paid vs Free Apps',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=plot_width,
    height=plot_height
)
fig10.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
fig10.show()
save_plot_as_html(fig10,"Paid Free Graph 10.html","Paid apps generally have higher ratings compared to free apps, suggesting that users expect higher quality from apps they pay for")

In [44]:
def check_android_version(x):
    try:
        version_str = str(x).split()[0]  # Extract the version part (e.g., '4.0.3')
        parts = version_str.split('.')
        if len(parts) >= 2:
            major_minor = float(f"{parts[0]}.{parts[1]}")  # Combine major and minor
            return major_minor >=4.1
        return False
    except:
        return False

In [45]:
# Define constants for filtering thresholds
MIN_INSTALLS = 10000
MIN_REVENUE = 10000
MIN_SIZE_MB = 15
MAX_APP_NAME_LENGTH = 30
TARGET_CONTENT_RATING = 'Everyone'

# Build a Boolean mask for filtering
mask = (
    (apps_df['Installs'] >= MIN_INSTALLS) &
    (apps_df['Revenue'] >= MIN_REVENUE) &
    (apps_df['Android Ver'].apply(check_android_version)) &
    (apps_df['Size'] > MIN_SIZE_MB) &
    (apps_df['Content Rating'] == TARGET_CONTENT_RATING) &
    (apps_df['App'].str.len() <= MAX_APP_NAME_LENGTH)
)

# Apply the mask to obtain the filtered DataFrame
task1_df = apps_df.loc[mask].copy()

# Select the top 3 categories by count within the filtered DataFrame
top_categories = task1_df['Category'].value_counts().nlargest(3).index
# Filter the DataFrame to include only the top categories
task1_df = task1_df[task1_df['Category'].isin(top_categories)]

# Group data by Category and Type, calculating average installs and revenue
grouped = task1_df.groupby(['Category', 'Type']).agg(
    avg_installs=('Installs', 'mean'),
    avg_revenue=('Revenue', 'mean')
).reset_index()

# Define color mapping for app types
colors = {'Free': 'blue', 'Paid': 'orange'}

# Create a dual-axis plot
fig_task1 = make_subplots(specs=[[{"secondary_y": True}]])

# Plot each app type separately
for app_type in ['Free', 'Paid']:
    # Ensure there's data for each type to avoid empty traces
    type_data = grouped[grouped['Type'] == app_type]
    if not type_data.empty:
        # Bar chart for average installs (primary y-axis)
        fig_task1.add_trace(
            go.Bar(
                x=type_data['Category'],
                y=type_data['avg_installs'],
                name=f'Installs ({app_type})',
                marker_color=colors[app_type]
            ),
            secondary_y=False
        )
        # Line+marker chart for average revenue (secondary y-axis)
        fig_task1.add_trace(
            go.Scatter(
                x=type_data['Category'],
                y=type_data['avg_revenue'],
                name=f'Revenue ({app_type})',
                mode='lines+markers',
                line_color=colors[app_type]
            ),
            secondary_y=True
        )

# Update layout for consistent styling
fig_task1.update_layout(
    title='Avg Installs & Revenue (Free vs Paid) Task 1',
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    xaxis_title='Category',
    width=plot_width,
    height=plot_height,
    margin=dict(l=10, r=10, t=60, b=10),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
        
        
    )

    
)

# Optionally, update y-axis titles
fig_task1.update_yaxes(title_text="Average Installs", secondary_y=False)
fig_task1.update_yaxes(title_text="Average Revenue", secondary_y=True)

# Display the figure
fig_task1.show()

# Save the figure as an HTML file and append to the dashboard
save_plot_as_html(
    fig_task1,
    "task1.html",
    "Free apps dominate installs but paid apps generate higher revenue.",
      time_slots=["13:00-14:00"]
)

In [46]:
# Define constants for filtering
MIN_INSTALLS = 1_000_000
EXCLUDE_START_CHARS = ('A', 'C', 'G', 'S')
TOP_N_CATEGORIES = 5

# Mapping dictionary for assigning dummy countries to app categories
category_country_map = {
    'ART_AND_DESIGN': 'France',
    'AUTO_AND_VEHICLES': 'Italy',
    'BEAUTY': 'South Korea',
    'BOOKS_AND_REFERENCE': 'Turkey',
    'BUSINESS': 'Philippines',
    'COMICS': 'Japan',
    'COMMUNICATION': 'United Arab Emirates',
    'DATING': 'Argentina',
    'EDUCATION': 'Germany',
    'ENTERTAINMENT': 'Mexico',
    'EVENTS': 'Poland',
    'FINANCE': 'Singapore',
    'FOOD_AND_DRINK': 'Thailand',
    'HEALTH_AND_FITNESS': 'Australia',
    'HOUSE_AND_HOME': 'Sweden',
    'LIBRARIES_AND_DEMO': 'Finland',
    'LIFESTYLE': 'Netherlands',
    'GAME': 'United Kingdom',
    'FAMILY': 'Russia',
    'MEDICAL': 'Switzerland',
    'SOCIAL': 'Brazil',
    'SHOPPING': 'Indonesia',
    'PHOTOGRAPHY': 'India',
    'SPORTS': 'South Africa',
    'TRAVEL_AND_LOCAL': 'Spain',
    'TOOLS': 'China',
    'PERSONALIZATION': 'Canada',
    'PRODUCTIVITY': 'United States',
    'PARENTING': 'New Zealand',
    'WEATHER': 'Norway',
    'VIDEO_PLAYERS': 'Belgium',
    'NEWS_AND_MAGAZINES': 'Ireland',
    'MAPS_AND_NAVIGATION': 'Malaysia'
}

# Filter apps_df according to the specified criteria
mask = (
    ~apps_df['Category'].str.startswith(EXCLUDE_START_CHARS) &
    (apps_df['Installs'] > MIN_INSTALLS)
)
task2_df = apps_df.loc[mask].copy()

# Get the top 5 categories by frequency (count) within the filtered data
top_categories = task2_df['Category'].value_counts().nlargest(TOP_N_CATEGORIES).index
task2_df = task2_df[task2_df['Category'].isin(top_categories)]

# Aggregate the total installs per Category
aggregated = task2_df.groupby('Category')['Installs'].sum().reset_index()

# Map category values to dummy countries
# Convert category to uppercase for matching and then map; you can set a default value if not found (e.g., 'Unknown')
aggregated['country'] = aggregated['Category'].str.upper().map(category_country_map)
# Optional: if any country mapping is missing, fill it with a default (e.g., "Unknown")
aggregated['country'] = aggregated['country'].fillna("Unknown")

# Clean up category names for better display (e.g., "ART_AND_DESIGN" becomes "Art And Design")
aggregated['Category'] = aggregated['Category'].str.replace('_', ' ').str.title()

# Create the choropleth map using Plotly Express
fig_task2 = px.choropleth(
    aggregated,
    locations='country',
    locationmode='country names',
    scope='world',
    labels={'Installs': 'Total Installs'},
    hover_data=['Category'],
    color='Installs',
    hover_name='Category',
    title='Global Installs by Category (Dummy Countries) Task 2',
    color_continuous_scale='Viridis',
    template='plotly_dark',
    width=plot_width,
    height=plot_height
    
)

# Customize geographic projection and appearance
fig_task2.update_geos(
    visible=True,
    resolution=50,
    projection_type="equirectangular",
    projection_scale=1.5,
    coastlinecolor="white",
    landcolor="black",
    showcountries=True,
    showlakes=False
)

# Update layout for background and margins
fig_task2.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    margin=dict(l=10, r=10, t=40, b=10),
    title_font=dict(size=20),
    annotations=[
        dict(
            x=0.5,
            y=-0.1,
            showarrow=False,
            text="Categories mapped to representative countries",
            xref="paper",
            yref="paper"
        )
    ]
)
aggregated['highlight'] = aggregated['Installs'] > 1_000_000
hover_template = (
    '<b>%{hovertext}</b><br>' +
    'Country: %{location}<br>' +
    'Installs: %{z:,}<br>' +
    '%{customdata[0]}<extra></extra>'
)
aggregated['highlight_label'] = aggregated['highlight'].map(
    lambda x: '🔥 High Install Volume >1Million' if x else ''
)
fig_task2 = px.choropleth(
    aggregated,
    locations='country',
    locationmode='country names',
    scope='world',
    labels={'Installs': 'Total Installs'},
    hover_data=['highlight_label'],
    hover_name='Category',
    color='Installs',
    title='Global Installs by Category (Dummy Countries) Task 2',
    color_continuous_scale='Viridis',
    template='plotly_dark',
    width=plot_width,
    height=plot_height
    ,

)

# Set hover template with highlight
fig_task2.update_traces(
    hovertemplate=hover_template
)

# Optional debugging: print some details
print("Task2 DataFrame size:", task2_df.size)
print("Aggregated Data:")
print(aggregated)
print("Unique countries in mapping:", aggregated['country'].unique())

# Show and save the interactive choropleth map
fig_task2.show()
save_plot_as_html(
    fig_task2,
    "task2.html",
    "Business and Productivity categories show highest installs",
        time_slots=["18:00-20:00"]
    
)


Task2 DataFrame size: 16398
Aggregated Data:
          Category     Installs        country  highlight  \
0           Family   9685000000         Russia       True   
1  Personalization   2015000000         Canada       True   
2      Photography   9660000000          India       True   
3     Productivity  12395000000  United States       True   
4            Tools  11325000000          China       True   

                   highlight_label  
0  🔥 High Install Volume >1Million  
1  🔥 High Install Volume >1Million  
2  🔥 High Install Volume >1Million  
3  🔥 High Install Volume >1Million  
4  🔥 High Install Volume >1Million  
Unique countries in mapping: ['Russia' 'Canada' 'India' 'United States' 'China']


In [47]:
# Define constants for filters
MIN_RATING = 4.0
MIN_SIZE_MB = 10
FILTER_MONTH = 1  # January

# Ensure 'Last Updated' is a datetime (if not already)
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')

# Apply filters: rating, size, and Last Updated month
mask = (
    (apps_df['Size'] >= MIN_SIZE_MB) &
    (apps_df['Last Updated'].dt.month == FILTER_MONTH)
)
task3_df = apps_df.loc[mask].copy()

# Select top 10 categories by total installs within the filtered data
top_categories = task3_df.groupby('Category')['Installs'].sum().nlargest(10).index
task3_df = task3_df[task3_df['Category'].isin(top_categories)]

# Group data by Category: calculate average rating and total reviews
grouped = task3_df.groupby('Category').agg(
    avg_rating=('Rating', 'mean'),
    total_reviews=('Reviews', 'sum')
).reset_index()

grouped = grouped[grouped['avg_rating'] >= MIN_RATING]

# Create a grouped bar chart using Plotly Graph Objects
fig_task3 = go.Figure()

# Add average rating bar trace
fig_task3.add_trace(
    go.Bar(
        x=grouped['Category'],
        y=grouped['avg_rating'],
        name='Avg Rating',
        marker_color='teal'
    )
)

# Add total reviews bar trace
fig_task3.add_trace(
    go.Bar(
        x=grouped['Category'],
        y=grouped['total_reviews'],
        name='Total Reviews',
        marker_color='magenta'
    )
)

# Update layout for clarity and consistency with a dark theme
fig_task3.update_layout(
    title='Rating & Reviews by Category Task 3',
    xaxis_title='App Category',
    yaxis_title='Value',
    plot_bgcolor='black',
    paper_bgcolor='black',
    font=dict(color='white'),
    barmode='group',
    margin=dict(l=10, r=10, t=60, b=10),
 
   legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    width=plot_width,
    height=plot_height
)

# Save and display the figure
save_plot_as_html(
    fig_task3,
    "task3.html",
    "Productivity apps lead in ratings and reviews.",
     time_slots=["15:00-17:00"]

)
fig_task3.show()


In [48]:
# Define filter constants
MIN_REVIEWS = 10
RATING_THRESHOLD = 4.0
MIN_CATEGORY_COUNT = 50

# Filter the apps dataframe based on the task criteria:
# 1. App name should contain the letter 'C' (case insensitive)
# 2. Apps must have at least MIN_REVIEWS reviews
# 3. App rating must be below RATING_THRESHOLD
mask = (
    apps_df['App'].str.contains('C', case=False) &
    (apps_df['Reviews'] >= MIN_REVIEWS) &
    (apps_df['Rating'] < RATING_THRESHOLD)
)
task4_df = apps_df.loc[mask].copy()

# Filter out categories that do not have more than MIN_CATEGORY_COUNT apps
category_counts = task4_df['Category'].value_counts()
valid_categories = category_counts[category_counts > MIN_CATEGORY_COUNT].index
task4_df = task4_df[task4_df['Category'].isin(valid_categories)]

# Create the violin plot using Plotly Express to visualize rating distribution by Category
fig_task4 = px.violin(
    task4_df,
    x='Category',
    y='Rating',
    title='Rating Distribution by Category Task 4',
    color='Category',
    width=plot_width,
    height=plot_height
)

# Update layout for dark theme styling
fig_task4.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font=dict(color='white'),
    title_font=dict(size=18)
,
    xaxis=dict(title_font=dict(size=12)),
    yaxis=dict(title_font=dict(size=12)),
    margin=dict(l=10, r=10, t=60, b=10),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

# Save and display the plot
save_plot_as_html(fig_task4, "task4.html", "Communication apps show varied ratings.",
                  time_slots=["16:00-18:00"]
)
fig_task4.show()


In [49]:
# Define filter constants for clarity and easier updates
MIN_RATING = 3.5
TARGET_CATEGORY = 'GAME'
MIN_INSTALLS = 50000

# Filter the DataFrame for bubble chart
mask = (
    (apps_df['Rating'] > MIN_RATING) &
    (apps_df['Category'] == TARGET_CATEGORY) &
    (apps_df['Installs'] > MIN_INSTALLS)
)
task5_df = apps_df.loc[mask].copy()

# Create the bubble chart using Plotly Express
fig_task5 = px.scatter(
    task5_df,
    x='Size',
    y='Rating',
    size='Installs',    # Bubble size represents number of installs
    color='Installs',   # Color enhances visual differentiation by install count
    title='Size vs Rating (Games) Task 5',
    width=plot_width,
    height=plot_height,
    size_max=60,       # Maximum bubble size
    color_continuous_scale=px.colors.sequential.Plasma,
    hover_name='App',    # Show app name on hover
)

# Update layout to improve readability on a dark theme
fig_task5.update_layout(
    title_x=0.5,  # Center the title
    title_y=0.95,  # Adjust title position
    
    plot_bgcolor='black',
    paper_bgcolor='black',
    font=dict(color='white'),
    xaxis_title='App Size (MB)',
    yaxis_title='Average Rating'
,

    # Adjust font sizes for better readability
    title_font=dict(size=18),
    xaxis=dict(title_font=dict(size=12)),
    yaxis=dict(title_font=dict(size=12)),
    legend=dict(font=dict(size=12)),
    margin=dict(l=50, r=50, t=50, b=50),


)

# Save and display the interactive plot
save_plot_as_html(fig_task5, "task5.html", "Larger game apps tend to have higher ratings.",
time_slots=["17:00-19:00"]
    
)
fig_task5.show()


In [50]:
# Ensure the 'Last Updated' column is in datetime format
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')

# Fill missing genres with an empty string
apps_df['Genres'] = apps_df['Genres'].fillna('')

# Define filter thresholds as constants
MIN_INSTALLS = 100000
MIN_REVIEWS = 1000
EXCLUDED_GENRE_START_CHARS = ('A', 'F', 'E', 'G', 'I', 'K')

# Calculate the threshold date for "updated within the last year"
one_year_ago = 2018

# Print the initial number of rows in the dataframe
print("Rows before filtering:", len(apps_df))

# Apply filters:
# 1. Last Updated within the last year
# 2. Installs >= MIN_INSTALLS
# 3. Reviews > MIN_REVIEWS
# 4. Genres does NOT start with excluded characters
mask = (
    (apps_df['Last Updated'].dt.year >= one_year_ago) &
    (apps_df['Installs'] >= MIN_INSTALLS) &
    (apps_df['Reviews'] > MIN_REVIEWS) &
    (~apps_df['Genres'].str.startswith(EXCLUDED_GENRE_START_CHARS))
)
task6_df = apps_df.loc[mask].copy()

# Print the number of rows after filtering
print("Rows after filtering:", len(task6_df))

# Compute correlation matrix if data exists, otherwise create an empty matrix
if task6_df.empty:
    # Create an empty correlation matrix with NaN values
    corr_matrix = pd.DataFrame(
        np.nan, 
        index=['Installs', 'Rating', 'Reviews'],
        columns=['Installs', 'Rating', 'Reviews']
    )
    title = 'Correlation Matrix (No data met the filter criteria)'
else:
    corr_matrix = task6_df[['Installs', 'Rating', 'Reviews']].corr()
    title = 'Correlation Matrix Task 6'

print("Correlation Matrix:\n", corr_matrix)

# Create a heatmap using Plotly Express
fig_task6 = px.imshow(
    corr_matrix,
    title=title,
    text_auto=True,
    width=plot_width,
    height=plot_height,
    aspect='auto',
    color_continuous_scale='Blues'
,
    labels=dict(x='Features', y='Features', color='Correlation Coefficient'),
    x=['Installs', 'Rating', 'Reviews'],

)

# Update layout for dark theme styling
fig_task6.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font=dict(color='white')
,
    xaxis=dict(showgrid=False, zeroline=False),
    yaxis=dict(showgrid=False, zeroline=False)
,
    coloraxis_colorbar=dict(
        title='Correlation Coefficient',
        titleside='right',
        tickmode='array',
        tickvals=[-1, -0.5, 0, 0.5, 1],
        ticktext=['-1', '-0.5', '0', '0.5', '1']
    )
,
    margin=dict(l=10, r=10, t=30, b=10),

    
)

# Save the heatmap to an HTML file and show the figure
save_plot_as_html(fig_task6, "task6.html", "Installs and reviews are strongly correlated.",
time_slots=["14:00-16:00"]

)
fig_task6.show()

Rows before filtering: 8892
Rows after filtering: 3098
Correlation Matrix:
           Installs    Rating   Reviews
Installs  1.000000  0.025096  0.595241
Rating    0.025096  1.000000  0.078027
Reviews   0.595241  0.078027  1.000000


In [51]:
# Enhanced filtering with explicit copy and whitespace handling
task7_df = apps_df.loc[
    (apps_df['Content Rating'] == 'Teen') &
    (apps_df['App'].str.strip().str.startswith('E')) &  # Case-sensitive check after stripping whitespace
    (apps_df['Installs'] > 10000)
].copy()

print("Rows after initial filtering:", len(task7_df))
print("Unique categories:", task7_df['Category'].unique())

if not task7_df.empty:
    # Enhanced datetime handling with NaT removal
    task7_df['Last Updated'] = pd.to_datetime(task7_df['Last Updated'], errors='coerce')
    task7_df = task7_df.dropna(subset=['Last Updated'])  # Remove invalid dates
    
    if task7_df.empty:
        print("No valid dates remaining after filtering")
        # Create empty plot with consistent styling
        fig_task7 = px.line(title="No Valid Dates in Filtered Data")
        fig_task7.update_layout(
            plot_bgcolor='black',
            paper_bgcolor='black',
            font_color='white',
            title_x=0.5
        )
        save_plot_as_html(fig_task7, "task7.html", "No valid dates in filtered data")
        fig_task7.show()
    else:
        # Temporal analysis with proper sorting
        task7_df.sort_values('Last Updated', inplace=True)
        
        # Enhanced grouping with frequency validation
        monthly_data = (task7_df
                        .groupby(['Category', pd.Grouper(key='Last Updated', freq='ME')])
                        ['Installs']
                        .sum()
                        .reset_index())
        
        # Complete time series generation with explicit date boundaries
        min_date = monthly_data['Last Updated'].min()
        max_date = monthly_data['Last Updated'].max()
        complete_dates = pd.date_range(start=min_date, end=max_date, freq='ME')
        
        # Create comprehensive index with all category-date combinations
        complete_index = pd.MultiIndex.from_product(
            [task7_df['Category'].unique(), complete_dates],
            names=['Category', 'Last Updated']
        )
        
        # Reindex with fill for missing values
        monthly_data = (monthly_data
                        .set_index(['Category', 'Last Updated'])
                        .reindex(complete_index, fill_value=0)
                        .reset_index())
        
        # Growth calculation with edge case handling
        monthly_data['prev_installs'] = monthly_data.groupby('Category')['Installs'].shift(1)
        monthly_data['growth_pct'] = (
            (monthly_data['Installs'] - monthly_data['prev_installs']) / 
            monthly_data['prev_installs'].replace(0, np.nan)
        ) * 100
        
        # Handle new product launches (0 → >0 installs)
        monthly_data.loc[
            (monthly_data['prev_installs'] == 0) & 
            (monthly_data['Installs'] > 0), 'growth_pct'
        ] = 100  # Mark as 100% growth for new entries

        # Optimized plotting with pre-filtered growth data
        significant_growth = monthly_data[monthly_data['growth_pct'] > 20]
        
        # Create visualization with improved annotations
        fig_task7 = px.line(
            monthly_data,
            x='Last Updated',
            y='Installs',
            color='Category',
            title='Task7',
            markers=True,
            labels={'Installs': 'Total Installs', 'Last Updated': 'Month'},
            width=plot_width,
            height=plot_height,
        )
        
        # Add growth highlights using vectorized operations
        shapes = [{
            'type': 'rect',
            'x0': row['Last Updated'].replace(day=1),
            'x1': row['Last Updated'] + pd.offsets.MonthEnd(0),
            'y0': 0,
            'y1': row['Installs'],
            'fillcolor': 'darkgreen',
            'line': {'width': 5, 'color': 'darkgreen'},
            'opacity': 0.7,
            'layer': 'below'
        } for _, row in significant_growth.iterrows()]

        # Enhanced layout configuration
        fig_task7.update_layout(
            plot_bgcolor='black',
            paper_bgcolor='black',
            font_color='white',
            xaxis={
                'tickformat': '%b\n%Y',
                'dtick': 'M3',
                'tickangle': 45,
                'gridcolor': '#404040'

            },
            yaxis={
                'rangemode': 'tozero',
                'gridcolor': '#404040'

            },
            shapes=shapes,
            margin=dict(l=60, r=30, t=120, b=100),
            annotations=[{
                'text': "Green highlights indicate >20% month-over-month growth",
                'font': {'color': '#00FF00', 'size': 14},
                'x': 0.5,
                'y': -0.8,
                'xref': 'paper',
                'yref': 'paper',
                'showarrow': False
            }],
            hovermode='x unified',
            legend={
                'orientation': 'h',
                'yanchor': 'bottom',
                'y': 1.02,
                'xanchor': 'right',
                'x': 1
            }
        )

        # Add interactive components
        fig_task7.update_xaxes(
            rangeslider_visible=True,
            rangeselector={
                'buttons': [
                    
                    {'count': 6, 'label': '6m', 'step': 'month', 'stepmode': 'backward'},
                    {'count': 1, 'label': '1y','step':'year','stepmode': 'backward'},
                    {'count': 2, 'label': '2y','step':'year','stepmode': 'backward'},
                    {'step': 'all'}
                ],
                'bgcolor': '#303030'
            }
        )
        category_order = (task7_df.groupby('Category')['Installs'].sum()
                  .sort_values(ascending=False).index.tolist())
        fig_task7.update_traces(marker=dict(line=dict(width=1)),
                        selector=dict(type='scatter'),
                        line_shape='spline')
    
        # Update layout for better visibility

        # Save and display output
        save_plot_as_html(fig_task7, "task7.html", "Growth patterns in Teen apps starting with E",
        time_slots=["18:00-21:00"]

                          )
        fig_task7.show()

else:
    print("No data matching filters")
    # Create empty plot with consistent styling
    fig_task7 = px.line(title="No Matching Data Available")
    fig_task7.update_layout(
        plot_bgcolor='black',
        paper_bgcolor='black',
        font_color='white',
        title_x=0.5
    )

    save_plot_as_html(fig_task7, "task7.html", "No apps met the criteria task7",time_slots=["18:00-21:00"])
    fig_task7.show()

Rows after initial filtering: 28
Unique categories: ['HEALTH_AND_FITNESS' 'GAME' 'SHOPPING' 'FAMILY' 'NEWS_AND_MAGAZINES'
 'SOCIAL' 'SPORTS' 'PHOTOGRAPHY']


In [52]:
plot_containers_split=plot_containers.split('</div>')

In [53]:
if len(plot_containers_split) > 1:
    final_plot=plot_containers_split[-2]+'</div>'
else:
    final_plot=plot_containers

In [54]:
dashboard_html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Google Play Store Review Analytics</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #333;
            color: #fff;
            margin: 0;
            padding: 0;
        }}
        .header {{
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 20px;
            background-color: #444;
        }}
        .header img {{
            margin: 0 10px;
            height: 50px;
        }}
        .container {{
            display: flex;
            flex-wrap: wrap;
            justify-content: center;
            padding: 20px;
        }}
        .plot-container {{
            display: none; /* Initially hidden */
            border: 2px solid #555;
            margin: 10px;
            padding: 10px;
            width: {plot_width}px;
            height: {plot_height}px;
            overflow: hidden;
            position: relative;
            cursor: pointer;
        }}
        .insights {{
            display: none;
            position: absolute;
            right: 10px;
            top: 10px;
            background-color: rgba(0,0,0,0.7);
            padding: 5px;
            border-radius: 5px;
            color: #fff;
        }}
        .plot-container:hover .insights {{
            display: block;
        }}
    </style>
    <script>
        function openPlot(filename) {{
            window.open(filename, '_blank');
        }}

        function parseTime(timeStr) {{
            const [hours, minutes] = timeStr.split(':').map(Number);
            return hours * 60 + minutes;
        }}

        function checkTimeSlots() {{
            const now = new Date();
            const currentMinutes = now.getHours() * 60 + now.getMinutes();

            document.querySelectorAll('.plot-container').forEach(container => {{
                const slots = container.dataset.timeSlots;
                let isVisible = false;

                if (slots) {{
                    const timeIntervals = slots.split(',');
                    for (const interval of timeIntervals) {{
                        const [startStr, endStr] = interval.split('-');
                        const start = parseTime(startStr.trim());
                        const end = parseTime(endStr.trim());
                        if (currentMinutes >= start && currentMinutes <= end) {{
                            isVisible = true;
                            break;
                        }}
                    }}
                }} else {{
                    // Show if no time slots specified
                    isVisible = true;
                }}

                container.style.display = isVisible ? 'block' : 'none';
            }});
        }}

        // Initial check and update 
        checkTimeSlots();
        setInterval(checkTimeSlots, 100);
    </script>
</head>
<body>
    <div class="header">
        <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Logo_2013_Google.png/800px-Logo_2013_Google.png" alt="Google Logo">
        <h1>Google Play Store Reviews Analytics</h1>
        <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Google_Play_Store_badge_EN.svg/1024px-Google_Play_Store_badge_EN.svg.png" alt="Google Play Store Logo">
    </div>
    <div class="container">
        {plots}
    </div>
</body>
</html>
"""

In [55]:
final_html=dashboard_html.format(plots=plot_containers,plot_width=plot_width,plot_height=plot_height)

In [56]:
dashboard_path=os.path.join(html_files_path,"web page.html")

In [57]:
with open(dashboard_path, "w", encoding="utf-8") as f:
    f.write(final_html)

In [58]:
webbrowser.open('file://'+os.path.realpath(dashboard_path))

True