# Importing libraries

In [337]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import hashlib
import nltk
import webbrowser
import os
import datetime
import pytz

In [338]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\push1\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

# Data Loading

In [339]:
apps_df=pd.read_csv('Play Store Data.csv')
reviews_df=pd.read_csv('User Reviews.csv')

# Data Cleaning

In [340]:
#Step 2 : Data Cleaning
apps_df = apps_df.dropna(subset=['Rating'])
for column in apps_df.columns :
    apps_df[column].fillna(apps_df[column].mode()[0],inplace=True)
apps_df.drop_duplicates(inplace=True)
apps_df=apps_df=apps_df[apps_df['Rating']<=5]
reviews_df.dropna(subset=['Translated_Review'],inplace=True)

In [341]:
#Convert the Installs columns to numeric by removing commas and +
apps_df['Installs']=apps_df['Installs'].str.replace(',','').str.replace('+','').astype(int)


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.



In [342]:
#Convert Price column to numeric after removing $
apps_df['Price']=apps_df['Price'].str.replace('$','').astype(float)


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.



In [343]:
apps_df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,10000,Free,0.0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,5000000,Free,0.0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,50000000,Free,0.0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,100000,Free,0.0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10834,FR Calculator,FAMILY,4.0,7,2.6M,500,Free,0.0,Everyone,Education,"June 18, 2017",1.0.0,4.1 and up
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53M,5000,Free,0.0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100,Free,0.0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,1000,Free,0.0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [344]:
reviews_df

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.000000,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.250000,0.288462
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.400000,0.875000
4,10 Best Foods for You,Best idea us,Positive,1.000000,0.300000
5,10 Best Foods for You,Best way,Positive,1.000000,0.300000
...,...,...,...,...,...
64222,Housing-Real Estate & Property,Most ads older many agents ..not much owner po...,Positive,0.173333,0.486667
64223,Housing-Real Estate & Property,"If photos posted portal load, fit purpose. I'm...",Positive,0.225000,0.447222
64226,Housing-Real Estate & Property,"Dumb app, I wanted post property rent give opt...",Negative,-0.287500,0.250000
64227,Housing-Real Estate & Property,I property business got link SMS happy perform...,Positive,0.800000,1.000000


# Data Transformation

In [345]:
merged_df=pd.merge(apps_df,reviews_df,on='App',how='inner')

In [346]:
merged_df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,A kid's excessive ads. The types ads allowed a...,Negative,-0.250000,1.000000
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,It bad >:(,Negative,-0.725000,0.833333
2,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,like,Neutral,0.000000,0.000000
3,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I love colors inspyering,Positive,0.500000,0.600000
4,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I hate,Negative,-0.800000,0.900000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59119,A+ Gallery - Photos & Videos,PHOTOGRAPHY,4.5,223941,Varies with device,10000000,Free,0.0,Everyone,Photography,"August 6, 2018",Varies with device,Varies with device,Overall great app. Best gallery seen far,Positive,0.475000,0.512500
59120,A+ Gallery - Photos & Videos,PHOTOGRAPHY,4.5,223941,Varies with device,10000000,Free,0.0,Everyone,Photography,"August 6, 2018",Varies with device,Varies with device,"Recommended, 100% love it, keep good work dev ...",Positive,0.566667,0.733333
59121,A+ Gallery - Photos & Videos,PHOTOGRAPHY,4.5,223941,Varies with device,10000000,Free,0.0,Everyone,Photography,"August 6, 2018",Varies with device,Varies with device,Too much ads,Positive,0.200000,0.200000
59122,A+ Gallery - Photos & Videos,PHOTOGRAPHY,4.5,223941,Varies with device,10000000,Free,0.0,Everyone,Photography,"August 6, 2018",Varies with device,Varies with device,Just allow time ...,Neutral,0.000000,0.000000


In [347]:
merged_df.isnull().sum()

App                       0
Category                  0
Rating                    0
Reviews                   0
Size                      0
Installs                  0
Type                      0
Price                     0
Content Rating            0
Genres                    0
Last Updated              0
Current Ver               0
Android Ver               0
Translated_Review         0
Sentiment                 0
Sentiment_Polarity        0
Sentiment_Subjectivity    0
dtype: int64

In [348]:
# So none of the columns have any null values

In [349]:
merged_df['Rating'].value_counts()

4.4    10487
4.5     8912
4.3     8130
4.6     7957
4.2     6812
4.7     5075
4.1     3946
4.0     2529
3.9     1995
3.8      857
3.7      791
4.8      374
3.5      323
3.4      240
3.6      216
3.1      120
4.9       98
2.7       65
3.3       64
3.0       63
3.2       39
2.6       31
Name: Rating, dtype: int64

In [350]:
def convert_size(size):
    if 'M' in size:
        return float(size.replace('M',''))
    elif 'k' in size:
        return float(size.replace('k',''))/1024
    else:
        return np.nan
apps_df['Size']=apps_df['Size'].apply(convert_size)

In [351]:
#Lograrithmic
apps_df['Log_Installs']=np.log(apps_df['Installs'])

In [352]:
apps_df['Reviews']=apps_df['Reviews'].astype(int)

In [353]:
apps_df['Log_Reviews']=np.log(apps_df['Reviews'])

In [354]:
def rating_group(rating):
    if rating >= 4:
        return 'Top rated app'
    elif rating >=3:
        return 'Above average'
    elif rating >=2:
        return 'Average'
    else:
        return 'Below Average'
apps_df['Rating_Group']=apps_df['Rating'].apply(rating_group)

In [355]:
#Revenue column
apps_df['Revenue']=apps_df['Price']*apps_df['Installs']

In [356]:
SIA = SentimentIntensityAnalyzer()

In [357]:
#Polarity Scores in SIA
#Positive, Negative, Neutral and Compound: -1 - Very negative ; +1 - Very positive

In [358]:
review = "This app is amazing! I love the new features."
sentiment_score= SIA.polarity_scores(review)
print(sentiment_score)

{'neg': 0.0, 'neu': 0.42, 'pos': 0.58, 'compound': 0.8516}


In [359]:
reviews_df['Sentiment_Score']=reviews_df['Translated_Review'].apply(lambda x: SIA.polarity_scores(str(x))['compound'])

In [360]:
apps_df['Last Updated']=pd.to_datetime(apps_df['Last Updated'],errors='coerce')

In [361]:
apps_df['Year']=apps_df['Last Updated'].dt.year

In [362]:
apps_df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,2018-01-07,1.0.0,4.0.3 and up,9.210340,5.068904,Top rated app,0.0,2018
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,Above average,0.0,2018
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,2018-08-01,1.2.4,4.0.3 and up,15.424948,11.379508,Top rated app,0.0,2018
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,2018-06-08,Varies with device,4.2 and up,17.727534,12.281384,Top rated app,0.0,2018
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,2018-06-20,1.1,4.4 and up,11.512925,6.874198,Top rated app,0.0,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10834,FR Calculator,FAMILY,4.0,7,2.6,500,Free,0.0,Everyone,Education,2017-06-18,1.0.0,4.1 and up,6.214608,1.945910,Top rated app,0.0,2017
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53.0,5000,Free,0.0,Everyone,Education,2017-07-25,1.48,4.1 and up,8.517193,3.637586,Top rated app,0.0,2017
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6,100,Free,0.0,Everyone,Education,2018-07-06,1.0,4.1 and up,4.605170,1.386294,Top rated app,0.0,2018
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,,1000,Free,0.0,Mature 17+,Books & Reference,2015-01-19,Varies with device,Varies with device,6.907755,4.736198,Top rated app,0.0,2015


In [363]:
apps_df_extract=apps_df[:5]

In [364]:
apps_df_extract

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,2018-01-07,1.0.0,4.0.3 and up,9.21034,5.068904,Top rated app,0.0,2018
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,Above average,0.0,2018
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,2018-08-01,1.2.4,4.0.3 and up,15.424948,11.379508,Top rated app,0.0,2018
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,2018-06-08,Varies with device,4.2 and up,17.727534,12.281384,Top rated app,0.0,2018
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,2018-06-20,1.1,4.4 and up,11.512925,6.874198,Top rated app,0.0,2018


In [365]:
# Export to Excel
excel_file = "apps_df.xlsx"  # Replace with desired Excel file name
apps_df_extract.to_excel(excel_file, index=False)

print(f"CSV successfully converted to {excel_file}")

CSV successfully converted to apps_df.xlsx


# Data Visualisation using Plotly

In [366]:
html_files_path="./"
if not os.path.exists(html_files_path):
    os.makedirs(html_files_path)


In [367]:
plot_containers=""

In [368]:
# Save each Plotly figure to an HTML file
def save_plot_as_html(fig, filename, insight):
    global plot_containers
    filepath = os.path.join(html_files_path, filename)
    html_content = pio.to_html(fig, full_html=False, include_plotlyjs='inline')
    # Append the plot and its insight to plot_containers
    plot_containers += f"""
    <div class="plot-container" id="{filename}" onclick="openPlot('{filename}')">
        <div class="plot">{html_content}</div>
        <div class="insights">{insight}</div>
    </div>
    """
    fig.write_html(filepath, full_html=False, include_plotlyjs='inline')

In [369]:
plot_width=400
plot_height=300
plot_bg_color='black'
text_color='white'
title_font={'size':16}
axis_font={'size':12}

Figure 1

In [370]:
category_counts=apps_df['Category'].value_counts().nlargest(10)
fig1=px.bar(
    x=category_counts.index,
    y=category_counts.values,
    labels={'x':'Category','y':'Count'},
    title='Top Categories on Play Store',
    color=category_counts.index,
    color_discrete_sequence=px.colors.sequential.Plasma,
    width=400,
    height=300
)
fig1.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig1,"Category Graph 1.html","The top categories on the Play Store are dominated by tools, entertainment, and productivity apps")
            

Figure 2

In [371]:
#Figure 2
type_counts=apps_df['Type'].value_counts()
fig2=px.pie(
    values=type_counts.values,
    names=type_counts.index,
    title='App Type Distribution',
    color_discrete_sequence=px.colors.sequential.RdBu,
    width=400,
    height=300
)
fig2.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig2.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig2,"Type Graph 2.html","Most apps on the Playstore are free, indicating a strategy to attract users first and monetize through ads or in app purchases")


Figure 3

In [372]:
#Figure 3
fig3=px.histogram(
    apps_df,
    x='Rating',
    nbins=20,
    title='Rating Distribution',
    color_discrete_sequence=['#636EFA'],
    width=400,
    height=300
)
fig3.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig3.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig3,"Rating Graph 3.html","Ratings are skewed towards higher values, suggesting that most apps are rated favorably by users")


Figure 4

In [373]:
#Figure 4
sentiment_counts=reviews_df['Sentiment_Score'].value_counts()
fig4=px.bar(
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    labels={'x':'Sentiment Score','y':'Count'},
    title='Sentiment Distribution',
    color=sentiment_counts.index,
    color_discrete_sequence=px.colors.sequential.RdPu,
    width=400,
    height=300
)
fig4.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig4.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig4,"Sentiment Graph 4.html","Sentiments in reviews show a mix of positive and negative feedback, with a slight lean towards positive sentiments")


Figure 5

In [374]:
#Figure 5
installs_by_category=apps_df.groupby('Category')['Installs'].sum().nlargest(10)
fig5=px.bar(
    x=installs_by_category.index,
    y=installs_by_category.values,
    orientation='h',
    labels={'x':'Installs','y':'Category'},
    title='Installs by Category',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.Blues,
    width=400,
    height=300
)
fig5.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig5.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig5,"Installs Graph 5.html","The categories with the most installs are social and communication apps, reflecting their broad appeal and daily usage")



invalid value encountered in cast



Figure 6

In [375]:
# Updates Per Year Plot
updates_per_year = apps_df['Last Updated'].dt.year.value_counts().sort_index()
fig6 = px.line(
    x=updates_per_year.index,
    y=updates_per_year.values,
    labels={'x': 'Year', 'y': 'Number of Updates'},
    title='Number of Updates Over the Years',
    color_discrete_sequence=['#AB63FA'],
    width=plot_width,
    height=plot_height
)
fig6.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=10)
)
save_plot_as_html(fig6, "Updates Graph 6.html", "Updates have been increasing over the years, showing that developers are actively maintaining and improving their apps.")


Figure 7

In [376]:
#Figure 7
revenue_by_category=apps_df.groupby('Category')['Revenue'].sum().nlargest(10)
fig7=px.bar(
    x=installs_by_category.index,
    y=installs_by_category.values,
    labels={'x':'Category','y':'Revenue'},
    title='Revenue by Category',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.Greens,
    width=400,
    height=300
)
fig7.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig7.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig7,"Revenue Graph 7.html","Categories such as Business and Productivity lead in revenue generation, indicating their monetization potential")


Figure 8

In [377]:
#Figure 8
genre_counts=apps_df['Genres'].str.split(';',expand=True).stack().value_counts().nlargest(10)
fig8=px.bar(
    x=genre_counts.index,
    y=genre_counts.values,
    labels={'x':'Genre','y':'Count'},
    title='Top Genres',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.OrRd,
    width=400,
    height=300
)
fig8.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig8.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig8,"Genre Graph 8.html","Action and Casual genres are the most common, reflecting users' preference for engaging and easy-to-play games")


Figure 9

In [378]:
#Figure 9
fig9=px.scatter(
    apps_df,
    x='Last Updated',
    y='Rating',
    color='Type',
    title='Impact of Last Update on Rating',
    color_discrete_sequence=px.colors.qualitative.Vivid,
    width=400,
    height=300
)
fig9.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig9.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig9,"Update Graph 9.html","The Scatter Plot shows a weak correlation between the last update and ratings, suggesting that more frequent updates dont always result in better ratings.")


Figure 10

In [379]:
#Figure 10
fig10=px.box(
    apps_df,
    x='Type',
    y='Rating',
    color='Type',
    title='Rating for Paid vs Free Apps',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=400,
    height=300
)
fig10.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig10.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig10,"Paid Free Graph 10.html","Paid apps generally have higher ratings compared to free apps, suggesting that users expect higher quality from apps they pay for")


In [380]:
plot_containers_split=plot_containers.split('</div>')

In [381]:
if len(plot_containers_split) > 1:
    final_plot=plot_containers_split[-2]+'</div>'
else:
    final_plot=plot_containers


# Webpage Styling

In [382]:
dashboard_html= """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name=viewport" content="width=device-width,initial-scale-1.0">
    <title> Google Play Store Review Analytics</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #333;
            color: #fff;
            margin: 0;
            padding: 0;
        }}
        .header {{
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 20px;
            background-color: #444
        }}
        .header img {{
            margin: 0 10px;
            height: 50px;
        }}
        .container {{
            display: flex;
            flex-wrap: wrap;
            justify_content: center;
            padding: 20px;
        }}
        .plot-container {{
             border: 2px solid #555;
             margin: 10px;
             padding: 10px;
             width: 100%; /* Allow full width */
             max-width: 1200px; /* Prevent it from stretching too much */
             height: auto; /* Auto adjust height */
             overflow: hidden;
             position: relative;
             cursor: pointer;
        }}
        .insights {{
            display: none;
            position: absolute;
            right: 10px;
            top: 10px;
            background-color: rgba(0, 0, 0, 0.7);
            padding: 5px;
            border-radius: 5px;
            color: #fff;
        }}
        .plot-container .plot {{
            width: 100%;
            height: auto; 
        }}
        .plot-container: hover .insights {{
            display: block;
        }}
        </style>
        <script>
            function openPlot(filename) {{
                window.open(filename, '_blank');
                }}
        </script>
    </head>
    <body>
        <div class= "header">
            <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Logo_2013_Google.png/800px-Logo_2013_Google.png" alt="Google Logo">
            <h1>Google Play Store Reviews Analytics</h1>
            <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Google_Play_Store_badge_EN.svg/1024px-Google_Play_Store_badge_EN.svg.png" alt="Google Play Store Logo">
        </div>
        <div class="container">
            {plots}
        </div>
    </body>
    </html>
    """

# Task 1

In [383]:
# Filter apps with more than 1,000 reviews
apps_df_t1 = apps_df[apps_df['Reviews'] > 1000]

In [384]:
apps_df_t1

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.700000,5000000,Free,0.0,Everyone,Art & Design,2018-08-01,1.2.4,4.0.3 and up,15.424948,11.379508,Top rated app,0.0,2018
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.000000,50000000,Free,0.0,Teen,Art & Design,2018-06-08,Varies with device,4.2 and up,17.727534,12.281384,Top rated app,0.0,2018
7,Infinite Painter,ART_AND_DESIGN,4.1,36815,29.000000,1000000,Free,0.0,Everyone,Art & Design,2018-06-14,6.1.61.1,4.2 and up,13.815511,10.513661,Top rated app,0.0,2018
8,Garden Coloring Book,ART_AND_DESIGN,4.4,13791,33.000000,1000000,Free,0.0,Everyone,Art & Design,2017-09-20,2.9.2,3.0 and up,13.815511,9.531771,Top rated app,0.0,2017
10,Text on Photo - Fonteee,ART_AND_DESIGN,4.4,13880,28.000000,1000000,Free,0.0,Everyone,Art & Design,2017-10-27,1.0.4,4.1 and up,13.815511,9.538204,Top rated app,0.0,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10809,Castle Clash: RPG War and Strategy FR,FAMILY,4.7,376223,24.000000,1000000,Free,0.0,Everyone,Strategy,2018-07-18,1.4.2,4.1 and up,13.815511,12.837937,Top rated app,0.0,2018
10815,Golden Dictionary (FR-AR),BOOKS_AND_REFERENCE,4.2,5775,4.900000,500000,Free,0.0,Everyone,Books & Reference,2018-07-19,7.0.4.6,4.2 and up,13.122363,8.661294,Top rated app,0.0,2018
10826,Frim: get new friends on local chat rooms,SOCIAL,4.0,88486,,5000000,Free,0.0,Mature 17+,Social,2018-03-23,Varies with device,Varies with device,15.424948,11.390600,Top rated app,0.0,2018
10832,FR Tides,WEATHER,3.8,1195,0.568359,100000,Free,0.0,Everyone,Weather,2014-02-16,6.0,2.1 and up,11.512925,7.085901,Above average,0.0,2014


In [385]:
apps_df_t1['Rating_Group'].value_counts()

Top rated app    4696
Above average     760
Average            27
Below Average       2
Name: Rating_Group, dtype: int64

In [386]:
# Step 2: Identify the top 5 categories by app count
top_categories = apps_df_t1['Category'].value_counts().nlargest(5).index

In [387]:
top_categories

Index(['FAMILY', 'GAME', 'TOOLS', 'PHOTOGRAPHY', 'PRODUCTIVITY'], dtype='object')

In [388]:
# Step 3: Filter the data for only top 5 categories
apps_df_t1 = apps_df_t1[apps_df_t1['Category'].isin(top_categories)]

In [389]:
apps_df_t1.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year
1653,ROBLOX,GAME,4.5,4447388,67.0,100000000,Free,0.0,Everyone 10+,Adventure;Action & Adventure,2018-07-31,2.347.225742,4.1 and up,18.420681,15.307828,Top rated app,0.0,2018
1654,Subway Surfers,GAME,4.5,27722264,76.0,1000000000,Free,0.0,Everyone 10+,Arcade,2018-07-12,1.90.0,4.1 and up,20.723266,17.137746,Top rated app,0.0,2018
1655,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,2018-07-05,1.129.0.2,4.1 and up,20.030119,16.925762,Top rated app,0.0,2018
1656,Solitaire,GAME,4.7,254258,23.0,10000000,Free,0.0,Everyone,Card,2018-08-01,2.137.0,4.1 and up,16.118096,12.446105,Top rated app,0.0,2018
1657,Bubble Shooter,GAME,4.5,148897,46.0,10000000,Free,0.0,Everyone,Casual,2018-07-17,1.20.1,4.0.3 and up,16.118096,11.91101,Top rated app,0.0,2018


In [390]:
# Step 4: Merge with reviews_df to get sentiment scores
merged_df_t1 = pd.merge(apps_df_t1, reviews_df, on="App", how="inner")

In [391]:
# Step 5: Define sentiment groups based on compound score
def classify_sentiment(score):
    if score >= 0.05:
        return "Positive"
    elif score <= -0.05:
        return "Negative"
    else:
        return "Neutral"

In [392]:
merged_df_t1["Sentiment"] = merged_df_t1["Sentiment_Score"].apply(classify_sentiment)

In [393]:
merged_df_t1.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,...,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity,Sentiment_Score
0,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,"If get free lives refill, continue accumulate ...",Positive,0.374411,0.556987,0.9623
1,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,My original rating 01/2015 5 Stars still holdi...,Positive,0.25,0.475,0.9039
2,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,"This good time passing game. However, I like l...",Positive,0.200926,0.437963,0.9325
3,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,"Fun first, spending two weeks level makes want...",Positive,0.183333,0.296825,0.8885
4,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,Please get rid amount pop ups love things holy...,Positive,0.319444,0.6,0.9835


In [394]:
merged_df_t1.tail()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,...,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity,Sentiment_Score
28979,Food Network,FAMILY,4.1,7823,,500000,Free,0.0,Teen,Entertainment,...,13.122363,8.964823,Top rated app,0.0,2018,Love get caught favorite shows.,Positive,0.5,0.8,0.802
28980,Food Network,FAMILY,4.1,7823,,500000,Free,0.0,Teen,Entertainment,...,13.122363,8.964823,Top rated app,0.0,2018,The Food Network Channel pretty much I watch t...,Positive,0.358333,0.733333,0.7574
28981,Food Network,FAMILY,4.1,7823,,500000,Free,0.0,Teen,Entertainment,...,13.122363,8.964823,Top rated app,0.0,2018,Great I adore last weeks keeps glitching resta...,Positive,0.016667,0.605556,0.7003
28982,Food Network,FAMILY,4.1,7823,,500000,Free,0.0,Teen,Entertainment,...,13.122363,8.964823,Top rated app,0.0,2018,Love easy favorite shows it!,Positive,0.519444,0.811111,0.8858
28983,Food Network,FAMILY,4.1,7823,,500000,Free,0.0,Teen,Entertainment,...,13.122363,8.964823,Top rated app,0.0,2018,It lets watch something diners drive ins dives,Neutral,0.0,0.0,0.0


In [395]:
merged_df_t1["Rating_Group"] = merged_df_t1["Rating"].apply(rating_group)

In [396]:
merged_df_t1.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,...,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity,Sentiment_Score
0,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,"If get free lives refill, continue accumulate ...",Positive,0.374411,0.556987,0.9623
1,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,My original rating 01/2015 5 Stars still holdi...,Positive,0.25,0.475,0.9039
2,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,"This good time passing game. However, I like l...",Positive,0.200926,0.437963,0.9325
3,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,"Fun first, spending two weeks level makes want...",Positive,0.183333,0.296825,0.8885
4,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,20.030119,16.925762,Top rated app,0.0,2018,Please get rid amount pop ups love things holy...,Positive,0.319444,0.6,0.9835


In [397]:
# Step 7: Aggregate data for visualization
sentiment_counts = merged_df_t1.groupby(["Category", "Rating_Group", "Sentiment"]).size().reset_index(name="Count")

In [398]:
sentiment_counts

Unnamed: 0,Category,Rating_Group,Sentiment,Count
0,FAMILY,Above average,Negative,59
1,FAMILY,Above average,Neutral,25
2,FAMILY,Above average,Positive,99
3,FAMILY,Top rated app,Negative,803
4,FAMILY,Top rated app,Neutral,392
5,FAMILY,Top rated app,Positive,3975
6,GAME,Above average,Negative,18
7,GAME,Above average,Neutral,9
8,GAME,Above average,Positive,95
9,GAME,Top rated app,Negative,4028


In [399]:
#After applying all the filters for task 1 rating group are left with only two categories

In [400]:

fig_t1 = px.bar(
    sentiment_counts, 
    x="Category", 
    y="Count", 
    color="Sentiment", 
    barmode="stack",
    facet_col="Rating_Group",
    title="Sentiment Distribution of User Reviews by Category and Rating Group",
    labels={"Category": "App Category", "Count": "Number of Reviews"},
    color_discrete_map={"Positive": "green", "Neutral": "gray", "Negative": "red"},
    width=900,
    height=500
)

fig_t1.update_layout(
    autosize=True,  # Allow automatic resizing
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size': 16},
    xaxis=dict(title_font={'size': 12}),
    yaxis=dict(title_font={'size': 12}),
    margin=dict(l=10, r=10, t=50, b=10),
    height=600  # Increased height to avoid cramping
)
# Save the visualization as an HTML file
save_plot_as_html(fig_t1, "Sentiment_Distribution.html", "Sentiment distribution varies significantly across rating groups and categories.")
fig_t1.show()

# Task 2

In [401]:
# Convert current UTC time to IST
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.datetime.now(ist).time()

In [402]:
current_time

datetime.time(22, 37, 28, 135544)

In [403]:
# Define the allowed time window (6 PM - 8 PM IST)
start_time = datetime.time(18, 0)  # 18:00 IST
end_time = datetime.time(20, 0)    # 20:00 IST

In [404]:
# Aggregate installs by category and exclude unwanted categories
apps_df_t2 = apps_df.groupby('Category')['Installs'].sum().reset_index()


invalid value encountered in cast



In [405]:
apps_df_t2

Unnamed: 0,Category,Installs
0,ART_AND_DESIGN,124233100.0
1,AUTO_AND_VEHICLES,53129800.0
2,BEAUTY,26916200.0
3,BOOKS_AND_REFERENCE,1916292000.0
4,BUSINESS,863518100.0
5,COMICS,56036100.0
6,COMMUNICATION,24152240000.0
7,DATING,206522400.0
8,EDUCATION,533852000.0
9,ENTERTAINMENT,2455660000.0


In [406]:
apps_df_t2 = apps_df_t2[~apps_df_t2['Category'].str.startswith(('A', 'C', 'G', 'S'))]

In [407]:
apps_df_t2

Unnamed: 0,Category,Installs
2,BEAUTY,26916200.0
3,BOOKS_AND_REFERENCE,1916292000.0
4,BUSINESS,863518100.0
7,DATING,206522400.0
8,EDUCATION,533852000.0
9,ENTERTAINMENT,2455660000.0
10,EVENTS,15949410.0
11,FAMILY,10041130000.0
12,FINANCE,770312400.0
13,FOOD_AND_DRINK,257777800.0


In [408]:
# Get the top 5 categories
top_categories_2 = apps_df_t2.nlargest(5, 'Installs')

In [409]:
top_categories_2

Unnamed: 0,Category,Installs
25,PRODUCTIVITY,12463070000.0
29,TOOLS,11450720000.0
11,FAMILY,10041130000.0
24,PHOTOGRAPHY,9721243000.0
30,TRAVEL_AND_LOCAL,6361859000.0


In [410]:
# Create a new column to highlight categories with installs > 1M
top_categories_2['Highlight'] = top_categories_2['Installs'].apply(lambda x: 'High Installs' if x > 1_000_000 else 'Low Installs')

In [411]:
top_categories_2

Unnamed: 0,Category,Installs,Highlight
25,PRODUCTIVITY,12463070000.0,High Installs
29,TOOLS,11450720000.0,High Installs
11,FAMILY,10041130000.0,High Installs
24,PHOTOGRAPHY,9721243000.0,High Installs
30,TRAVEL_AND_LOCAL,6361859000.0,High Installs


In [412]:
# Define a list of unique countries
country_list = ['United States', 'India', 'Germany', 'United Kingdom', 'France']
country_mapping = {'United States': 'USA', 'India': 'IND', 'Germany': 'DEU', 'United Kingdom': 'GBR', 'France': 'FRA'}


In [413]:
# Assign unique countries to each category
top_categories_2['Country'] = [country_list[i] for i in range(len(top_categories_2))]
top_categories_2['iso_alpha'] = top_categories_2['Country'].map(country_mapping)


In [414]:
top_categories_2

Unnamed: 0,Category,Installs,Highlight,Country,iso_alpha
25,PRODUCTIVITY,12463070000.0,High Installs,United States,USA
29,TOOLS,11450720000.0,High Installs,India,IND
11,FAMILY,10041130000.0,High Installs,Germany,DEU
24,PHOTOGRAPHY,9721243000.0,High Installs,United Kingdom,GBR
30,TRAVEL_AND_LOCAL,6361859000.0,High Installs,France,FRA


In [415]:
# Generate the choropleth map only if the time condition is met
if start_time <= current_time <= end_time:
    fig_t2 = px.choropleth(
        top_categories_2, 
        locations='iso_alpha', 
        color='Highlight',
        hover_name='Category',
        hover_data=['Installs'],
        title='Global Installs by App Category',
        color_discrete_map={'Highlighted': 'red', 'Normal': 'blue'}
    )
    
    fig_t2.update_layout(
        geo=dict(bgcolor='black'),
        paper_bgcolor='black',
        font_color='white',
        title_font_size=16
    )
    
    # Save the plot and add it to the dashboard
    save_plot_as_html(fig_t2, "Choropleth_Map_t2.html", "This choropleth map shows the distribution of installs for the top 5 app categories worldwide, highlighting categories with installs exceeding 1 million in red.")
    fig_t2.show()
else:
    print("Current IST time is outside the allowed 6 PM - 9 PM range. Graph will not be displayed.")

Current IST time is outside the allowed 6 PM - 9 PM range. Graph will not be displayed.


# Task 3 

In [416]:
# Filter data based on conditions
apps_df_t3= apps_df[
    (apps_df["Content Rating"] == "Teen") &  # Only Teen-rated apps
    (apps_df["Installs"] > 10000) &  # Installs greater than 10k
    (apps_df["App"].str.startswith("E", na=False))  # Apps starting with 'E'
]

In [417]:
apps_df_t3

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year
1354,"Eve Period Tracker - Love, Sex & Relationships...",HEALTH_AND_FITNESS,4.6,20326,28.0,1000000,Free,0.0,Teen,Health & Fitness,2018-08-04,2.9.18,4.1 and up,13.815511,9.919656,Top rated app,0.0,2018
1786,Episode - Choose Your Story,GAME,4.3,1841061,,50000000,Free,0.0,Teen,Simulation,2018-07-31,Varies with device,Varies with device,17.727534,14.425853,Top rated app,0.0,2018
1927,Earn to Die 2,GAME,4.6,1327265,99.0,50000000,Free,0.0,Teen,Racing,2017-04-12,1.3,2.3.3 and up,17.727534,14.098631,Top rated app,0.0,2017
1978,Earn to Die 2,GAME,4.6,1327269,99.0,50000000,Free,0.0,Teen,Racing,2017-04-12,1.3,2.3.3 and up,17.727534,14.098634,Top rated app,0.0,2017
2677,EHS Dongsen Shopping,SHOPPING,3.6,3656,9.0,1000000,Free,0.0,Teen,Shopping,2018-08-03,4.16.2,4.1 and up,13.815511,8.204125,Above average,0.0,2018
2768,Etsy: Handmade & Vintage Goods,SHOPPING,4.3,95520,15.0,10000000,Free,0.0,Teen,Shopping,2018-08-03,5.3.1,4.1 and up,16.118096,11.467091,Top rated app,0.0,2018
4066,E!,FAMILY,3.1,740,20.0,100000,Free,0.0,Teen,Entertainment,2018-05-24,3.3.1.41,4.4 and up,11.512925,6.60665,Above average,0.0,2018
4067,E! News,NEWS_AND_MAGAZINES,4.0,15443,25.0,1000000,Free,0.0,Teen,News & Magazines,2018-06-19,4.2.133,4.4 and up,13.815511,9.644911,Top rated app,0.0,2018
4073,Eternium,FAMILY,4.8,1506783,89.0,10000000,Free,0.0,Teen,Role Playing,2018-07-18,1.2.115,4.0 and up,16.118096,14.225487,Top rated app,0.0,2018
4332,EXO-L Amino for EXO Fans,SOCIAL,4.9,5677,67.0,50000,Free,0.0,Teen,Social,2018-07-13,1.8.19106,4.0.3 and up,10.819778,8.644178,Top rated app,0.0,2018


In [418]:
apps_df_t3.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year
1354,"Eve Period Tracker - Love, Sex & Relationships...",HEALTH_AND_FITNESS,4.6,20326,28.0,1000000,Free,0.0,Teen,Health & Fitness,2018-08-04,2.9.18,4.1 and up,13.815511,9.919656,Top rated app,0.0,2018
1786,Episode - Choose Your Story,GAME,4.3,1841061,,50000000,Free,0.0,Teen,Simulation,2018-07-31,Varies with device,Varies with device,17.727534,14.425853,Top rated app,0.0,2018
1927,Earn to Die 2,GAME,4.6,1327265,99.0,50000000,Free,0.0,Teen,Racing,2017-04-12,1.3,2.3.3 and up,17.727534,14.098631,Top rated app,0.0,2017
1978,Earn to Die 2,GAME,4.6,1327269,99.0,50000000,Free,0.0,Teen,Racing,2017-04-12,1.3,2.3.3 and up,17.727534,14.098634,Top rated app,0.0,2017
2677,EHS Dongsen Shopping,SHOPPING,3.6,3656,9.0,1000000,Free,0.0,Teen,Shopping,2018-08-03,4.16.2,4.1 and up,13.815511,8.204125,Above average,0.0,2018


In [419]:
# Extract Year-Month for time series aggregation
apps_df_t3['Year-Month'] = apps_df_t3['Last Updated'].dt.to_period('M')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [420]:
apps_df_t3

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year,Year-Month
1354,"Eve Period Tracker - Love, Sex & Relationships...",HEALTH_AND_FITNESS,4.6,20326,28.0,1000000,Free,0.0,Teen,Health & Fitness,2018-08-04,2.9.18,4.1 and up,13.815511,9.919656,Top rated app,0.0,2018,2018-08
1786,Episode - Choose Your Story,GAME,4.3,1841061,,50000000,Free,0.0,Teen,Simulation,2018-07-31,Varies with device,Varies with device,17.727534,14.425853,Top rated app,0.0,2018,2018-07
1927,Earn to Die 2,GAME,4.6,1327265,99.0,50000000,Free,0.0,Teen,Racing,2017-04-12,1.3,2.3.3 and up,17.727534,14.098631,Top rated app,0.0,2017,2017-04
1978,Earn to Die 2,GAME,4.6,1327269,99.0,50000000,Free,0.0,Teen,Racing,2017-04-12,1.3,2.3.3 and up,17.727534,14.098634,Top rated app,0.0,2017,2017-04
2677,EHS Dongsen Shopping,SHOPPING,3.6,3656,9.0,1000000,Free,0.0,Teen,Shopping,2018-08-03,4.16.2,4.1 and up,13.815511,8.204125,Above average,0.0,2018,2018-08
2768,Etsy: Handmade & Vintage Goods,SHOPPING,4.3,95520,15.0,10000000,Free,0.0,Teen,Shopping,2018-08-03,5.3.1,4.1 and up,16.118096,11.467091,Top rated app,0.0,2018,2018-08
4066,E!,FAMILY,3.1,740,20.0,100000,Free,0.0,Teen,Entertainment,2018-05-24,3.3.1.41,4.4 and up,11.512925,6.60665,Above average,0.0,2018,2018-05
4067,E! News,NEWS_AND_MAGAZINES,4.0,15443,25.0,1000000,Free,0.0,Teen,News & Magazines,2018-06-19,4.2.133,4.4 and up,13.815511,9.644911,Top rated app,0.0,2018,2018-06
4073,Eternium,FAMILY,4.8,1506783,89.0,10000000,Free,0.0,Teen,Role Playing,2018-07-18,1.2.115,4.0 and up,16.118096,14.225487,Top rated app,0.0,2018,2018-07
4332,EXO-L Amino for EXO Fans,SOCIAL,4.9,5677,67.0,50000,Free,0.0,Teen,Social,2018-07-13,1.8.19106,4.0.3 and up,10.819778,8.644178,Top rated app,0.0,2018,2018-07


In [421]:
# Aggregate total installs per month per category
apps_df_t3 = apps_df_t3.groupby(['Year-Month', 'Category'])['Installs'].sum().reset_index()

In [422]:
apps_df_t3

Unnamed: 0,Year-Month,Category,Installs
0,2014-07,GAME,5000000
1,2015-08,FAMILY,1000000
2,2017-03,SOCIAL,1000000
3,2017-04,FAMILY,100000
4,2017-04,GAME,100000000
5,2017-07,SPORTS,50000000
6,2018-01,PHOTOGRAPHY,1000000
7,2018-05,FAMILY,2100000
8,2018-06,FAMILY,650000
9,2018-06,NEWS_AND_MAGAZINES,1000000


In [423]:
# Convert back to datetime for plotting
apps_df_t3['Year-Month'] = apps_df_t3['Year-Month'].astype(str)
apps_df_t3['Year-Month'] = pd.to_datetime(apps_df_t3['Year-Month'])

In [424]:
apps_df_t3

Unnamed: 0,Year-Month,Category,Installs
0,2014-07-01,GAME,5000000
1,2015-08-01,FAMILY,1000000
2,2017-03-01,SOCIAL,1000000
3,2017-04-01,FAMILY,100000
4,2017-04-01,GAME,100000000
5,2017-07-01,SPORTS,50000000
6,2018-01-01,PHOTOGRAPHY,1000000
7,2018-05-01,FAMILY,2100000
8,2018-06-01,FAMILY,650000
9,2018-06-01,NEWS_AND_MAGAZINES,1000000


In [425]:
# Ensure sorting for correct MoM growth calculation
apps_df_t3 = apps_df_t3.sort_values(by=['Category', 'Year-Month'])

In [426]:
apps_df_t3

Unnamed: 0,Year-Month,Category,Installs
1,2015-08-01,FAMILY,1000000
3,2017-04-01,FAMILY,100000
7,2018-05-01,FAMILY,2100000
8,2018-06-01,FAMILY,650000
10,2018-07-01,FAMILY,65150000
13,2018-08-01,FAMILY,50000
0,2014-07-01,GAME,5000000
4,2017-04-01,GAME,100000000
11,2018-07-01,GAME,50000000
14,2018-08-01,GAME,10000000


In [427]:
# Calculate month-over-month (MoM) growth
apps_df_t3['MoM Growth'] = apps_df_t3.groupby('Category')['Installs'].pct_change() * 100

In [428]:
apps_df_t3

Unnamed: 0,Year-Month,Category,Installs,MoM Growth
1,2015-08-01,FAMILY,1000000,
3,2017-04-01,FAMILY,100000,-90.0
7,2018-05-01,FAMILY,2100000,2000.0
8,2018-06-01,FAMILY,650000,-69.047619
10,2018-07-01,FAMILY,65150000,9923.076923
13,2018-08-01,FAMILY,50000,-99.923254
0,2014-07-01,GAME,5000000,
4,2017-04-01,GAME,100000000,1900.0
11,2018-07-01,GAME,50000000,-50.0
14,2018-08-01,GAME,10000000,-80.0


In [429]:
# Highlight periods where MoM growth exceeds 20%
apps_df_t3['Significant Growth'] = apps_df_t3['MoM Growth'] > 20

In [430]:
apps_df_t3

Unnamed: 0,Year-Month,Category,Installs,MoM Growth,Significant Growth
1,2015-08-01,FAMILY,1000000,,False
3,2017-04-01,FAMILY,100000,-90.0,False
7,2018-05-01,FAMILY,2100000,2000.0,True
8,2018-06-01,FAMILY,650000,-69.047619,False
10,2018-07-01,FAMILY,65150000,9923.076923,True
13,2018-08-01,FAMILY,50000,-99.923254,False
0,2014-07-01,GAME,5000000,,False
4,2017-04-01,GAME,100000000,1900.0,True
11,2018-07-01,GAME,50000000,-50.0,False
14,2018-08-01,GAME,10000000,-80.0,False


In [431]:
# Get current IST time
ist_now = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(hours=5, minutes=30)
allowed_time_range = (18, 21)  # 6 PM to 9 PM IST

In [432]:
ist_now

datetime.datetime(2025, 2, 22, 22, 37, 28, 865156, tzinfo=datetime.timezone.utc)

In [433]:
# Restrict graph display between 6 PM - 9 PM IST
if allowed_time_range[0] <= ist_now.hour < allowed_time_range[1]:
    # Plot time series with month-to-month granularity
    fig_t3 = px.line(
        apps_df_t3,
        x='Year-Month',
        y='Installs',
        color='Category',
        title='Time Series Trend of Installs (Teen Apps Starting with "E")',
        markers=True
    )

    # Highlight areas where MoM growth exceeds 20%
    for category in apps_df_t3['Category'].unique():
        category_data = apps_df_t3[apps_df_t3['Category'] == category]
        significant_growth = category_data[category_data['Significant Growth']]

        if not significant_growth.empty:
            fig_t3.add_trace(
                go.Scatter(
                    x=significant_growth['Year-Month'],
                    y=significant_growth['Installs'],
                    fill='tozeroy',
                    mode='none',
                    fillcolor='rgba(255,0,0,0.3)',
                    name=f"Growth >20% ({category})"
                )
            )

    # Update layout styling for month-to-month display
    fig_t3.update_layout(
        plot_bgcolor='black',
        paper_bgcolor='black',
        font_color='white',
        title_font={'size': 16},
        xaxis=dict(
            title='Month-Year',
            title_font={'size': 12},
            tickmode='linear',
            dtick='M1',  # Ensures 1-month intervals
            tickformat="%b %Y"
        ),
        yaxis=dict(title='Total Installs', title_font={'size': 12}),
        margin=dict(l=10, r=10, t=30, b=10)
    )

    # Save plot
    save_plot_as_html(fig_t3, "TimeSeries_Growth.html", 
                      "Significant growth periods (MoM > 20%) are shaded under the curve.")
else:
    print("Current IST time is outside the allowed 6 PM - 9 PM range. Graph will not be displayed.")


Current IST time is outside the allowed 6 PM - 9 PM range. Graph will not be displayed.


# Dashboard Integration

In [434]:
final_html=dashboard_html.format(plots=plot_containers,plot_width=plot_width,plot_height=plot_height)

In [435]:
dashboard_path=os.path.join(html_files_path,"web page.html")

In [436]:
with open(dashboard_path, "w", encoding="utf-8") as f:
    f.write(final_html)

In [437]:
webbrowser.open('file://'+os.path.realpath(dashboard_path))

True

# ALL TASKS COMPLETED