In [112]:
import os
import webbrowser
from pathlib import Path

import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

In [113]:
# Make sure the VADER lexicon is available locally before the sentiment
# analyzer is instantiated further down.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [114]:
# Load the two raw inputs: app metadata and the per-app user reviews.
app, rev = (pd.read_csv(csv_name) for csv_name in ('apps.csv', 'user_reviews.csv'))

In [115]:
# Peek at the first rows of the app metadata.
app.head(n=5)

Unnamed: 0.1,Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [116]:
# Peek at the first rows of the user reviews.
rev.head(n=5)

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462
2,10 Best Foods for You,,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3


In [117]:
# Keep only apps that have a rating.
app = app.dropna(subset=['Rating'])

# Impute the remaining gaps with each column's most frequent value.
# fillna() returns a new Series, so the result has to be assigned back;
# calling it without assignment leaves the frame unchanged.
for col in app.columns:
    modes = app[col].mode()
    if not modes.empty:  # an all-NaN column has no mode to fill with
        app[col] = app[col].fillna(modes.iloc[0])

app = app.drop_duplicates()
# Discard rows whose rating exceeds 5. The explicit copy makes `app` an
# independent frame, so later column assignments do not operate on a slice.
app = app[app['Rating'] <= 5].copy()

# Reviews without text cannot be scored for sentiment.
rev = rev.dropna(subset=['Translated_Review'])

In [118]:
# Inspect column dtypes to see which columns still need numeric conversion.
app.dtypes

Unnamed: 0          int64
App                object
Category           object
Rating            float64
Reviews             int64
Size              float64
Installs           object
Type               object
Price              object
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
dtype: object

In [119]:
# Show the current dtype of the two columns converted in the next cell.
for column_name in ('Installs', 'Price'):
    print(app[column_name].dtype)


object
object


In [120]:

# Clean and convert 'Installs' column, e.g. "10,000+" -> 10000
app['Installs'] = (
    app['Installs']
    .astype(str)  # Ensure values are treated as strings
    .str.replace(',', '', regex=False)  # Remove thousands separators
    .str.replace('+', '', regex=False)  # Remove the trailing plus sign
    # Explicit 64-bit width: plain `int` resolves to a 32-bit integer on
    # Windows with NumPy 1.x, which aggregated install counts could overflow.
    .astype('int64')
)

# Clean and convert 'Price' column, e.g. "$4.99" -> 4.99
app['Price'] = (
    app['Price']
    .astype(str)  # Ensure values are treated as strings
    .str.replace('$', '', regex=False)  # Remove the currency symbol
    .astype(float)  # Convert to floats
)



In [121]:
# Inner-join app metadata with the reviews on the shared 'App' column.
merge_df = app.merge(rev, how='inner', on='App')
merge_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,A kid's excessive ads. The types ads allowed a...,Negative,-0.25,1.0
1,1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,It bad >:(,Negative,-0.725,0.833333
2,1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,like,Neutral,0.0,0.0
3,1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I love colors inspyering,Positive,0.5,0.6
4,1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I hate,Negative,-0.8,0.9


In [122]:
# Natural-log versions of the two heavily skewed count columns.
for log_column, source_column in (('Log_Install', 'Installs'), ('Log_review', 'Reviews')):
    app[log_column] = np.log(app[source_column])

In [123]:
def rating_group(rating):
    """Map a numeric rating onto a coarse quality label.

    Args:
        rating: Numeric app rating.

    Returns:
        'Top rated app' for ratings of 4 or more, 'Above avg' for 3 or more,
        'Avg' for 2 or more, and 'Below avg' for everything else (including
        values that compare false against every threshold, such as NaN).
    """
    # Thresholds are checked from highest to lowest; the first match wins.
    bands = ((4, 'Top rated app'), (3, 'Above avg'), (2, 'Avg'))
    for lower_bound, label in bands:
        if rating >= lower_bound:
            return label
    return 'Below avg'
# Label every app with its rating band.
app['Rating_group'] = app['Rating'].map(rating_group)

In [124]:
# Revenue column: price multiplied by install count.
app['Revenue'] = app['Price'].mul(app['Installs'])
app.head(n=5)

Unnamed: 0.1,Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Install,Log_review,Rating_group,Revenue
0,0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,9.21034,5.068904,Top rated app,0.0
1,1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,13.122363,6.874198,Above avg,0.0
2,2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,15.424948,11.379508,Top rated app,0.0
3,3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up,17.727534,12.281384,Top rated app,0.0
4,4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,11.512925,6.874198,Top rated app,0.0


In [125]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [126]:
# Single analyzer instance, reused for every review scored below.
sia=SentimentIntensityAnalyzer()

In [127]:
# Score each review with the analyzer's 'compound' value.
rev['Sentiment_score'] = rev['Translated_Review'].apply(
    lambda review_text: sia.polarity_scores(str(review_text)).get('compound')
)
rev.head(n=5)

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity,Sentiment_score
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333,0.9531
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462,0.6597
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875,0.6249
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3,0.6369
5,10 Best Foods for You,Best way,Positive,1.0,0.3,0.6369


In [128]:
# Parse the update date (unparseable values become NaT) and derive the year.
app['Last Updated'] = pd.to_datetime(arg=app['Last Updated'], errors='coerce')
app['year'] = app['Last Updated'].dt.year

In [129]:
# Directory that receives the per-plot HTML fragments and the dashboard page.
html_files_path = "./"
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(html_files_path, exist_ok=True)

In [130]:
# Shared look-and-feel settings for the dashboard charts.
plot_width, plot_height = 400, 300
plot_bg_color, text_color = 'black', 'white'
title_font = dict(size=16)
axis_font = dict(size=12)


In [131]:
# Accumulator for the dashboard's HTML fragments; save_plot_as_html appends
# one container per chart. Re-running this cell resets it.
plot_containers=""

In [132]:
def save_plot_as_html(fig,filename,insight):
    """Add a figure to the dashboard markup and save it as its own HTML file.

    Args:
        fig: Plotly figure to render.
        filename: File name (relative to ``html_files_path``) for the
            standalone fragment; also used as the container's element id and
            as the target opened when the container is clicked.
        insight: Text placed in the container's ``insights`` element.

    Side effects:
        Appends one container ``<div>`` to the module-level
        ``plot_containers`` string and writes ``filename`` to disk.
    """
    global plot_containers
    filepath=os.path.join(html_files_path,filename)
    html_content=pio.to_html(fig,full_html=False,include_plotlyjs='inline')
    # The class must be "plot-container" (hyphen): the dashboard stylesheet
    # targets `.plot-container` and `.plot-container:hover .insights`, so an
    # underscore spelling leaves the tile unstyled and the insight hidden.
    plot_containers+=f"""
    <div class="plot-container" id="{filename}" onclick="openPlot('{filename}')">
        <div class="plot">{html_content}</div>
        <div class="insights">{insight}</div>
    </div>
    """
    fig.write_html(filepath,full_html=False, include_plotlyjs='inline')

In [133]:

# Figure 1: the ten categories with the most apps.
category_counts = app['Category'].value_counts().nlargest(10)
fig1 = px.bar(
    x=category_counts.index,
    y=category_counts.values,
    color=category_counts.index,
    color_discrete_sequence=px.colors.sequential.Plasma,
    title='Top Categories on Play Store',
    labels={'x': 'Category', 'y': 'Count'},
    width=plot_width,
    height=plot_height,
)
# Dark theme built from the shared styling constants.
fig1.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis={'title_font': axis_font},
    yaxis={'title_font': axis_font},
    margin={'l': 10, 'r': 10, 't': 30, 'b': 10},
)
save_plot_as_html(fig1,"Category_Graph_1.html","The top categries on the play store are dominated by tools and productivity")

In [134]:
# Figure 2: share of apps per 'Type' value.
type_counts = app['Type'].value_counts()
fig2 = px.pie(
    names=type_counts.index,
    values=type_counts.values,
    color_discrete_sequence=px.colors.sequential.RdBu,
    title='App type Distribution',
    width=plot_width,
    height=plot_height,
)
# Dark theme built from the shared styling constants (a pie has no axes).
fig2.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    margin={'l': 10, 'r': 10, 't': 30, 'b': 10},
)
save_plot_as_html(fig2,"type_Graph_2.html","Categries free and paid app")

In [135]:

# Figure 3: histogram of app ratings.
fig3 = px.histogram(
    data_frame=app,
    x='Rating',
    nbins=20,
    color_discrete_sequence=['#636EFA'],
    title='Rating Distribution',
    width=plot_width,
    height=plot_height,
)
# Dark theme built from the shared styling constants.
fig3.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis={'title_font': axis_font},
    yaxis={'title_font': axis_font},
    margin={'l': 10, 'r': 10, 't': 30, 'b': 10},
)
save_plot_as_html(fig3,"Rating_Graph_3.html","Rating are skewed towords higher values, suggesting that most apps are ratedfaverably by users")

In [136]:

# Figure 4: number of reviews per sentiment label.
# Count the categorical 'Sentiment' column: value_counts() on the continuous
# 'Sentiment_score' yields one bar per distinct score and turns the colour
# into a continuous scale, which does not match the discrete palette or the
# 'sentiment' axis label used here.
sentiment_counts = rev['Sentiment'].value_counts()
fig4 = px.bar(
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    labels={'x': 'sentiment', 'y': 'Count'},
    title='Sentiment Distribution',
    color=sentiment_counts.index,
    color_discrete_sequence=px.colors.sequential.Plasma,
    width=plot_width,
    height=plot_height
)
# Style fig4 itself; the layout was previously applied to fig1, leaving this
# chart with the default light theme.
fig4.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=10)
)
save_plot_as_html(fig4,"sentiment_Graph_4.html","Sentiments in reviews show a mix of positive and negative feedback")

In [137]:

# Figure 5: the ten categories with the highest total install count.
installs_by_category = app.groupby('Category')['Installs'].sum().nlargest(10)
fig5 = px.bar(
    # Horizontal bars need the numeric values on x and the categories on y,
    # which is also what the axis labels below describe.
    x=installs_by_category.values,
    y=installs_by_category.index,
    orientation='h',
    labels={'x': 'Installs', 'y': 'Category'},
    title='installs by category',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.Blues,
    width=plot_width,
    height=plot_height
)
# Style fig5 itself; the layout was previously applied to fig1, leaving this
# chart with the default light theme.
fig5.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=10)
)
save_plot_as_html(fig5,"Installs_Graph_5.html","The categries with the most installs are social and communication apps")

In [138]:

# Figure 6: number of apps per 'Last Updated' year, in chronological order.
updates_per_year = app['Last Updated'].dt.year.value_counts().sort_index()
fig6 = px.line(
    x=updates_per_year.index,
    y=updates_per_year.values,
    color_discrete_sequence=['#AB63FA'],
    title='Number of Updates over the year',
    labels={'x': 'Year', 'y': 'Number of Updates'},
    width=plot_width,
    height=plot_height,
)
# Dark theme built from the shared styling constants.
fig6.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis={'title_font': axis_font},
    yaxis={'title_font': axis_font},
    margin={'l': 10, 'r': 10, 't': 30, 'b': 10},
)
save_plot_as_html(fig6,"Updates_Graph_6.html","Updares have been increasing over the year, showing the developers")

In [139]:

# Figure 7: the ten categories with the highest summed 'Revenue'.
Revenue_by_category = app.groupby('Category')['Revenue'].sum().nlargest(10)
fig7 = px.bar(
    x=Revenue_by_category.index,
    y=Revenue_by_category.values,
    # The y axis shows revenue (it was labelled 'year'), and 'Category' was misspelled.
    labels={'x': 'Category', 'y': 'Revenue'},
    title='Revenue by category',
    # Colour by this chart's own categories; borrowing the index of the
    # installs ranking attaches unrelated category names to the bars.
    color=Revenue_by_category.index,
    color_discrete_sequence=px.colors.sequential.Blues,
    width=plot_width,
    height=plot_height
)
fig7.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=10)
)
save_plot_as_html(fig7,"Revenue_Graph_7.html","Categries such as Business and productivity lead in revenue generation")

In [140]:

# Figure 8: the ten most frequent genres. 'Genres' can hold several
# ';'-separated values, so each one is split out and counted separately.
genre_counts = app['Genres'].str.split(';',expand=True).stack().value_counts().nlargest(10)
fig8 = px.bar(
    x=genre_counts.index,
    y=genre_counts.values,
    labels={'x': 'Genre', 'y': 'Count'},
    title='Top Genre',
    # Colour by this chart's own genres; borrowing the index of the installs
    # ranking attaches unrelated category names to the bars.
    color=genre_counts.index,
    color_discrete_sequence=px.colors.sequential.OrRd,
    width=plot_width,
    height=plot_height
)
fig8.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=10)
)
save_plot_as_html(fig8,"Genre_Graph_8.html","Action and casual genres are the most common, reflecting users performance for engaging and easy-to-play games")

In [141]:

# Figure 9: rating against last-update date, coloured by app type.
fig9 = px.scatter(
    app,
    x='Last Updated',
    y='Rating',
    color='Type',
    title='Impact of Last update on rating',
    color_discrete_sequence=px.colors.qualitative.Vivid,
    width=plot_width,
    height=plot_height
)
# Style fig9 itself; the layout was previously applied to fig8, leaving this
# chart with the default light theme.
fig9.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=10)
)
save_plot_as_html(fig9,"UpdateRting_Graph_9.html","The scatter plot shows a weak correlation between the last update and ratings")

In [142]:

# Figure 10: rating distribution per app type as box plots.
fig10 = px.box(
    data_frame=app,
    x='Type',
    y='Rating',
    color='Type',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title='Rating for paid vs free apps',
    width=plot_width,
    height=plot_height,
)
# Dark theme built from the shared styling constants.
fig10.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis={'title_font': axis_font},
    yaxis={'title_font': axis_font},
    margin={'l': 10, 'r': 10, 't': 30, 'b': 10},
)
save_plot_as_html(fig10,"PaidFree_Graph_10.html","Paid apps generraly have higher rating in comparision to free apps")

In [143]:
# Break the accumulated markup apart at every closing-div prefix.
plot_containers_split = plot_containers.split(sep='</div')

In [144]:
# Second-to-last piece with its closing tag restored when the split produced
# more than one piece; otherwise the unsplit markup.
final_plot = (
    plot_containers_split[-2] + '</div>'
    if len(plot_containers_split) > 1
    else plot_containers
)

In [166]:
# Page template for the dashboard, filled in with str.format().
# Placeholders: {plots}, {plot_width}, {plot_height}.
# Literal CSS/JS braces are doubled ({{ }}) so format() leaves them alone;
# the placeholders use single braces so they are actually substituted.
dashboard_html = """
<!DOCTYPE html>
<html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Google Play Store Review Analytics</title>
        <style>
            body {{
                font-family: Arial, sans-serif;
                background-color: #333;
                color: #fff;
                margin: 0;
                padding: 0;
            }}
            .header {{
                display: flex;
                align-items: center;
                justify-content: center;
                padding: 20px;
                background-color: #444;
            }}
            .header img {{
                margin: 0 10px;
                height: 50px;
            }}
            .container {{
                display: flex;
                flex-wrap: wrap;
                justify-content: center;
                padding: 20px;
            }}
            .plot-container {{
                border: 2px solid #555;
                margin: 10px;
                padding: 10px;
                width: {plot_width}px;
                height: {plot_height}px;
                overflow: hidden;
                position: relative;
                cursor: pointer;
            }}
            .insights {{
                display: none;
                position: absolute;
                right: 10px;
                top: 10px;
                background-color: rgba(0, 0, 0, 0.7);
                padding: 5px;
                border-radius: 5px;
                color: #fff;
            }}
            .plot-container:hover .insights {{
                display: block;
            }}
        </style>
        <script>
            function openPlot(filename) {{
                window.open(filename, '_blank');
            }}
        </script>
    </head>
    <body>
        <div class="header">
            <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Logo_2013_Google.png/800px-Logo_2013_Google.png" alt="Google Logo">
            <h1>Google Play Store Reviews Analytics</h1>
        </div>
        <div class="container">
            {plots}
        </div>
    </body>
</html>
"""

In [167]:
# Fill the template placeholders with the accumulated plot markup and tile size.
final_html = dashboard_html.format_map({
    'plots': plot_containers,
    'plot_width': plot_width,
    'plot_height': plot_height,
})

In [168]:
# Location of the assembled dashboard page inside the output directory.
dashboard_path=os.path.join(html_files_path,"web page.html")

In [169]:
# Persist the assembled page as UTF-8 text.
with open(dashboard_path, mode="w", encoding="utf-8") as dashboard_file:
    dashboard_file.write(final_html)

In [170]:
# Open the dashboard in the default browser.
# Path.as_uri() builds a well-formed file URI (three slashes, forward
# slashes on Windows, percent-encoded space in "web page.html"), unlike
# concatenating 'file://' with a native path.
webbrowser.open(Path(dashboard_path).resolve().as_uri())

True