In [2]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [3]:
import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import webbrowser
import os

In [4]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\njsga\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [5]:
apps_df = pd.read_csv("E:/Data Analyst Intern Tasks/Jupyter/Datasets/PlayStore Data.csv")
reviews_df = pd.read_csv("E:/Data Analyst Intern Tasks/Jupyter/Datasets/User reviews.csv")

In [6]:
apps_df.head()

Unnamed: 0.1,Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [7]:
reviews_df.head()

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462
2,10 Best Foods for You,,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3


In [8]:
# Data Cleaning 

apps_df = apps_df.dropna(subset=['Rating'])
for column in apps_df.columns:
    apps_df[column].fillna(apps_df[column].mode()[0])
apps_df.drop_duplicates(inplace=True)    
apps_df = apps_df[apps_df['Rating']<=5]
reviews_df.dropna(subset=['Translated_Review'],inplace=True)

In [9]:
#Convert the Installs columns to numeric by removing commas and +
apps_df['Installs']=apps_df['Installs'].str.replace(',','').str.replace('+','').astype(int)

# Convert Price column to numeric after removing $
apps_df['Price']=apps_df['Price'].str.replace('$','').astype(float)

In [10]:
apps_df = apps_df.loc[:, ~apps_df.columns.str.contains('^Unnamed')]

In [11]:
apps_df.dtypes

App                object
Category           object
Rating            float64
Reviews             int64
Size               object
Installs            int64
Type               object
Price             float64
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
dtype: object

In [12]:
merged_df = pd.merge(apps_df,reviews_df,on='App',how='inner')
merged_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,A kid's excessive ads. The types ads allowed a...,Negative,-0.25,1.0
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,It bad >:(,Negative,-0.725,0.833333
2,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,like,Neutral,0.0,0.0
3,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I love colors inspyering,Positive,0.5,0.6
4,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I hate,Negative,-0.8,0.9


In [13]:
merged_df.isnull().sum()

App                           0
Category                      0
Rating                        0
Reviews                       0
Size                      11860
Installs                      0
Type                          0
Price                         0
Content Rating                0
Genres                        0
Last Updated                  0
Current Ver                   0
Android Ver                   0
Translated_Review             0
Sentiment                     0
Sentiment_Polarity            0
Sentiment_Subjectivity        0
dtype: int64

In [14]:
# Data Transformation 

In [15]:
def convert_size(size):
    if isinstance(size, str): 
        if 'M' in size:
            return float(size.replace('M', ''))
        elif 'k' in size:
            return float(size.replace('k', '')) / 1024 
        elif 'G' in size:
            return float(size.replace('G', '')) * 1024 
        else:
            return None  
    return size 

In [16]:
apps_df.loc[:, 'Size'] = apps_df['Size'].apply(convert_size)

In [17]:
apps_df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9652,FR Calculator,FAMILY,4.0,7,2.6,500,Free,0.0,Everyone,Education,"June 18, 2017",1.0.0,4.1 and up
9654,Sya9a Maroc - FR,FAMILY,4.5,38,53.0,5000,Free,0.0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
9655,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6,100,Free,0.0,Everyone,Education,"July 6, 2018",1,4.1 and up
9657,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,,1000,Free,0.0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [18]:
# Logarthmic

apps_df['Log_Installs'] = np.log(apps_df['Installs'])
apps_df['Log_Reviews'] = np.log(apps_df['Reviews'])

In [19]:
apps_df.dtypes

App                object
Category           object
Rating            float64
Reviews             int64
Size               object
Installs            int64
Type               object
Price             float64
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
Log_Installs      float64
Log_Reviews       float64
dtype: object

In [20]:
def rating_group(rating):
    if rating >=4:
        return 'Top rated app'
    elif rating >=3:
        return 'Above average'
    else:
        return 'Below average'
apps_df['Rating_Group']=apps_df['Rating'].apply(rating_group)        

In [21]:
# revenue column
apps_df['Revenue']=apps_df['Price']*apps_df['Installs']

In [22]:
# sentiment analysis NLP 

In [23]:
''' A Sentiment Analyzer in NLP (Natural Language Processing) is a tool or model that determine the emotional tone or attitude expressed in a piece of text.
It helps classify the sentiment as positive, negative, or neutral, and sometimes even as more specific emotions (like happy, sad, angry, etc.) '''

' A Sentiment Analyzer in NLP (Natural Language Processing) is a tool or model that determine the emotional tone or attitude expressed in a piece of text.\nIt helps classify the sentiment as positive, negative, or neutral, and sometimes even as more specific emotions (like happy, sad, angry, etc.) '

In [24]:
#  How it works:
''' 1. Text Input: The input is usually a piece of text, such as a sentence, review, tweet,
or any other written content.


2. Processing: The sentiment analyzer processes the text by breaking it down into smaller units 
(like words or phrases) and evaluating how these units convey sentiment. It uses:

      a. Lexicons: Predefined lists of words associated with positive or negative sentiments.
      b. Machine Learning Models: Trained models (like Naive Bayes, LSTM, BERT) that learn
                                 from large datasets to detect sentiment based on context and word usage.
3. Output: It then assigns a sentiment score (often between -1 and 1) or labels (positive, negative, neutral). For example:
           Positive: "I like Chocolates!"
           Negative: "I hate this Person."
           Neutral: "This book is okay." '''         

' 1. Text Input: The input is usually a piece of text, such as a sentence, review, tweet,\nor any other written content.\n\n\n2. Processing: The sentiment analyzer processes the text by breaking it down into smaller units \n(like words or phrases) and evaluating how these units convey sentiment. It uses:\n\n      a. Lexicons: Predefined lists of words associated with positive or negative sentiments.\n      b. Machine Learning Models: Trained models (like Naive Bayes, LSTM, BERT) that learn\n                                 from large datasets to detect sentiment based on context and word usage.\n3. Output: It then assigns a sentiment score (often between -1 and 1) or labels (positive, negative, neutral). For example:\n           Positive: "I like Chocolates!"\n           Negative: "I hate this Person."\n           Neutral: "This book is okay." '

In [25]:
# For example - Sentence  "I love Chocolates!"
# The produce score like :

{
    'positive': 0.9,
    'neutral': 0.1,
    'negative': 0.0
}

{'positive': 0.9, 'neutral': 0.1, 'negative': 0.0}

In [26]:
!pip install vaderSentiment



In [27]:
pip show vaderSentiment

Name: vaderSentiment
Version: 3.3.2
Summary: VADER Sentiment Analysis. VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media, and works well on texts from other domains.
Home-page: https://github.com/cjhutto/vaderSentiment
Author: C.J. Hutto
Author-email: cjhutto@gatech.edu
License: MIT License
Location: E:\Data Analyst Intern Tasks\Jupyter\venv\Lib\site-packages
Requires: requests
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [28]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd



analyzer = SentimentIntensityAnalyzer()
print(analyzer.polarity_scores("This is awesome!")) 

{'neg': 0.0, 'neu': 0.313, 'pos': 0.687, 'compound': 0.6588}


In [29]:
# Initialize VADER sentiment analyzer
sia = SentimentIntensityAnalyzer()

In [30]:
#Polarity Scores in SIA
#Positive, Negative, neutral and compound


# Positive polarity
review = "This app is amazing!! I loved its features"
sentiment_score = sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.0, 'neu': 0.42, 'pos': 0.58, 'compound': 0.8513}


In [31]:
#Negative polarity
review = "This app is bad! I hate its features"
sentiment_score = sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.555, 'neu': 0.445, 'pos': 0.0, 'compound': -0.8172}


In [32]:
#Neutral polarity
review = "This app is okay"
sentiment_score = sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.0, 'neu': 0.612, 'pos': 0.388, 'compound': 0.2263}


In [33]:
reviews_df['Sentiment_Score'] = reviews_df['Translated_Review'].apply(lambda x: sia.polarity_scores(str(x))['compound'])

In [34]:
reviews_df.head()

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity,Sentiment_Score
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333,0.9531
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462,0.6597
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875,0.6249
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3,0.6369
5,10 Best Foods for You,Best way,Positive,1.0,0.3,0.6369


In [35]:
apps_df['Last Updated']=pd.to_datetime(apps_df['Last Updated'],errors = 'coerce')

In [36]:
apps_df['Year']=apps_df['Last Updated'].dt.year

In [37]:
apps_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,2018-01-07,1.0.0,4.0.3 and up,9.21034,5.068904,Top rated app,0.0,2018
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,Above average,0.0,2018
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,2018-08-01,1.2.4,4.0.3 and up,15.424948,11.379508,Top rated app,0.0,2018
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,2018-06-08,Varies with device,4.2 and up,17.727534,12.281384,Top rated app,0.0,2018
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,2018-06-20,1.1,4.4 and up,11.512925,6.874198,Top rated app,0.0,2018


In [38]:
# Plotly 

In [39]:
# create a directory for html files

html_files_path='./'
if not os.path.exists(html_files_path):
    os.makedirs(html_files_path)

In [40]:
plot_containers=""

In [41]:
def save_plot_as_html(fig,filename,insight):
    global plot_containers
    filepath=os.path.join(html_files_path,filename)
    html_content=pio.to_html(fig,full_html=False,include_plotlyjs='inline')
    #append the plot and its insight to plot_containers
    plot_containers += f"""
    <div class = "plot-container" id='{filename}' onclick='openPlot('{filename}')'>
         <div class="plot"> {html_content}</div>
         <div class ='insights'>{insight}</div>
    </div>
    """
    fig.write_html(filepath,full_html=False, include_plotlyjs='inline')

In [42]:
plot_width = 400
plot_height = 300
plot_bg_color='black'
text_color = 'white'
title_font = {'size':16}
axis_font = {'size':12}

In [43]:
# figure1
category_counts = apps_df['Category'].value_counts().nlargest(10)
fig1=px.bar(
    x=category_counts.index,
    y=category_counts.values,
    labels={'x':'Category','y':'Count'},
    title='Top Categories on Play Store',
    color=category_counts.index,
    color_discrete_sequence=px.colors.sequential.Plasma,
    width=400,
    height=300
)
fig1.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin = dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig1,'Category Graph 1.html','The top categoris on the Play Store are dominated by tools, entertainment, and productivity apps')

In [44]:
# Plotly 

In [45]:
# figure2
type_counts = apps_df['Type'].value_counts()
fig2=px.pie(
    values=type_counts.values,
    names=type_counts.index,
    title='App Type Distrubution',
    color_discrete_sequence=px.colors.sequential.RdBu,
    width=400,
    height=300
)
fig2.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    margin = dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig2,'Type Graph 2.html','Most apps on the PlayStore are free, indicating a stratergy to attract users first and monetize through ads or in app purchases')

In [46]:
# figure3
fig3=px.histogram(
    apps_df,
    x='Rating',
    nbins=20,
    title='Rating Distribution',
    color_discrete_sequence= ['#636EFA'],
    width=400,
    height=300
)
fig3.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin = dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig3,'Rating Graph 3.html','Ratings are skewed towards higher values, suggesting that most apps are rated favorably by users')

In [47]:
# figure4
sentiment_counts = reviews_df['Sentiment_Score'].value_counts()
fig4=px.bar(
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    labels={'x':'Sentiment Score','y':'Count'},
    title='Sentiment Distribution',
    color=sentiment_counts.index,
    color_discrete_sequence=px.colors.sequential.RdPu,
    width=400,
    height=300
)
fig4.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin = dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig4,'Sentiment Graph 4.html','Sentiments in reviews show a mix of positive and negative feedback, with a slight lean towards positive sentiments')

In [48]:
# figure5
installs_by_category = apps_df.groupby('Category')['Installs'].sum().nlargest(10)
fig5=px.bar(
    x=installs_by_category.index,
    y=installs_by_category.values,
    orientation = 'h',
    labels={'x':'Installs','y':'Category'},
    title='Installs by Category',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.Blues,
    width=400,
    height=300
)
fig5.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin = dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig5,'Installs Graph 5.html','The categories with the most installs are social and communicatio apps, reflecting their board appeal and daily usage')

In [49]:
#plotly 

In [50]:
# figure 6
updates_per_year = apps_df['Last Updated'].dt.year.value_counts().sort_index()
fig6=px.line(
    x=updates_per_year.index,
    y=updates_per_year.values,
    orientation = 'h',
    labels={'x':'Year','y':'Number of Updates'},
    title='Number of Updated Over the Years',
    color_discrete_sequence=['#AB63FA'],
    width=plot_width,
    height=plot_height
)
fig6.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin = dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig6,'Updates Per Year 6.html','Updates have been increasing over the years, showing that developers are actively maintaining and improving their apps.')

In [51]:
# figure 7
revenue_by_category = apps_df.groupby('Category')['Revenue'].sum().nlargest(10)
fig7=px.bar(
    x=revenue_by_category.index,
    y=revenue_by_category.values,
    labels={'x':'Category','y':'Revenue'},
    title='Revenue by Category',
    color=revenue_by_category.index,
    color_discrete_sequence=px.colors.sequential.Greens,
    width=400,
    height=300
)
fig7.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin = dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig7,'Revenue Graph 7.html','Categories such as Bussiness and Productivity lead in revenue generation, indicating their monetization potential')

In [52]:
# figure 8
genre_counts = apps_df['Genres'].str.split(';',expand=True).stack().value_counts().nlargest(10)
fig8=px.bar(
    x=genre_counts.index,
    y=genre_counts.values,
    labels={'x':'Genre','y':'Count'},
    title='Top Genres',
    color=genre_counts.index,
    color_discrete_sequence=px.colors.sequential.OrRd,
    width=400,
    height=300
)
fig8.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin = dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig8,'Genre Graph 8.html','Action and Casual genres are the most common, reflecting users preference for engaging and easy-to-play game')

In [53]:
# figure 9
fig9=px.scatter(
    apps_df,
    x='Last Updated',
    y='Rating',
    color='Type',
    title='Impact of Last Update on Rating',
    color_discrete_sequence=px.colors.qualitative.Vivid,
    width=400,
    height=300
)
fig9.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin = dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig9,'Update Graph 9.html','The scatter Plot shows a weak correlation between the last update and ratings, suggesting that more frequent updates dont always result in better ratings.')

In [54]:
    # figure 10
    fig10=px.box(
        apps_df,
        x='Type',
        y='Rating',
        color='Type',
        title='Rating for Paid vs free Apps',
        color_discrete_sequence=px.colors.qualitative.Pastel,
        width=400,
        height=300
    )
    fig10.update_layout(
        plot_bgcolor='black',
        paper_bgcolor='black',
        font_color='white',
        title_font={'size':16},
        xaxis=dict(title_font={'size':12}),
        yaxis=dict(title_font={'size':12}),
        margin = dict(l=10,r=10,t=30,b=10)
    )
    #fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
    save_plot_as_html(fig10,'Paid Free Graph 10.html','paid apps generally have higher ratings compared to free apps, suggesting that users expect higher quality from apps they pay for')

In [55]:
plot_containers_split = plot_containers.split('</div>')

In [56]:
if len(plot_containers_split) > 1:
    final_plot = plot_containers_split[-2]+'</div>'
else:
    final_plot=plot_containers

In [57]:
# CREATING WEB DASHBOARD 

In [58]:
dashboard_html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Google Play Store Review Analytics</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #333;
            color: #fff;
            margin: 0;
            padding: 0;
        }}
        .header {{
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 20px;
            background-color: #444;
        }}
        .header img {{
            margin: 0 10px;
            height: 50px;
        }}
        .container {{
            display: flex;
            flex-wrap: wrap;
            justify-content: center;
            padding: 20px;
        }}
        .plot-container {{
            border: 2px solid #555;
            margin: 10px;
            padding: 10px;
            width: {plot_width}px;
            height: {plot_height}px;
            overflow: hidden;
            position: relative;
            cursor: pointer;
        }}
        .insights {{
            display: none;
            position: absolute;
            right: 10px;
            top: 10px;
            background-color: rgba(0, 0, 0, 0.7);
            padding: 5px;
            border-radius: 5px;
            color: #fff;
        }}
        .plot-container:hover .insights {{
            display: block;
        }}
    </style>
    <script>
        function openPlot(filename) {{
            window.open(filename, '_blank');
        }}
    </script>
</head>
<body>
  <div class="header">
      <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Logo_2013_Google.png/880px-Logo_2013_Google.png" alt="Google Logo">
      <h1>Google Play Store reviews Analytics</h1>
      <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Google_Play_Store_badge_EN.svg/1024px-Google_Play_Store_badge_EN.svg.png" alt="Google Play Store Logo">
  </div>
  <div class="container">
      {plots}
  </div>
</body>
</html>
"""


In [59]:
final_html = dashboard_html.format(plots=plot_containers,plot_width=plot_width,plot_height=plot_height)

In [60]:
dashboard_path = os.path.join(html_files_path,"web page.html")

In [61]:
with open(dashboard_path, "w", encoding="utf-8") as f:
    f.write(final_html)

In [62]:
webbrowser.open('file://'+os.path.realpath(dashboard_path))

True

In [63]:
apps_df.to_csv("E:/Data Analyst Intern Tasks/Jupyter/Cleaned Datasets/Cleaned_GooglePlaystore.csv", index=False)

In [64]:
reviews_df.to_csv("E:/Data Analyst Intern Tasks/Jupyter/Cleaned Datasets/Cleaned_UserReviews.csv", index=False)