In [1]:
#Step 1:Importing Libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
from datetime import datetime
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pytz
import nltk
import webbrowser
import os
import random

In [2]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\souja\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
#Create an interactive Choropleth map using Plotly to visualize global installs by Category. Apply filters to show data for only the top 5 app 
#categories and highlight category where the number of installs exceeds 1 million. The app category should not start with the characters “A,”
#“C,” “G,” or “S.” This graph should work only between 6 PM IST and 8 PM IST; apart from that time, we should not show it in the dashboard itself.

In [4]:
apps_df=pd.read_csv('Play Store Data.csv')
reviews_df=pd.read_csv('User Reviews.csv')

In [5]:
merged_df = pd.merge(apps_df, reviews_df, on='App', how='inner')

In [6]:
merged_df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,A kid's excessive ads. The types ads allowed a...,Negative,-0.250,1.000000
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,It bad >:(,Negative,-0.725,0.833333
2,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,like,Neutral,0.000,0.000000
3,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,,,,
4,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I love colors inspyering,Positive,0.500,0.600000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122657,FP Notebook,MEDICAL,4.5,410,60M,"50,000+",Free,0,Everyone,Medical,"March 24, 2018",2.1.0.372,4.4 and up,,,,
122658,FP Notebook,MEDICAL,4.5,410,60M,"50,000+",Free,0,Everyone,Medical,"March 24, 2018",2.1.0.372,4.4 and up,,,,
122659,FP Notebook,MEDICAL,4.5,410,60M,"50,000+",Free,0,Everyone,Medical,"March 24, 2018",2.1.0.372,4.4 and up,,,,
122660,FP Notebook,MEDICAL,4.5,410,60M,"50,000+",Free,0,Everyone,Medical,"March 24, 2018",2.1.0.372,4.4 and up,,,,


In [7]:
#Step 2:Data cleaning

In [8]:
merged_df['Installs'] = merged_df['Installs'].str.replace('+','').str.replace(',','').astype(int)
top_categories = merged_df.groupby('Category')['Installs'].sum().nlargest(5).index
merged_df_top5 = merged_df[merged_df['Category'].isin(top_categories)]

In [9]:
merged_df_top5 = merged_df_top5[~merged_df_top5['Category'].str.startswith(('A','C','G','S'))]

In [10]:
merged_df_top5['High Installs'] = merged_df_top5['Installs'] > 1_000_000

In [11]:
country_dict = {
    'India': 'IND',
    'United States': 'USA',
    'Brazil': 'BRA',
    'Germany': 'DEU',
    'Japan': 'JPN',
    'France': 'FRA',
    'United Kingdom': 'GBR',
    'Canada': 'CAN',
    'Australia': 'AUS',
    'South Africa': 'ZAF'
}

country_list = list(country_dict.keys())
merged_df_top5['Country'] = [random.choice(country_list) for _ in range(len(merged_df_top5))]
merged_df_top5['Country_Code'] = merged_df_top5['Country'].map(country_dict)

In [12]:
#Step 3:Data Transmission

In [13]:
grouped_df = merged_df_top5.groupby(['Country', 'Country_Code', 'Category'], as_index=False)['Installs'].sum()
print(grouped_df)

           Country Country_Code            Category      Installs
0        Australia          AUS  NEWS_AND_MAGAZINES   65303300000
1        Australia          AUS         PHOTOGRAPHY  114190800000
2        Australia          AUS        PRODUCTIVITY  108923700000
3           Brazil          BRA  NEWS_AND_MAGAZINES   74507500000
4           Brazil          BRA         PHOTOGRAPHY  113083600000
5           Brazil          BRA        PRODUCTIVITY   97589000000
6           Canada          CAN  NEWS_AND_MAGAZINES   66182100000
7           Canada          CAN         PHOTOGRAPHY  112827400000
8           Canada          CAN        PRODUCTIVITY   96967300000
9           France          FRA  NEWS_AND_MAGAZINES   67857100000
10          France          FRA         PHOTOGRAPHY  112020900000
11          France          FRA        PRODUCTIVITY   95868500000
12         Germany          DEU  NEWS_AND_MAGAZINES   65548600000
13         Germany          DEU         PHOTOGRAPHY  109012100000
14        

In [14]:
#Step 4:Sentiment Analysis

In [15]:
sentiment_summary = (
    merged_df.groupby('Category')['Sentiment']
    .value_counts()
    .unstack(fill_value=0)
    .reset_index()
)

sentiment_summary['Dominant_Sentiment'] = sentiment_summary[['Positive','Negative','Neutral']].idxmax(axis=1)

grouped_with_sentiment = grouped_df.merge(
    sentiment_summary[['Category','Dominant_Sentiment']],
    on='Category',
    how='left'
)

print(grouped_with_sentiment.head())

     Country Country_Code            Category      Installs Dominant_Sentiment
0  Australia          AUS  NEWS_AND_MAGAZINES   65303300000           Positive
1  Australia          AUS         PHOTOGRAPHY  114190800000           Positive
2  Australia          AUS        PRODUCTIVITY  108923700000           Positive
3     Brazil          BRA  NEWS_AND_MAGAZINES   74507500000           Positive
4     Brazil          BRA         PHOTOGRAPHY  113083600000           Positive


In [16]:
#Step 5:Plotting Graph 

In [17]:
html_files_paths="./"
if not os.path.exists(html_files_paths):
    os.makedirs(html_files_path)

In [18]:
plot_containers=""

In [19]:
def save_plot_as_html(fig,filename,insight):
    global plot_containers
    filepath= os.path.join(html_files_paths,filename)
    html_content=pio.to_html(fig,full_html=False,include_plotlyjs='inline')
    plot_containers+= f"""
    <div class ="plot-container" id="{filename}" onclick ="openPlot('{filename}')">
       <div class="plot">{html_content}</div> 
       <div class="insights">{insight}</div>
    </div>
    """
    fig.write_html(filepath,full_html=False,include_plotlyjs="inline")


In [20]:
ist = pytz.timezone("Asia/Kolkata")
now = datetime.now(ist)

if 18 <= now.hour < 20:
    fig3= px.choropleth(
        grouped_with_sentiment,
        locations="Country_Code",
        color="Installs",
        hover_name="Country",
        hover_data={
            "Category": True,
            "Installs": True,
            "Dominant_Sentiment": True,
            "Country_Code": False
        },
        title="Global Installs by Category with Sentiment",
        color_continuous_scale="Viridis"
    )

    fig3.update_layout(
        geo=dict(showframe=False, showcoastlines=True, projection_type="equirectangular"),
        plot_bgcolor="black",
        paper_bgcolor="black",
        font_color="white"
    )

    fig3.show()
    save_plot_as_html(
    fig3,
    "Choropleth_Global_Installs.html",
    "Choropleth map showing global app installs by category, with hover info displaying dominant sentiment."
    )
else:
    print("🌍 Map is available only between 6 PM and 8 PM IST.")


🌍 Map is available only between 6 PM and 8 PM IST.


In [21]:
print(grouped_with_sentiment.sort_values("Installs", ascending=False).head(10))


           Country Country_Code      Category      Installs Dominant_Sentiment
25  United Kingdom          GBR   PHOTOGRAPHY  122950100000           Positive
20           Japan          JPN  PRODUCTIVITY  122093800000           Positive
19           Japan          JPN   PHOTOGRAPHY  120980500000           Positive
1        Australia          AUS   PHOTOGRAPHY  114190800000           Positive
4           Brazil          BRA   PHOTOGRAPHY  113083600000           Positive
7           Canada          CAN   PHOTOGRAPHY  112827400000           Positive
22    South Africa          ZAF   PHOTOGRAPHY  112446300000           Positive
16           India          IND   PHOTOGRAPHY  112262200000           Positive
10          France          FRA   PHOTOGRAPHY  112020900000           Positive
14         Germany          DEU  PRODUCTIVITY  111925800000           Positive


In [22]:
plot_containers_split=plot_containers.split('</div')

In [23]:
if len(plot_containers_split) > 1:
    final_plot=plot_containers_split[-2]+'</div>'
else:
    final_plot=plot_containers

In [24]:
dashboard_html= """
<!DOCTYPE html>
<html lang="en">
<head>
     <meta charset="UTF-8">
     <meta name=viewport" content="width=device-width,initial-scale-1.0">
     <title>Google Play Store Review Analytics</title>
     <style>
        body {{ 
            font-family: Arial, sans-serif;
            background-color: #333;
            color: #fff;
            margin: 0;
            padding: 0;
         }}
         .header{{
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 20px;
            background-color: #444;
        }}
        .header img{{
            margin: 0 10px;
            height: 50px;
        }}
        .container{{
            display: flex;
            flex-wrap: wrap;
            justify-content: center;
            padding: 20px;
        }}
        .plot-container{{
            border: 2px solid #555;
            margin: 10px;
            padding: 10px;
            width: {plot_width}px;
            height: {plot_height}px;
            overflow: hidden;
            position: relative;
            cursor: pointer;
        }}
        .insights{{
            display: none;
            position: absolute;
            right: 10px;
            top: 10px;
            background-color: rgba(0,0,0,0.7);
            padding: 5px;
            border-radius: 5px;
            color: #fff;
        }}
        .plot-container:hover .insights{{ 
            display: block;
        }}
        </style>
        <script>
             function openPlot(filename){{
                 window.open(filename,'_blank');
                 }}
        </script>
    </head>
    <body>
        <div class= "header">
            <img src="https://www.keyweo.com/wp-content/uploads/2022/04/google-logo-history.jpg" alt="Google logo">
            <h1>Google Play Store Review Analytics</h1>
            <img src="https://www.sociocs.com/images/badge-google-play.png" alt="Google Play Store Logo">
        </div>
        <div class="container">
            {plots}
        </div>
    </body>
    </html>
    """

In [25]:
plot_width=400
plot_height=300
plot_bg_color='black'
text_color='white'
title_font={'size':16}
axis_font={'size':12}


In [26]:
final_html=dashboard_html.format(plots=plot_containers, plot_width=plot_width, plot_height=plot_height)

In [27]:
dashboard_path=os.path.join(html_files_paths,"web page.html")

In [28]:
with open(dashboard_path, "w", encoding="utf-8") as f:
    f.write(final_html)

In [29]:
webbrowser.open('file://'+os.path.realpath(dashboard_path))

True