In [28]:
import pandas as pd
import plotly.express as px
from datetime import datetime
import pytz

In [29]:
# Read the Play Store export from the working directory and preview its first rows
df = pd.read_csv(filepath_or_buffer='Play Store Data.csv')
df.head(n=5)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [56]:
# Remove app categories starting with 'A', 'C', 'G', or 'S'.
# na=False makes a missing Category evaluate to False in the prefix test (so the row is
# kept) instead of NaN, which can make the `~` inversion fail on an object-dtype mask.
# .copy() yields an independent frame, so adding columns to `df` afterwards does not
# raise SettingWithCopyWarning.
df = df[~df['Category'].str.startswith(('A', 'C', 'G', 'S'), na=False)].copy()
def parse_installs(x):
    """Convert a Play Store ``Installs`` value such as ``"10,000+"`` to an ``int``.

    Parameters
    ----------
    x : Any
        Raw cell value. It is stringified first, so both strings and ints are accepted.

    Returns
    -------
    int
        The install count with thousands separators and the ``+`` sign removed,
        or ``0`` when the remaining text is not a whole number (e.g. ``"Free"`` or NaN).
    """
    cleaned = str(x).replace(',', '').replace('+', '')
    try:
        return int(cleaned)
    except ValueError:
        # Non-numeric text counts as zero installs instead of aborting the
        # conversion of the whole column.
        return 0
# Numeric install count derived element-wise from the raw 'Installs' column
df['Installs_num'] = df['Installs'].map(parse_installs)

In [57]:
# Total installs per category over the whole (global) dataset, largest first;
# the first five category labels select the rows kept in df_top5.
category_totals = df.groupby('Category')['Installs_num'].sum()
top5_cats = category_totals.sort_values(ascending=False).index[:5]
df_top5 = df.loc[df['Category'].isin(top5_cats)].copy()

In [58]:
# The dataset has no country information, so every row gets the placeholder country 'World'
df_top5.loc[:, 'Country'] = 'World'

In [59]:
# Sum installs for each (Country, Category) pair and return the keys as ordinary columns
df_agg = (
    df_top5
    .groupby(['Country', 'Category'])[['Installs_num']]
    .sum()
    .reset_index()
)


In [60]:
# Boolean flag used for bar colouring: True when a category's summed installs
# are strictly above the threshold.
HIGHLIGHT_THRESHOLD = 1_000_000
df_agg['Highlight'] = df_agg['Installs_num'] > HIGHLIGHT_THRESHOLD

In [81]:
# --- Time-based Dashboard Logic ---
def show_choropleth_6_to_8pm_ist(now=None):
    """Return True when the time in India (Asia/Kolkata) is within 18:00-19:59.

    Parameters
    ----------
    now : datetime.datetime, optional
        Moment to evaluate. Defaults to the current time. A timezone-aware value
        is converted to Asia/Kolkata; a naive value is interpreted by
        ``datetime.astimezone`` as system local time. Passing a fixed value lets
        the gate be exercised without redefining this function.

    Returns
    -------
    bool
        True if the hour in IST is 18 or 19, otherwise False.
    """
    tz_ist = pytz.timezone('Asia/Kolkata')
    now_ist = datetime.now(tz_ist) if now is None else now.astimezone(tz_ist)
    # Half-open window: 18:00 inclusive up to, but not including, 20:00.
    return 18 <= now_ist.hour < 20

if show_choropleth_6_to_8pm_ist():
    # A bar chart stands in for the choropleth, which would need region codes.
    # Bars whose 'Highlight' flag is True are drawn crimson, the others steelblue.
    # The palette is handed straight to plotly; df_agg itself is left unmodified so
    # its columns do not depend on what time of day this cell was run.
    color_map = {True: 'crimson', False: 'steelblue'}

    fig = px.bar(
        df_agg,
        x='Category',
        y='Installs_num',
        color='Highlight',
        color_discrete_map=color_map,
        text='Installs_num',
        title='Global App Installs by Top 5 Categories (excluding A/C/G/S)'
    )
    # Thin black outline around each bar; the legend is hidden.
    fig.update_traces(marker_line_color='black', marker_line_width=1.2)
    fig.update_layout(showlegend=False)
    fig.show()
else:
    print('The interactive map is only visible between 6 PM and 8 PM IST.')

The interactive map is only visible between 6 PM and 8 PM IST.


In [80]:
def show_choropleth_6_to_8pm_ist():
    """Testing override: report the display window as open regardless of the clock.

    NOTE: this reuses the name of the time-gated check defined earlier in the
    notebook, so once this cell has run the 6-8 PM IST gate is bypassed.
    """
    return True

if not show_choropleth_6_to_8pm_ist():
    print('The interactive map is only visible between 6 PM and 8 PM IST.')
else:
    # One bar per category; the boolean 'Highlight' column selects the bar colour.
    fig = px.bar(
        data_frame=df_agg,
        x='Category',
        y='Installs_num',
        text='Installs_num',
        color='Highlight',
        color_discrete_map={True: 'crimson', False: 'steelblue'},
        title='Global App Installs by Top 5 Categories (excluding A/C/G/S)',
    )
    # Black 1.2-wide outline on every bar; legend hidden.
    fig.update_traces(marker=dict(line=dict(color='black', width=1.2)))
    fig.update_layout(showlegend=False)
    fig.show()