In [1]:
# Enhanced Interactive Choropleth Map with Additional Features

In [1]:
import pandas as pd
import plotly.express as px
from datetime import datetime, time
import pytz
import numpy as np

In [2]:
# Load the datasets
apps_df = pd.read_csv("E:/Data Analyst Intern Tasks/Jupyter/Cleaned Datasets/Cleaned_GooglePlaystore.csv")
reviews_df = pd.read_csv("E:/Data Analyst Intern Tasks/Jupyter/Cleaned Datasets/Cleaned_UserReviews.csv")

In [3]:

## DATA PREPROCESSING ENHANCEMENTS ##

# 1. More robust null handling
def clean_data(df):
    # Drop rows with null in critical columns
    df = df.dropna(subset=['Category', 'Installs'])
    
    # Fill Size nulls with median (better than dropping)
    df['Size'] = df['Size'].fillna(df['Size'].median())
    
    # Convert Installs to numeric (in case it's stored as string)
    df['Installs'] = pd.to_numeric(df['Installs'], errors='coerce')
    df = df.dropna(subset=['Installs'])
    
    return df

apps_df = clean_data(apps_df)


In [4]:
# 2. Add continent data (for better geographic visualization)
# This is a mock function - in reality you'd need actual country data
def add_geographic_data(df):
    continents = ['Asia', 'Europe', 'Africa', 'North America', 'South America', 'Oceania']
    df['Continent'] = np.random.choice(continents, size=len(df))
    return df

apps_df = add_geographic_data(apps_df)

In [5]:
## TIME HANDLING ENHANCEMENTS ##

def is_display_time():
    ist = pytz.timezone('Asia/Kolkata')
    current_time = datetime.now(ist).time()
    return time(18, 0) <= current_time <= time(20, 0)

In [6]:

## DATA PREPARATION ENHANCEMENTS ##

def prepare_choropleth_data():
    # Filter categories that don't start with A, C, G, or S
    filtered_categories = apps_df[
        ~apps_df['Category'].str.startswith(('A', 'C', 'G', 'S'), na=False)
        & (apps_df['Category'].notna())
    ]
    
    # Get top 5 categories by installs
    top_5 = filtered_categories.groupby('Category')['Installs'].sum().nlargest(5).index
    
    # Filter data for these categories
    filtered_data = filtered_categories[filtered_categories['Category'].isin(top_5)]
    
    # Aggregate by Continent and Category
    map_data = filtered_data.groupby(['Continent', 'Category'])['Installs'].sum().reset_index()
    
    # Add highlight flag and formatted installs text
    map_data['Highlight'] = map_data['Installs'] > 1000000
    map_data['Installs_Text'] = map_data['Installs'].apply(
        lambda x: f"{x/1e6:.1f}M" if x >= 1e6 else f"{x/1e3:.0f}K"
    )
    
    return map_data


In [7]:

## VISUALIZATION ENHANCEMENTS ##

def create_enhanced_choropleth():
    if not is_display_time():
        print("The choropleth map is only available between 6 PM and 8 PM IST.")
        return None
    
    map_data = prepare_choropleth_data()
    
    # Create figure with enhanced features
    fig = px.choropleth(
        map_data,
        locations='Continent',
        locationmode='continent names',
        color='Installs',
        hover_name='Category',
        hover_data={'Installs_Text': True, 'Continent': False, 'Installs': False},
        animation_frame='Category',
        title='<b>Global Installs by Top 5 Categories</b><br><i>(Excluding categories starting with A, C, G, S)</i>',
        color_continuous_scale=px.colors.sequential.Plasma,
        scope='world',
        labels={'Installs_Text': 'Total Installs'},
        height=700,
        width=1000
    )
    
    # Enhanced layout configuration
    fig.update_layout(
        geo=dict(
            showframe=False,
            showcoastlines=True,
            projection_type='natural earth',
            landcolor='lightgray',
            lakecolor='lightblue'
        ),
        title={
            'text': "<b>Global Installs by Top 5 Categories</b><br><i>(Excluding categories starting with A, C, G, S)</i>",
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {'size': 20}
        },
        coloraxis_colorbar={
            'title': 'Installs',
            'tickprefix': '',
            'ticksuffix': '',
            'thickness': 20,
            'len': 0.75
        },
        margin={"r":30,"t":100,"l":30,"b":30},
        hoverlabel=dict(
            bgcolor="white",
            font_size=16,
            font_family="Rockwell"
        )
    )
    
    # Add custom hover template
    fig.update_traces(
        hovertemplate="<b>%{hovertext}</b><br>Continent: %{location}<br>Installs: %{customdata[0]}<extra></extra>"
    )
    
    # Highlight categories with > 1M installs
    for frame in fig.frames:
        frame.data[0].marker.line.width = [2 if x else 0 for x in 
            map_data[map_data['Category'] == frame.name]['Highlight']]
        frame.data[0].marker.line.color = 'gold'
    
    # Add range slider for animation control
    fig.update_layout(
        sliders=[{
            'active': 0,
            'steps': [{
                'args': [[f.name], {'frame': {'duration': 500, 'redraw': True}}],
                'label': f.name,
                'method': 'animate'
            } for f in fig.frames],
            'x': 0.1,
            'len': 0.8,
            'xanchor': 'center',
            'yanchor': 'top',
            'currentvalue': {
                'prefix': 'Category: ',
                'visible': True,
                'xanchor': 'right'
            }
        }]
    )
    
    # Add play/pause button
    fig.update_layout(
        updatemenus=[{
            'type': 'buttons',
            'showactive': False,
            'x': 0.1,
            'y': -0.1,
            'buttons': [{
                'label': 'Play',
                'method': 'animate',
                'args': [None, {
                    'frame': {'duration': 1000, 'redraw': True},
                    'fromcurrent': True,
                    'transition': {'duration': 300}
                }]
            }]
        }]
    )
    
    return fig


In [8]:

## MAIN EXECUTION ##
if __name__ == "__main__":
    if is_display_time():
        choropleth_fig = create_enhanced_choropleth()
        if choropleth_fig:
            # Save to HTML with all dependencies
            choropleth_fig.write_html(
                "enhanced_choropleth.html",
                full_html=True,
                include_plotlyjs='cdn',
                auto_open=True
            )
    else:
        print("The choropleth map is only available between 6 PM and 8 PM IST.")

ValueError: 
    Invalid value of type 'builtins.str' received for the 'locationmode' property of choropleth
        Received value: 'continent names'

    The 'locationmode' property is an enumeration that may be specified as:
      - One of the following enumeration values:
            ['ISO-3', 'USA-states', 'country names', 'geojson-id']