Import Libraries

In [1]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pytz

Time Check

In [2]:
# Decide whether the chart may be rendered: it is allowed only while the
# current wall-clock hour in India Standard Time is 18, 19 or 20
# (i.e. from 18:00 inclusive up to 21:00 exclusive).
ist = pytz.timezone('Asia/Kolkata')
now = datetime.now(tz=ist).time()

# The chained comparison already yields the boolean we want to store.
show_chart = 18 <= now.hour < 21

 Load and Clean Data

In [3]:
# Location of the Play Store CSV export.
# The original notebook hard-coded one machine's absolute path; that path is
# kept as the default so existing behaviour is unchanged, but it can now be
# overridden without editing the notebook by setting the PLAY_STORE_CSV
# environment variable before starting the kernel.
DATA_PATH = os.environ.get(
    'PLAY_STORE_CSV',
    r'C:\Users\vishal\Desktop\Dataset\Play Store Data.csv',
)

# Raw, uncleaned data; the following cells clean and filter it.
df = pd.read_csv(DATA_PATH)

In [4]:
# Columns that later cells filter, aggregate or parse; a row missing any of
# them is discarded up front.
required_columns = ['App', 'Category', 'Reviews', 'Installs', 'Last Updated']
df = df.dropna(subset=required_columns)

In [5]:
# Convert the review counts to numbers; anything that cannot be parsed
# becomes NaN (errors='coerce') instead of raising.
df = df.assign(Reviews=pd.to_numeric(df['Reviews'], errors='coerce'))

In [6]:
# Install counts are text at this point: strip every '+' and ',' character,
# then convert to numbers. Values that still fail to parse become NaN.
installs_text = df['Installs'].str.replace('[+,]', '', regex=True)
df = df.assign(Installs=pd.to_numeric(installs_text, errors='coerce'))

In [7]:
# One pipeline for the remaining cleaning steps:
#   1. drop rows whose install count could not be parsed,
#   2. parse 'Last Updated' into datetimes (unparseable values become NaT),
#   3. drop rows whose date could not be parsed,
#   4. derive a 'Month' column holding the monthly period as a string.
df = (
    df.dropna(subset=['Installs'])
      .assign(**{
          'Last Updated': lambda frame: pd.to_datetime(
              frame['Last Updated'], errors='coerce'
          )
      })
      .dropna(subset=['Last Updated'])
      .assign(
          Month=lambda frame: frame['Last Updated'].dt.to_period('M').astype(str)
      )
)

Apply All Filters

In [8]:
# App name must NOT start with x/y/z
mask1 = ~df['App'].str.lower().str.startswith(('x', 'y', 'z'))

# Category must start with B, C, or E
mask2 = df['Category'].str.upper().str.startswith(('B', 'C', 'E'))

# Reviews > 500
mask3 = df['Reviews'] > 500

# App name must NOT contain letter "S"
mask4 = ~df['App'].str.contains('S', case=False)

df = df[mask1 & mask2 & mask3 & mask4]


Translate Category Names

In [9]:
# Translate specific categories
translations = {
    "Beauty": "सौंदर्य",
    "Business": "வணிகம்",
    "Dating": "Partnersuche"
}
# NOTE(review): the filter cell keeps only categories starting with B, C or E,
# so the "Dating" entry cannot match once that filter has run — confirm
# whether it is still needed.

# Match category names case-insensitively. The filter cell already treats
# Category casing as unreliable (it upper-cases before matching), whereas an
# exact-match replace would silently leave e.g. "BEAUTY" untranslated.
# presumably the CSV stores categories in a different case than these keys —
# TODO confirm against the data.
translation_lookup = {
    name.casefold(): label for name, label in translations.items()
}
translated = df['Category'].str.casefold().map(translation_lookup)

# Categories without a translation keep their original value. Rebinding via
# assign (rather than writing into a column) keeps the cell re-runnable.
df = df.assign(Category=translated.fillna(df['Category']))

Aggregate Data

In [14]:
# Total installs per (Month, Category) pair. The month strings are turned
# back into datetimes so they order chronologically, and the rows are sorted
# by category and then by month to form one time series per category.
grouped = (
    df.groupby(['Month', 'Category'])['Installs']
      .sum()
      .reset_index()
      .assign(Month=lambda monthly: pd.to_datetime(monthly['Month']))
      .sort_values(by=['Category', 'Month'])
)

Calculate MoM Growth Flag

In [15]:
# Percentage change in installs relative to the previous row of the same
# category. The first row of each category has no predecessor, so its growth
# is NaN and it is never flagged.
grouped['Install Growth %'] = (
    grouped.groupby('Category')['Installs']
           .pct_change()
           .mul(100)
)

# Flag rows whose growth exceeds 20 percent.
grouped['Highlight'] = grouped['Install Growth %'].gt(20)

 Plot the Chart

In [17]:
# Render the install trend, but only when the time-window flag computed by
# the "Time Check" cell allows it; otherwise print a notice instead.
# (plotly.express / plotly.graph_objects are imported in the notebook's
# import cell rather than here.)
if show_chart:
    # One line per category showing total installs per month.
    fig = px.line(
        grouped,
        x='Month',
        y='Installs',
        color='Category',
        title='Monthly Install Trend by Category (with MoM > 20% Highlighted)'
    )

    # Highlight growth areas: overlay red markers on the flagged months.
    # Iterating over the flagged rows grouped by category visits only the
    # categories that actually have a flagged month, so no empty trace (and
    # no misleading legend entry) is added for the others. sort=False keeps
    # the categories in their order of appearance.
    highlighted = grouped[grouped['Highlight']]
    for category, highlight_data in highlighted.groupby('Category', sort=False):
        fig.add_trace(go.Scatter(
            x=highlight_data['Month'],
            y=highlight_data['Installs'],
            mode='markers',
            marker=dict(size=10, color='red', symbol='circle'),
            name=f'{category} - Growth > 20%'
        ))

    fig.show()
else:
    print("⏳ This chart is only visible from 6 PM to 9 PM IST.")

⏳ This chart is only visible from 6 PM to 9 PM IST.
