 Import Libraries

In [16]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import pytz

Time Check (1 PM to 2 PM IST)

In [17]:
# Read the current wall-clock time in the Asia/Kolkata zone.
ist = pytz.timezone('Asia/Kolkata')
now = datetime.now(ist).time()

# The chart is enabled only while the hour component is 13,
# i.e. from 13:00 up to (but not including) 14:00.
show_chart = now.hour == 13

Load & Clean Data

In [18]:
# Location of the raw Play Store CSV export read by this notebook.
DATA_PATH = r'C:\Users\vishal\Desktop\Dataset\Play Store Data.csv'

df = pd.read_csv(DATA_PATH)
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [19]:
# Drop rows that are missing any field used by the conversions and filters below.
required_columns = ['Installs', 'Price', 'Size', 'Type', 'Android Ver', 'App', 'Content Rating', 'Category']
df = df.dropna(subset=required_columns)

# '10,000+' -> 10000.0: remove thousands separators and the trailing plus sign.
df['Installs'] = df['Installs'].str.replace('[+,]', '', regex=True).astype(float)

# Remove the dollar sign from price strings before converting to float.
# regex=False is explicit on purpose: read as a regular expression, '$' is the
# end-of-string anchor, so the literal dollar sign would be left in place and
# the float conversion would fail on priced rows.
df['Price'] = df['Price'].str.replace('$', '', regex=False).astype(float)

In [20]:
# Convert size to MB
def convert_size(val):
    """Convert a size string with an 'M' or 'k' unit suffix to megabytes.

    Args:
        val: Size value such as '19M' or '201k'. Surrounding whitespace is
            ignored.

    Returns:
        The size in MB as a float (values with a 'k' suffix are divided by
        1024), or None when the value is not a string, does not end in 'M'
        or 'k', or its numeric part cannot be parsed.
    """
    if not isinstance(val, str):
        return None

    text = val.strip()
    # Inspect only the trailing unit so that an 'M' or 'k' appearing elsewhere
    # in the text is not mistaken for a unit.
    if text.endswith('M'):
        divisor = 1.0
    elif text.endswith('k'):
        divisor = 1024.0
    else:
        return None

    try:
        return float(text[:-1]) / divisor
    except ValueError:
        # Unparseable number: report it like any other unrecognised value
        # instead of aborting the whole column conversion.
        return None

In [21]:
# Pass every entry of the Size column through convert_size, element by element.
df['Size'] = df['Size'].map(convert_size)

In [22]:
# Compute Revenue for paid apps only: Installs * Price where Type is 'Paid',
# 0 for every other row. Computed column-wise so no Python function is called
# per row.
is_paid = df['Type'].eq('Paid')
df['Revenue'] = (df['Installs'] * df['Price']).where(is_paid, 0.0)

# Convert Android Version to float: take the first "<digits>.<digits>" run in
# the text (so '4.0.3 and up' yields 4.0); entries without such a run become NaN.
df['Android_Ver_Num'] = pd.to_numeric(df['Android Ver'].str.extract(r'(\d+\.\d+)')[0], errors='coerce')

Apply All Filters

In [23]:
# Build each criterion as its own boolean mask, then keep the rows that
# satisfy all of them.
# NOTE(review): Revenue is 0 for every row whose Type is not 'Paid', so the
# revenue threshold also removes all non-paid apps — confirm this is intended.
has_enough_installs = df['Installs'].ge(10000)
has_enough_revenue = df['Revenue'].ge(10000)
is_larger_than_15mb = df['Size'].gt(15)
needs_android_above_4 = df['Android_Ver_Num'].gt(4.0)
is_rated_everyone = df['Content Rating'].eq('Everyone')
has_short_name = df['App'].str.len().le(30)

df = df[
    has_enough_installs
    & has_enough_revenue
    & is_larger_than_15mb
    & needs_android_above_4
    & is_rated_everyone
    & has_short_name
]

 Filter to Top 3 Categories by Installs

In [24]:
# Sum installs per category and keep the labels of the three largest totals.
installs_by_category = df.groupby('Category')['Installs'].sum()
top_3_categories = installs_by_category.nlargest(3).index

# Retain only the apps that belong to one of those categories.
in_top_categories = df['Category'].isin(top_3_categories)
df = df[in_top_categories]

Group by Free vs Paid

In [25]:
# One row per app Type holding the mean Installs and mean Revenue of its apps;
# Type is turned back into an ordinary column for plotting.
mean_by_type = df.groupby('Type').agg(
    Installs=('Installs', 'mean'),
    Revenue=('Revenue', 'mean'),
)
summary = mean_by_type.reset_index()

 Plot Dual Axis Chart (If Time Matches)

In [26]:
if not show_chart:
    # Outside the allowed window: explain why nothing is drawn.
    print("⏳ This chart is only visible from 1 PM to 2 PM IST.")
else:
    app_types = summary['Type']

    # Bars: average installs per app type, bound to the primary y-axis.
    installs_bar = go.Bar(
        x=app_types,
        y=summary['Installs'],
        name='Average Installs',
        yaxis='y1',
        marker_color='royalblue',
    )

    # Line with markers: average revenue per app type, bound to the second y-axis.
    revenue_line = go.Scatter(
        x=app_types,
        y=summary['Revenue'],
        name='Average Revenue',
        yaxis='y2',
        mode='lines+markers',
        marker_color='darkorange',
    )

    fig = go.Figure(data=[installs_bar, revenue_line])

    # The second y-axis is overlaid on the first and drawn on the right side;
    # the legend is laid out horizontally at the given paper coordinates.
    layout_options = dict(
        title='Free vs Paid Apps – Avg Installs vs Revenue (Top 3 Categories)',
        xaxis=dict(title='App Type'),
        yaxis=dict(title='Avg Installs', side='left'),
        yaxis2=dict(title='Avg Revenue ($)', overlaying='y', side='right'),
        legend=dict(x=0.5, y=1.2, orientation='h'),
    )
    fig.update_layout(**layout_options)

    fig.show()


⏳ This chart is only visible from 1 PM to 2 PM IST.
