In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import pytz

# Load the Play Store export and strip stray whitespace from the header so the
# column names referenced below match exactly.
df = pd.read_csv("googleplaystore.csv")
df.columns = [col.strip() for col in df.columns]

# Drop rows missing any field the analysis relies on. Revenue is not a column
# of the file; it is derived further down.
df = df.dropna(subset=['Installs', 'Android Ver', 'Size', 'Content Rating', 'App', 'Type', 'Price', 'Category'])

# Installs: remove every '+' and ',' character, then convert to float.
df['Installs'] = df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True).astype(float)

# Price: remove the literal '$' (regex=False, so '$' is not an anchor), then
# convert to float.
df['Price'] = df['Price'].astype(str).str.replace('$', '', regex=False).astype(float)

# Revenue estimate: installs * price for rows whose Type is 'Paid', 0 for all
# other rows. Computed column-wise instead of with a row-wise apply: it avoids
# one Python call per row and stays well-defined (an empty float column) when
# no rows survive the dropna above.
df['Revenue'] = (df['Installs'] * df['Price']).where(df['Type'] == 'Paid', 0.0)

# Convert Size to MB
def size_to_mb(size):
    """Convert a size label such as '19M' or '201k' to megabytes.

    Args:
        size: Any value; it is converted with ``str()`` before parsing.

    Returns:
        The size in MB as a float. A trailing 'M' is taken as megabytes, a
        trailing 'k'/'K' as kilobytes (divided by 1024). Returns None when
        the text has no recognised unit suffix or its numeric part cannot be
        parsed, so a single odd value never aborts a column-wide apply.
    """
    # Surrounding whitespace and thousands separators are not part of the number.
    text = str(size).strip().replace(',', '')
    if not text:
        return None

    # Read the unit from the suffix only; a substring test would also match
    # letters inside arbitrary words.
    unit = text[-1]
    if unit == 'M':
        divisor = 1.0
    elif unit in ('k', 'K'):
        divisor = 1024.0
    else:
        return None

    try:
        magnitude = float(text[:-1])
    except ValueError:
        # Unit letter present but the remainder is not a number.
        return None
    return magnitude / divisor
# Numeric size in MB; values size_to_mb cannot interpret come back as missing.
size_labels = df['Size'].astype(str)
df['Size_MB'] = size_labels.apply(size_to_mb)

# Android version as a float built from the first "<digits>.<digits>" match;
# labels without such a match become NaN.
android_labels = df['Android Ver'].astype(str)
df['Android_Ver_Num'] = android_labels.str.extract(r'(\d+\.\d+)', expand=False).astype(float)

# Row filters, one named mask per rule. Comparisons against NaN are False, so
# rows with an unparsed size or version are excluded.
# NOTE(review): Revenue is 0 for every non-paid row, so the revenue rule keeps
# paid apps only — confirm that is intended for a free-vs-paid comparison.
# NOTE(review): a label like "4.0.3 and up" parses to 4.0 and so fails the
# strict "> 4.0" rule — confirm.
enough_installs = df['Installs'].ge(10000)
enough_revenue = df['Revenue'].ge(10000)
recent_android = df['Android_Ver_Num'].gt(4.0)
large_app = df['Size_MB'].gt(15)
rated_everyone = df['Content Rating'].eq('Everyone')
short_name = df['App'].str.len().le(30)

keep_rows = (
    enough_installs
    & enough_revenue
    & recent_android
    & large_app
    & rated_everyone
    & short_name
)
filtered_df = df.loc[keep_rows]

# Rank categories by mean installs among the filtered apps and keep the three
# highest.
mean_installs_by_category = filtered_df.groupby('Category')['Installs'].mean()
top_categories = mean_installs_by_category.sort_values(ascending=False).head(3).index

# Restrict the filtered apps to those top categories.
in_top_category = filtered_df['Category'].isin(top_categories)
filtered_df = filtered_df.loc[in_top_category]

# One row per (Category, Type) pair holding the mean installs and mean revenue.
grouped = (
    filtered_df.groupby(['Category', 'Type'])[['Installs', 'Revenue']]
    .mean()
    .rename(columns={'Installs': 'Avg_Installs', 'Revenue': 'Avg_Revenue'})
    .reset_index()
)

# The chart is shown only while the India-time clock reads 13:xx
# (1 PM up to, but not including, 2 PM).
india_tz = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(india_tz)

if current_time.hour != 13:
    # Outside the window: print a notice instead of drawing anything.
    print("Graph hidden — Available only between 1 PM and 2 PM IST.")
else:
    sns.set(style="whitegrid")
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Both layers plot the same grouped table, split by Type per Category.
    shared_plot_args = dict(data=grouped, x='Category', hue='Type')

    # Left axis: bars of average installs.
    sns.barplot(y='Avg_Installs', ax=ax1, palette='Set2', **shared_plot_args)
    ax1.set_ylabel('Average Installs', color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')

    # Right axis (sharing x with the bars): points of average revenue, drawn
    # without a second legend.
    ax2 = ax1.twinx()
    sns.pointplot(y='Avg_Revenue', ax=ax2, palette='dark:salmon_r', legend=False,
                  **shared_plot_args)
    ax2.set_ylabel('Average Revenue (USD)', color='red')
    ax2.tick_params(axis='y', labelcolor='red')

    plt.title('Avg Installs & Revenue for Free vs Paid Apps (Top 3 Categories)', fontsize=14)
    fig.tight_layout()
    plt.show()


Graph hidden — Available only between 1 PM and 2 PM IST.
