# In [1]:
# ------------------------------
# Python Basics - Import Libraries
# ------------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import pytz

# ------------------------------
# Introduction of Project - Data Transmission
# ------------------------------
# Load the raw export; every column used below is cleaned in place on `df`.
df = pd.read_csv("googleplaystore.csv")

# ------------------------------
# Data Cleaning - Pandas Basics
# ------------------------------

# Clean Installs: strip the "+" and "," characters in a single pass, then
# convert to numbers. Anything that still fails to parse becomes NaN.
df["Installs"] = pd.to_numeric(
    df["Installs"].str.replace(r"[+,]", "", regex=True),
    errors="coerce",
)

# Clean Price: drop the literal "$" sign, then convert to numbers
# (unparseable values become NaN).
df["Price"] = pd.to_numeric(
    df["Price"].str.replace("$", "", regex=False),
    errors="coerce",
)

# Revenue Calculation: installs multiplied by the unit price.
df["Revenue"] = df["Installs"] * df["Price"]

# Android Version Cleaning: keep the first "<digits>.<digits>" match as a
# float; rows without such a match become NaN. expand=False yields a Series
# rather than a one-column DataFrame.
df["Android Ver"] = pd.to_numeric(
    df["Android Ver"].str.extract(r'(\d+\.\d+)', expand=False),
    errors="coerce",
)

# Size Cleaning (only M values): keep rows whose Size text contains "M".
# The explicit .copy() makes `df` an independent frame, so the column
# assignment and the in-place dropna below do not write into a slice of the
# original frame (avoids SettingWithCopyWarning).
df = df[df["Size"].str.contains("M", na=False)].copy()
df["Size"] = pd.to_numeric(
    df["Size"].str.replace("M", "", regex=False),
    errors="coerce",
)

# Remove null values.
# NOTE: this drops a row if ANY column is null, including columns that the
# later steps never read.
df.dropna(inplace=True)

# ------------------------------
# Apply Required Filters
# ------------------------------
# Keep only rows that satisfy every criterion at once: install and revenue
# floors, Android version above 4.0, size above 15, "Everyone" content
# rating, and an app name of at most 30 characters.
# NOTE(review): Revenue is Installs * Price, so any row with a zero price has
# Revenue 0 and cannot pass the revenue floor - confirm that is intended for
# the Free-vs-Paid comparison further down.
filtered = df.loc[
    df["Installs"].ge(10000)
    & df["Revenue"].ge(10000)
    & df["Android Ver"].gt(4.0)
    & df["Size"].gt(15)
    & df["Content Rating"].eq("Everyone")
    & df["App"].str.len().le(30)
]

# ------------------------------
# Top 3 Categories by Total Installs
# ------------------------------
# Sum installs per category, order the totals from largest to smallest and
# keep the labels of the first three categories.
top3 = (
    filtered["Installs"]
    .groupby(filtered["Category"])
    .sum()
    .sort_values(ascending=False)
    .index[:3]
)

# Restrict the filtered rows to those three categories.
top3_data = filtered.loc[filtered["Category"].isin(top3)]

# ------------------------------
# Free vs Paid Aggregation
# ------------------------------
# One row per app Type, holding the mean Installs and mean Revenue of the
# rows in the top three categories.
result = top3_data.groupby("Type")[["Installs", "Revenue"]].mean()

# ------------------------------
# Time Restriction (1PM–2PM IST)
# ------------------------------
# The chart is only rendered while the current hour in Asia/Kolkata is 13,
# i.e. from 13:00 up to (but not including) 14:00.
ist = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(ist)

if not (13 <= current_time.hour < 14):
    print("Graph visible only between 1 PM IST and 2 PM IST.")
elif result.empty:
    # Inside the window but no rows to draw: say so instead of blaming the
    # time of day.
    print("No data available to plot for the selected filters.")
else:
    x = np.arange(len(result.index))

    fig, ax1 = plt.subplots()

    # Average Installs (Primary Axis)
    ax1.bar(
        x,
        result["Installs"],
        width=0.4,
        color="tab:blue",
        label="Average Installs",
    )
    ax1.set_xlabel("App Type (Free vs Paid)")
    ax1.set_ylabel("Average Installs")
    ax1.set_xticks(x)
    ax1.set_xticklabels(result.index)

    # Average Revenue (Secondary Axis)
    # A marker is required so that a single-row result is still visible: a
    # line through one point draws nothing on its own. A distinct colour
    # keeps the line distinguishable from the bars.
    ax2 = ax1.twinx()
    ax2.plot(
        x,
        result["Revenue"],
        color="tab:red",
        marker="o",
        label="Average Revenue",
    )
    ax2.set_ylabel("Average Revenue")

    # Merge the entries of both axes into one legend, drawn on the top axes.
    bar_handles, bar_labels = ax1.get_legend_handles_labels()
    line_handles, line_labels = ax2.get_legend_handles_labels()
    ax2.legend(
        bar_handles + line_handles,
        bar_labels + line_labels,
        loc="upper right",
    )

    plt.title("Dual Axis Chart: Avg Installs vs Revenue\n(Top 3 Categories)")
    plt.show()

# Output: Graph visible only between 1 PM IST and 2 PM IST.
