In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Read the raw Play Store export into a DataFrame; the path is relative to
# the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Play Store Data.csv")

# Helper function to convert Size to MB
# NOTE(review): convert_size is defined more than once in this notebook;
# a single definition would suffice.
def convert_size(size):
    """Convert a size label to megabytes.

    Parameters
    ----------
    size : str
        Size text containing an ``'M'`` (megabytes) or ``'k'`` (kilobytes)
        marker, e.g. ``'19M'`` or ``'512k'``.

    Returns
    -------
    float
        The size in MB. Values marked with ``'k'`` are divided by 1024.
        ``np.nan`` is returned when ``size`` is not a string, carries
        neither marker, or its numeric part cannot be parsed.
    """
    # Non-string input (None, NaN, numbers) has no unit marker to interpret.
    if not isinstance(size, str):
        return np.nan
    try:
        if 'M' in size:
            return float(size.replace('M', ''))
        if 'k' in size:
            return float(size.replace('k', '')) / 1024
    except ValueError:
        # Only a failed float() parse is expected here. Anything else
        # (including KeyboardInterrupt) must propagate instead of being
        # silently converted to NaN.
        return np.nan
    return np.nan

# Preprocess
# Size text -> megabytes as float (NaN when no 'M'/'k' marker is present).
df['Size_MB'] = df['Size'].astype(str).apply(convert_size)
# Remove the '+' and ',' characters from the install counts.
df['Installs'] = df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True)
# Keep only rows whose install count is purely numeric text. The explicit
# .copy() makes the result an independent frame, so the column assignment
# below does not write into a slice of the original DataFrame
# (which is what triggers pandas' SettingWithCopyWarning).
df = df[df['Installs'].str.isnumeric()].copy()
df['Installs'] = df['Installs'].astype(int)

# Calculate Revenue: Price * Installs for paid apps, 0 for everything else.
# NOTE(review): Price is presumably stored as text such as "$4.99" in this
# CSV -- confirm against the data. With errors='coerce' a leading "$" would
# silently turn every paid price into NaN (and therefore every paid revenue
# into NaN), so the symbol is stripped before parsing. The replace is a no-op
# when the column is already numeric.
price_text = df['Price'].astype(str).str.replace('$', '', regex=False)
df['Price'] = pd.to_numeric(price_text, errors='coerce')

# Vectorised form of the per-row rule: keep Price * Installs where Type is
# 'Paid', use 0 on all other rows (including rows with a missing Type).
is_paid = df['Type'] == 'Paid'
df['Revenue'] = (df['Price'] * df['Installs']).where(is_paid, 0)

# Keep the first "digits[.digits]" run of the Android version text as a
# float; text without any digit becomes NaN.
df['Android Ver'] = pd.to_numeric(
    df['Android Ver'].astype(str).str.extract(r'(\d+\.?\d*)')[0],
    errors='coerce',
)
# Force text dtype so the string comparisons and len() used by the later
# filters behave uniformly.
df['App'] = df['App'].astype(str)
df['Content Rating'] = df['Content Rating'].astype(str)

# Helper function to convert Size to MB
# NOTE(review): convert_size is defined more than once in this notebook;
# a single definition would suffice.
def convert_size(size):
    """Convert a size label to megabytes.

    Parameters
    ----------
    size : str
        Size text containing an ``'M'`` (megabytes) or ``'k'`` (kilobytes)
        marker, e.g. ``'19M'`` or ``'512k'``.

    Returns
    -------
    float
        The size in MB. Values marked with ``'k'`` are divided by 1024.
        ``np.nan`` is returned when ``size`` is not a string, carries
        neither marker, or its numeric part cannot be parsed.
    """
    # Non-string input (None, NaN, numbers) has no unit marker to interpret.
    if not isinstance(size, str):
        return np.nan
    try:
        if 'M' in size:
            return float(size.replace('M', ''))
        if 'k' in size:
            return float(size.replace('k', '')) / 1024
    except ValueError:
        # Only a failed float() parse is expected here. Anything else
        # (including KeyboardInterrupt) must propagate instead of being
        # silently converted to NaN.
        return np.nan
    return np.nan

# Preprocess the columns
# NOTE(review): these steps repeat cleaning that already appears earlier in
# the notebook; confirm whether this second pass is still needed.

# Size text -> megabytes.
size_text = df['Size'].astype(str)
df['Size_MB'] = size_text.apply(convert_size)

# Install counts: drop '+' and ',', keep numeric rows only, cast to int.
installs_text = df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True)
df['Installs'] = installs_text
numeric_installs = df['Installs'].str.isnumeric()
df = df[numeric_installs]
df['Installs'] = df['Installs'].astype(int)

# Revenue as a number; anything unparseable becomes NaN.
df['Revenue'] = pd.to_numeric(df['Revenue'], errors='coerce')

# Android version: first "digits[.digits]" run of the text, as a float.
version_text = df['Android Ver'].astype(str)
leading_version = version_text.str.extract(r'(\d+\.?\d*)')[0]
df['Android Ver'] = pd.to_numeric(leading_version, errors='coerce')

# Text columns used by the later filters.
for text_column in ('App', 'Content Rating'):
    df[text_column] = df[text_column].astype(str)

# Apply all filters
# One boolean mask per criterion; a row is kept only when every mask is True.
enough_installs = df['Installs'] > 10000
enough_revenue = df['Revenue'] > 10000
recent_android = df['Android Ver'] > 4.0
large_enough = df['Size_MB'] > 15
rated_everyone = df['Content Rating'] == "Everyone"
short_name = df['App'].apply(lambda name: len(name) <= 30)

filtered_df = df[
    enough_installs
    & enough_revenue
    & recent_android
    & large_enough
    & rated_everyone
    & short_name
]

# Get top 3 categories by total installs
installs_per_category = filtered_df.groupby('Category')['Installs'].sum()
top_categories = installs_per_category.nlargest(3).index.tolist()
in_top_category = filtered_df['Category'].isin(top_categories)
top_df = filtered_df[in_top_category]

# Mean installs and mean revenue for every (Category, Type) pair, returned
# as ordinary columns rather than as an index.
summary_df = (
    top_df
    .groupby(['Category', 'Type'])[['Installs', 'Revenue']]
    .mean()
    .reset_index()
)

# TIME FILTER — only show graph between 1PM to 2PM IST
from datetime import timedelta, timezone

# datetime.now() without a timezone returns the machine's local time, which
# is IST only when the kernel happens to run in that zone. IST is a fixed
# UTC+05:30 offset, so a fixed-offset timezone is enough to make the check
# (and the time printed below) independent of where the notebook runs.
IST = timezone(timedelta(hours=5, minutes=30), name="IST")
now = datetime.now(IST)
current_hour = now.hour
current_min = now.minute

# Hour 13 covers 13:00 through 13:59.
if current_hour == 13:
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Second y-axis sharing the same x-axis, so installs and revenue can
    # each use their own scale.
    ax2 = ax1.twinx()

    width = 0.4
    x = np.arange(len(summary_df))

    installs = summary_df['Installs']
    revenue = summary_df['Revenue']

    # Shift the two bar series half a bar width left/right so they sit side
    # by side at each tick.
    ax1.bar(x - width/2, installs, width=width, color='skyblue', label='Average Installs')
    ax2.bar(x + width/2, revenue, width=width, color='lightcoral', label='Average Revenue')

    ax1.set_ylabel('Average Installs')
    ax2.set_ylabel('Average Revenue ($)')
    ax1.set_title('Average Installs vs Revenue by App Type (Top 3 Categories)')

    ax1.set_xticks(x)
    ax1.set_xticklabels(summary_df['Category'] + ' - ' + summary_df['Type'])

    # Combine legends: each axis only knows its own bars, so merge the
    # handles and labels into a single legend.
    handles1, labels1 = ax1.get_legend_handles_labels()
    handles2, labels2 = ax2.get_legend_handles_labels()
    plt.legend(handles1 + handles2, labels1 + labels2, loc='upper left')

    plt.tight_layout()
    plt.show()
else:
    print("⚠️ This chart is only visible between 1 PM to 2 PM IST. Current time:", now.strftime("%H:%M"))


⚠️ This chart is only visible between 1 PM to 2 PM IST. Current time: 15:50


In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Read the raw Play Store export into a DataFrame; the path is relative to
# the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Play Store Data.csv")


In [11]:
# Helper function to convert Size to MB
# NOTE(review): convert_size is defined more than once in this notebook;
# a single definition would suffice.
def convert_size(size):
    """Convert a size label to megabytes.

    Parameters
    ----------
    size : str
        Size text containing an ``'M'`` (megabytes) or ``'k'`` (kilobytes)
        marker, e.g. ``'19M'`` or ``'512k'``.

    Returns
    -------
    float
        The size in MB. Values marked with ``'k'`` are divided by 1024.
        ``np.nan`` is returned when ``size`` is not a string, carries
        neither marker, or its numeric part cannot be parsed.
    """
    # Non-string input (None, NaN, numbers) has no unit marker to interpret.
    if not isinstance(size, str):
        return np.nan
    try:
        if 'M' in size:
            return float(size.replace('M', ''))
        if 'k' in size:
            return float(size.replace('k', '')) / 1024
    except ValueError:
        # Only a failed float() parse is expected here. Anything else
        # (including KeyboardInterrupt) must propagate instead of being
        # silently converted to NaN.
        return np.nan
    return np.nan

In [13]:
# Preprocess
# Size text -> megabytes as float (NaN when no 'M'/'k' marker is present).
df['Size_MB'] = df['Size'].astype(str).apply(convert_size)
# Remove the '+' and ',' characters from the install counts.
df['Installs'] = df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True)
# Keep only rows whose install count is purely numeric text. The explicit
# .copy() makes the result an independent frame, so the column assignment
# below does not write into a slice of the original DataFrame
# (which is what triggers pandas' SettingWithCopyWarning).
df = df[df['Installs'].str.isnumeric()].copy()
df['Installs'] = df['Installs'].astype(int)


In [15]:
# Calculate Revenue: Price * Installs for paid apps, 0 for everything else.
# NOTE(review): Price is presumably stored as text such as "$4.99" in this
# CSV -- confirm against the data. With errors='coerce' a leading "$" would
# silently turn every paid price into NaN (and therefore every paid revenue
# into NaN), so the symbol is stripped before parsing. The replace is a no-op
# when the column is already numeric.
price_text = df['Price'].astype(str).str.replace('$', '', regex=False)
df['Price'] = pd.to_numeric(price_text, errors='coerce')

# Vectorised form of the per-row rule: keep Price * Installs where Type is
# 'Paid', use 0 on all other rows (including rows with a missing Type).
is_paid = df['Type'] == 'Paid'
df['Revenue'] = (df['Price'] * df['Installs']).where(is_paid, 0)

# Keep the first "digits[.digits]" run of the Android version text as a
# float; text without any digit becomes NaN.
df['Android Ver'] = pd.to_numeric(
    df['Android Ver'].astype(str).str.extract(r'(\d+\.?\d*)')[0],
    errors='coerce',
)
# Force text dtype so the string comparisons and len() used by the later
# filters behave uniformly.
df['App'] = df['App'].astype(str)
df['Content Rating'] = df['Content Rating'].astype(str)

In [17]:
# Helper function to convert Size to MB
# NOTE(review): convert_size is defined more than once in this notebook;
# a single definition would suffice.
def convert_size(size):
    """Convert a size label to megabytes.

    Parameters
    ----------
    size : str
        Size text containing an ``'M'`` (megabytes) or ``'k'`` (kilobytes)
        marker, e.g. ``'19M'`` or ``'512k'``.

    Returns
    -------
    float
        The size in MB. Values marked with ``'k'`` are divided by 1024.
        ``np.nan`` is returned when ``size`` is not a string, carries
        neither marker, or its numeric part cannot be parsed.
    """
    # Non-string input (None, NaN, numbers) has no unit marker to interpret.
    if not isinstance(size, str):
        return np.nan
    try:
        if 'M' in size:
            return float(size.replace('M', ''))
        if 'k' in size:
            return float(size.replace('k', '')) / 1024
    except ValueError:
        # Only a failed float() parse is expected here. Anything else
        # (including KeyboardInterrupt) must propagate instead of being
        # silently converted to NaN.
        return np.nan
    return np.nan


In [19]:
# Preprocess the columns
# NOTE(review): these steps repeat cleaning that already appears earlier in
# the notebook; confirm whether this second pass is still needed.

# Size text -> megabytes.
size_text = df['Size'].astype(str)
df['Size_MB'] = size_text.apply(convert_size)

# Install counts: drop '+' and ',', keep numeric rows only, cast to int.
installs_text = df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True)
df['Installs'] = installs_text
numeric_installs = df['Installs'].str.isnumeric()
df = df[numeric_installs]
df['Installs'] = df['Installs'].astype(int)

# Revenue as a number; anything unparseable becomes NaN.
df['Revenue'] = pd.to_numeric(df['Revenue'], errors='coerce')

# Android version: first "digits[.digits]" run of the text, as a float.
version_text = df['Android Ver'].astype(str)
leading_version = version_text.str.extract(r'(\d+\.?\d*)')[0]
df['Android Ver'] = pd.to_numeric(leading_version, errors='coerce')

# Text columns used by the later filters.
for text_column in ('App', 'Content Rating'):
    df[text_column] = df[text_column].astype(str)


In [21]:
# Apply all filters
# One boolean mask per criterion; a row is kept only when every mask is True.
enough_installs = df['Installs'] > 10000
enough_revenue = df['Revenue'] > 10000
recent_android = df['Android Ver'] > 4.0
large_enough = df['Size_MB'] > 15
rated_everyone = df['Content Rating'] == "Everyone"
short_name = df['App'].apply(lambda name: len(name) <= 30)

filtered_df = df[
    enough_installs
    & enough_revenue
    & recent_android
    & large_enough
    & rated_everyone
    & short_name
]


In [23]:
# Get top 3 categories by total installs
installs_per_category = filtered_df.groupby('Category')['Installs'].sum()
top_categories = installs_per_category.nlargest(3).index.tolist()
in_top_category = filtered_df['Category'].isin(top_categories)
top_df = filtered_df[in_top_category]

# Mean installs and mean revenue for every (Category, Type) pair, returned
# as ordinary columns rather than as an index.
summary_df = (
    top_df
    .groupby(['Category', 'Type'])[['Installs', 'Revenue']]
    .mean()
    .reset_index()
)

In [25]:
# TIME FILTER — only show graph between 1PM to 2PM IST
from datetime import timedelta, timezone

# datetime.now() without a timezone returns the machine's local time, which
# is IST only when the kernel happens to run in that zone. IST is a fixed
# UTC+05:30 offset, so a fixed-offset timezone is enough to make the check
# (and the time printed below) independent of where the notebook runs.
IST = timezone(timedelta(hours=5, minutes=30), name="IST")
now = datetime.now(IST)
current_hour = now.hour
current_min = now.minute

# Hour 13 covers 13:00 through 13:59.
if current_hour == 13:
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Second y-axis sharing the same x-axis, so installs and revenue can
    # each use their own scale.
    ax2 = ax1.twinx()

    width = 0.4
    x = np.arange(len(summary_df))

    installs = summary_df['Installs']
    revenue = summary_df['Revenue']

    # Shift the two bar series half a bar width left/right so they sit side
    # by side at each tick.
    ax1.bar(x - width/2, installs, width=width, color='skyblue', label='Average Installs')
    ax2.bar(x + width/2, revenue, width=width, color='lightcoral', label='Average Revenue')

    ax1.set_ylabel('Average Installs')
    ax2.set_ylabel('Average Revenue ($)')
    ax1.set_title('Average Installs vs Revenue by App Type (Top 3 Categories)')

    ax1.set_xticks(x)
    ax1.set_xticklabels(summary_df['Category'] + ' - ' + summary_df['Type'])

    # Combine legends: each axis only knows its own bars, so merge the
    # handles and labels into a single legend.
    handles1, labels1 = ax1.get_legend_handles_labels()
    handles2, labels2 = ax2.get_legend_handles_labels()
    plt.legend(handles1 + handles2, labels1 + labels2, loc='upper left')

    plt.tight_layout()
    plt.show()
else:
    print("This chart is only visible between 1 PM to 2 PM IST. Current time:", now.strftime("%H:%M"))


⚠️ This chart is only visible between 1 PM to 2 PM IST. Current time: 16:46
