In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import pytz

# Load the raw dataset; the file name is relative, so it is resolved against
# the current working directory.
df = pd.read_csv("Play Store Data.csv")
# Convert 'Reviews' to numeric: handle 'M' and 'K' suffixes
def convert_reviews(value):
    """Parse a review count such as ``"1234"``, ``"3.0M"`` or ``"12K"``.

    Args:
        value: Raw cell value. It is converted to ``str`` and stripped of
            surrounding whitespace before parsing.

    Returns:
        The count as a ``float``. A trailing ``K`` or ``M`` (upper or lower
        case) scales the number by 1e3 or 1e6 respectively. ``None`` is
        returned when the text is not a valid number.
    """
    text = str(value).strip()
    # Only a *trailing* letter is a magnitude suffix; a 'K'/'M' anywhere else
    # means the text is not a number and must not be silently stripped.
    scale = {'K': 1e3, 'M': 1e6}.get(text[-1:].upper())
    try:
        if scale is None:
            return float(text)
        return float(text[:-1]) * scale
    except ValueError:
        # float() rejects the text: report "no value" instead of raising so
        # the function can be applied over a whole column.
        return None

# Pipeline: clean the review counts, filter the apps, aggregate installs per
# month and category, then (inside a fixed IST time window) draw the trend
# with strong month-over-month growth shaded.

# Parse the raw review strings into floats; unparseable values become missing.
df['Reviews'] = df['Reviews'].apply(convert_reviews)

# 1. Filter based on given conditions
#    - app name does not start with x/y/z (case-insensitive)
#    - app name does not contain the letter 's' (case-insensitive)
#    - category starts with 'E', 'C' or 'B' (case-sensitive)
#    - more than 500 reviews
# na=False on every string test makes missing values evaluate to False
# instead of NaN, so the `~` negation cannot fail on a missing app name.
keep_rows = (
    ~df['App'].str.lower().str.startswith(('x', 'y', 'z'), na=False)
    & ~df['App'].str.contains('s', case=False, na=False)
    & df['Category'].str.startswith(('E', 'C', 'B'), na=False)
    & (df['Reviews'].astype(float) > 500)
)
# Copy *after* filtering: the columns added below then belong to an
# independent frame rather than to a slice of `df`.
df_filtered = df[keep_rows].copy()

# 2. Translate category names
# NOTE(review): matching is exact and case-sensitive, and 'Dating' cannot
# survive the E/C/B category filter above, so that entry is currently
# unreachable -- confirm against the category spelling used in the data.
df_filtered['Category_Translated'] = df_filtered['Category'].replace({
    'Beauty': 'सौंदर्य',         # Hindi
    'Business': 'வணிகம்',       # Tamil
    'Dating': 'Partnersuche'     # German
})

# 3. Convert 'Last Updated' to datetime; unparseable dates become NaT
df_filtered['Last Updated'] = pd.to_datetime(df_filtered['Last Updated'], errors='coerce')
df_filtered['Month'] = df_filtered['Last Updated'].dt.to_period('M')

# Convert Installs to numeric: drop '+' and ',' first, anything still
# non-numeric becomes NaN.
df_filtered['Installs'] = pd.to_numeric(
    df_filtered['Installs'].astype(str).str.replace(r'[+,]', '', regex=True),
    errors='coerce',
)

# 4. Group installs by month and category
df_grouped = (
    df_filtered
    .groupby(['Month', 'Category_Translated'])['Installs']
    .sum()
    .reset_index()
)
# Periods -> timestamps so matplotlib can place them on a date axis.
df_grouped['Month'] = df_grouped['Month'].dt.to_timestamp()

# 5. Calculate month-over-month % change
# NOTE(review): the change is relative to the previous *row* of the same
# category; months with no data are not filled in, so a gap is compared
# across more than one calendar month.
df_grouped['Pct_Change'] = df_grouped.groupby('Category_Translated')['Installs'].pct_change()

# 6. Plot (only if current time is between 6 PM and 9 PM IST)
now = datetime.now(pytz.timezone('Asia/Kolkata'))
if 18 <= now.hour < 21:
    plt.figure(figsize=(14, 7))
    categories = df_grouped['Category_Translated'].unique()

    for cat in categories:
        temp_df = df_grouped[df_grouped['Category_Translated'] == cat]
        plt.plot(temp_df['Month'], temp_df['Installs'], label=cat)

        growth = temp_df['Pct_Change'] > 0.20
        # fill_between only shades the span between two adjacent True points,
        # so a single growth month on its own would not be highlighted. Also
        # flag the month *before* each growth month so the rising segment
        # leading into it is shaded.
        highlight = growth | growth.shift(-1, fill_value=False)
        plt.fill_between(
            temp_df['Month'],
            temp_df['Installs'],
            where=highlight,
            color='orange',
            alpha=0.3,
            interpolate=True
        )

    plt.title('📈 Time Series of Installs by Category with Growth Highlighted', fontsize=16)
    plt.xlabel('Month')
    plt.ylabel('Total Installs')
    plt.legend(title='App Category (Translated)')
    plt.grid(True)
    plt.tight_layout()
    plt.show()
else:
    print("Graph is hidden: Only visible between 6 PM to 9 PM IST.")


Graph is hidden: Only visible between 6 PM to 9 PM IST.
