In [21]:
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob
from datetime import datetime
import pytz
import numpy as np
import nltk
nltk.download('punkt')

# Read the Play Store export into a DataFrame.
df = pd.read_csv("Play Store Data.csv")

# No 'Description' column is available, so the subjectivity score is
# computed from the app name itself (cast to str so missing names still parse).
df['Sentiment_Subjectivity'] = [
    TextBlob(app_name).sentiment.subjectivity
    for app_name in df['App'].astype(str)
]

# Clean 'Size' column
def convert_size(size_str):
    """Convert a size label such as ``"19M"`` or ``"201k"`` to megabytes.

    Args:
        size_str: Size label. A trailing ``M`` means megabytes and a
            trailing ``k`` means kilobytes.

    Returns:
        The size in megabytes as a float (kilobyte values are divided by
        1024), or ``np.nan`` when the input is not a string, carries no
        ``M``/``k`` suffix, or its numeric part cannot be parsed.
    """
    if not isinstance(size_str, str):
        return np.nan

    label = size_str.strip()
    try:
        if label.endswith('M'):
            return float(label[:-1])
        if label.endswith('k'):
            return float(label[:-1]) / 1024
    except ValueError:
        # Only a malformed number is an expected failure here; anything
        # else (e.g. KeyboardInterrupt) must propagate to the caller.
        return np.nan
    return np.nan

# Turn the textual size labels into a numeric megabyte column.
df['Size_MB'] = df['Size'].astype(str).apply(convert_size)

# Clean 'Installs': strip '+' and thousands separators, then keep only the
# rows whose remaining text is purely numeric.
df['Installs'] = df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True)
# .copy() makes the filtered frame an independent object, so the column
# assignments below write to it directly instead of to a slice of the
# original frame (which raises SettingWithCopyWarning).
df = df[df['Installs'].str.isnumeric()].copy()
# Pin the width explicitly: plain `int` can resolve to a 32-bit integer on
# some platforms (e.g. older NumPy builds on Windows).
df['Installs'] = df['Installs'].astype('int64')

# Clean 'Reviews': values that cannot be parsed as numbers become NaN.
df['Reviews'] = pd.to_numeric(df['Reviews'], errors='coerce')

# Keep only the apps that satisfy every chart criterion at once.
categories_allowed = ['GAME', 'BEAUTY', 'BUSINESS', 'COMICS', 'COMMUNICATION', 'DATING', 'ENTERTAINMENT', 'SOCIAL', 'EVENT']
df_filtered = df.loc[
    df['Rating'].gt(3.5)                                      # rating above 3.5
    & df['Category'].str.upper().isin(categories_allowed)     # allowed category, any case
    & df['Reviews'].gt(500)                                   # more than 500 reviews
    & ~df['App'].str.contains('s', case=False, na=False)      # name has no 's' or 'S'
    & df['Sentiment_Subjectivity'].gt(0.5)                    # subjectivity above 0.5
    & df['Installs'].gt(50000)                                # more than 50k installs
].copy()

# Translate categories
def translate_category(cat):
    """Return the localized label for a category name.

    BEAUTY is rendered in Hindi, BUSINESS in Tamil and DATING in German;
    the match ignores case. Any other category is returned unchanged.
    """
    localized_labels = {
        'BEAUTY': 'सौंदर्य',  # Hindi
        'BUSINESS': 'வணிகம்',  # Tamil
        'DATING': 'Partnersuche',  # German
    }
    return localized_labels.get(cat.upper(), cat)

# Attach the localized category label to every remaining app.
# NOTE(review): 'Category_Translated' is not referenced by the plotting code
# below — confirm whether it was meant to appear in the chart.
df_filtered['Category_Translated'] = df_filtered['Category'].apply(translate_category)

# The chart is rendered only while the IST clock hour is 17 or 18.
now = datetime.now(pytz.timezone('Asia/Kolkata'))
if not (17 <= now.hour < 19):
    print("This chart is only visible between 5 PM and 7 PM IST.")
else:
    plt.figure(figsize=(12, 8))

    # Marker size is the install count scaled down by 10,000.
    bubble_sizes = df_filtered['Installs'].div(10000)

    # GAME apps are drawn in pink, every other category in blue.
    colors = (
        df_filtered['Category']
        .str.upper()
        .eq('GAME')
        .map({True: 'pink', False: 'blue'})
    )

    plt.scatter(
        x=df_filtered['Size_MB'],
        y=df_filtered['Rating'],
        s=bubble_sizes,
        c=colors,
        alpha=0.6,
        edgecolors='w',
        linewidth=0.5,
    )

    plt.xlabel('App Size (MB)')
    plt.ylabel('Average Rating')
    plt.title('Bubble Chart: Size vs Rating (Bubble Size = Installs)')
    plt.grid(True)
    plt.show()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pranchal\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


This chart is only visible between 5 PM and 7 PM IST.
