In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import pytz
from textblob import TextBlob

# Load dataset
df = pd.read_csv("googleplaystore.csv")
df.columns = [col.strip() for col in df.columns]

# Convert numeric columns safely
df['Installs'] = df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True)
df['Installs'] = pd.to_numeric(df['Installs'], errors='coerce')

df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
df['Reviews'] = pd.to_numeric(df['Reviews'], errors='coerce')

# Remove missing key values after conversion
df = df.dropna(subset=['Installs', 'Rating', 'Reviews', 'Size', 'Category', 'App'])

# Convert Size to MB
def size_to_mb(size):
    size = str(size)
    if 'M' in size:
        return float(size.replace('M', ''))
    elif 'k' in size or 'K' in size:
        return float(size.replace('k', '').replace('K', '')) / 1024
    else:
        return None

df['Size_MB'] = df['Size'].astype(str).apply(size_to_mb)

# Sentiment subjectivity from App name (short text but per requirement)
df['sentiment_subjectivity'] = df['App'].apply(lambda x: TextBlob(str(x)).sentiment.subjectivity)

# Standardize category names to avoid typos
df['Category'] = df['Category'].str.strip().str.title()  # e.g., "commics" -> "Commics"

# Filter categories as per requirement (case-insensitive match)
categories_needed = [
    'Game', 'Beauty', 'Business', 'Commics', 'Commication',
    'Dating', 'Entertainment', 'Social', 'Events'
]
df = df[df['Category'].isin(categories_needed)]

# Apply all filters
filtered_df = df[
    (df['Rating'] > 3.5) &
    (df['Reviews'] > 500) &
    (~df['App'].str.contains("S", case=False)) &
    (df['sentiment_subjectivity'] > 0.5) &
    (df['Installs'] > 50000)
]

# Translate category names
translation_map = {
    'Beauty': 'सौंदर्य',   # Hindi
    'Business': 'வணிகம்',  # Tamil
    'Dating': 'Dating'     # German translation remains same
}
filtered_df['Category'] = filtered_df['Category'].replace(translation_map)

# Time restriction — 5 PM to 7 PM IST
india_tz = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(india_tz)

if 17<= current_time.hour < 19:
    if filtered_df.empty:
        print(" No data matches the filter criteria.")
    else:
        plt.figure(figsize=(10, 6))
        sns.set(style="whitegrid")

        # Assign colors — Game category gets pink
        bubble_colors = filtered_df['Category'].apply(lambda x: 'pink' if x == 'Game' or x == 'गेம்' else 'skyblue')

        # Bubble chart
        plt.scatter(
            filtered_df['Size_MB'],
            filtered_df['Rating'],
            s=filtered_df['Installs'] / 1000,  # bubble size
            c=bubble_colors,
            alpha=0.6,
            edgecolors='w',
            linewidth=0.5
        )

        plt.xlabel('App Size (MB)')
        plt.ylabel('Average Rating')
        plt.title('Bubble Chart: App Size vs Rating (Bubble Size = Installs)')
        plt.grid(True)
        plt.show()
else:
    print("Graph hidden — Available only between 5 PM and 7 PM IST.")


Graph hidden — Available only between 5 PM and 7 PM IST.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Category'] = filtered_df['Category'].replace(translation_map)
