# In [3]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import pytz

# --- Configuration -----------------------------------------------------------
# Location of the Play Store dataset.
DATA_PATH = r"E:\NULLCLASS\Play Store Data.csv"

# Popularity thresholds: at least MIN_INSTALLS installs and strictly more than
# MIN_REVIEWS reviews.
MIN_INSTALLS = 100000
MIN_REVIEWS = 1000

# Apps whose genre starts with any of these letters are excluded.
EXCLUDED_GENRE_INITIALS = ('A', 'F', 'E', 'G', 'I', 'K')

# The heatmap is only shown inside this wall-clock window (both ends
# inclusive), evaluated in DISPLAY_TIMEZONE.
DISPLAY_TIMEZONE = 'Asia/Kolkata'
DISPLAY_WINDOW_START = datetime.strptime("14:00:00", "%H:%M:%S").time()
DISPLAY_WINDOW_END = datetime.strptime("16:00:00", "%H:%M:%S").time()

# Columns that take part in the correlation matrix.
CORRELATION_COLUMNS = ['Installs', 'Rating', 'Reviews']


def clean_play_store_data(raw):
    """Return a cleaned copy of the raw Play Store table.

    Rows whose 'Installs' value does not start with a digit (for example the
    literal 'Free') are dropped. 'Installs' is converted to float after
    removing '+' and ',' characters, 'Reviews' is converted to numeric and
    'Last Updated' to datetime; values that cannot be parsed in the latter two
    columns become NaN / NaT.

    Args:
        raw: DataFrame with string-valued 'Installs' plus 'Reviews' and
            'Last Updated' columns.

    Returns:
        A new DataFrame; ``raw`` itself is left unmodified.
    """
    has_numeric_installs = raw['Installs'].str.contains(r'^\d', na=False)
    # Work on an explicit copy: assigning columns into a filtered selection is
    # what triggers pandas' SettingWithCopyWarning.
    cleaned = raw[has_numeric_installs].copy()
    cleaned['Installs'] = (
        cleaned['Installs'].str.replace(r'[+,]', '', regex=True).astype(float)
    )
    cleaned['Reviews'] = pd.to_numeric(cleaned['Reviews'], errors='coerce')
    cleaned['Last Updated'] = pd.to_datetime(cleaned['Last Updated'], errors='coerce')
    return cleaned


def filter_apps(apps, cutoff):
    """Select recently updated, popular apps outside the excluded genres.

    A row is kept when all of the following hold:
      * 'Last Updated' is on or after ``cutoff``;
      * 'Installs' >= MIN_INSTALLS and 'Reviews' > MIN_REVIEWS;
      * 'Genres' does not start with one of EXCLUDED_GENRE_INITIALS
        (rows with a missing genre are kept).

    Args:
        apps: DataFrame as returned by ``clean_play_store_data``.
        cutoff: Earliest accepted 'Last Updated' value.

    Returns:
        The matching rows of ``apps``.
    """
    recently_updated = apps['Last Updated'] >= cutoff
    popular = (apps['Installs'] >= MIN_INSTALLS) & (apps['Reviews'] > MIN_REVIEWS)
    excluded_genre = apps['Genres'].str.startswith(EXCLUDED_GENRE_INITIALS, na=False)
    return apps[recently_updated & popular & ~excluded_genre]


def is_within_display_window(moment):
    """Return True when ``moment`` (a ``datetime.time``) lies in the display window.

    Both DISPLAY_WINDOW_START and DISPLAY_WINDOW_END are inclusive.
    """
    return DISPLAY_WINDOW_START <= moment <= DISPLAY_WINDOW_END


# True when executed as a script or as a notebook cell; importing the module
# only defines the helpers above and does not touch the dataset.
if __name__ == "__main__":
    # Load dataset and normalise the columns used below
    df = pd.read_csv(DATA_PATH)
    df = clean_play_store_data(df)

    # Filter: only apps updated within the last year that meet the install,
    # review and genre criteria
    one_year_ago = datetime.now() - timedelta(days=365)
    df = filter_apps(df, one_year_ago)

    # Get current time in IST
    ist = pytz.timezone(DISPLAY_TIMEZONE)
    current_time = datetime.now(ist).time()

    # Heatmap should only be visible between 2 PM - 4 PM IST
    if is_within_display_window(current_time):
        # Compute correlation matrix
        corr_matrix = df[CORRELATION_COLUMNS].corr()

        if corr_matrix.isna().all().all():
            # Too few rows survived the filters for any coefficient to be
            # defined; say so instead of drawing a blank heatmap.
            print("Not enough data after filtering to compute correlations.")
        else:
            # Plot heatmap
            plt.figure(figsize=(8, 6))
            sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
            plt.title("Correlation Matrix: Installs, Ratings, and Reviews")
            plt.show()
    else:
        print("Heatmap is only visible between 2 PM - 4 PM IST.")


# Output: Heatmap is only visible between 2 PM - 4 PM IST.
