In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import pytz

# Load the dataset from the hard-coded local CSV path.
df = pd.read_csv(r"E:\NULLCLASS\datasets\cleaned_data.csv")

# Convert 'Installs' to numeric: keep only rows whose value, rendered as a
# string, consists purely of digits, then cast the column to int.
installs_is_digits = df['Installs'].apply(lambda x: str(x).isdigit())
# Take an explicit copy of the filtered rows so the column assignments below
# write to an independent frame instead of a slice of the original (this is
# what otherwise triggers pandas' SettingWithCopyWarning).
df = df[installs_is_digits].copy()
df['Installs'] = df['Installs'].astype(int)

# Convert 'Reviews' to numeric; values that cannot be parsed become NaN.
df['Reviews'] = pd.to_numeric(df['Reviews'], errors='coerce')

# Convert 'Last Updated' to datetime; values that cannot be parsed become NaT.
df['Last Updated'] = pd.to_datetime(df['Last Updated'], errors='coerce')

# Build one boolean mask per rule, then slice the frame once with all of them.

# Rule 1: 'Last Updated' falls within the last 365 days (relative to the
# local clock at run time).
one_year_ago = datetime.now() - timedelta(days=365)
recently_updated = df['Last Updated'] >= one_year_ago

# Rule 2: at least 100,000 installs and strictly more than 1,000 reviews.
enough_installs = df['Installs'] >= 100000
enough_reviews = df['Reviews'] > 1000

# Rule 3: drop genres whose name starts with A, F, E, G, I or K.
# na=False means a missing genre is treated as "does not match", so such
# rows are kept by this rule.
excluded_initials = ('A', 'F', 'E', 'G', 'I', 'K')
excluded_genre = df['Genres'].str.startswith(excluded_initials, na=False)

df = df[recently_updated & enough_installs & enough_reviews & ~excluded_genre]

# Current wall-clock time in India Standard Time.
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist).time()

# The heatmap may only be shown between 2 PM and 4 PM IST (both bounds inclusive).
start_time = datetime.strptime("14:00:00", "%H:%M:%S").time()
end_time = datetime.strptime("16:00:00", "%H:%M:%S").time()
within_window = start_time <= current_time <= end_time

if not within_window:
    print(" Heatmap is only visible between 2 PM - 4 PM IST.")
else:
    # Pairwise correlation between the three numeric columns of interest.
    metric_columns = ['Installs', 'Rating', 'Reviews']
    corr_matrix = df[metric_columns].corr()

    # Draw the annotated heatmap on a fresh 8x6 figure.
    plt.figure(figsize=(8, 6))
    sns.heatmap(
        corr_matrix,
        annot=True,
        cmap='coolwarm',
        fmt=".2f",
        linewidths=0.5,
    )
    plt.title("Correlation Matrix: Installs, Ratings, and Reviews")

    # Save to disk before show(), then display the figure.
    plt.savefig(
        r"E:\NULLCLASS\tasks\Correlation_Heatmap.png",
        dpi=300,
        bbox_inches="tight",
    )
    plt.show()

    print("Heatmap generated and saved as 'Correlation_Heatmap.png'")


 Heatmap is only visible between 2 PM - 4 PM IST.
