In [3]:
import os
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import pytz

# Location of the input CSV (machine-specific absolute path).
file_path = r"E:\NULLCLASS\datasets\cleaned_data.csv"
df = pd.read_csv(file_path)

# Header cells may carry stray whitespace; strip it so the name lookups
# below match.
df.columns = df.columns.str.strip()
print("Available Columns:", df.columns)

# Every column the rest of the script reads must be present up front.
required_columns = ["Installs", "Price", "Android Ver", "Size", "Content Rating", "App", "Type", "Category"]
missing_columns = [col for col in required_columns if col not in df.columns]

if missing_columns:
    print(f"ERROR: Missing columns in dataset: {missing_columns}")
    # `exit()` is an interactive helper injected by the `site` module and,
    # called without an argument, reports success. Raise SystemExit with a
    # non-zero status so the failure is visible to whatever ran the script.
    raise SystemExit(1)

# Values that cannot be parsed as numbers become NaN instead of raising.
df["Installs"] = pd.to_numeric(df["Installs"], errors="coerce")
df["Price"] = pd.to_numeric(df["Price"], errors="coerce")
# Revenue is derived as price times installs (NaN when either input is NaN).
df["Revenue"] = df["Price"] * df["Installs"]

def convert_size(size):
    """Convert a textual app size to a number of megabytes.

    Strings such as ``"19M"`` are read as megabytes and strings containing
    ``k``/``K`` (e.g. ``"512k"``) as kilobytes, which are divided by 1024.
    Thousands separators and surrounding whitespace are ignored.

    Args:
        size: The raw cell value. Non-string values (already-numeric sizes,
            NaN, None) are passed through untouched.

    Returns:
        The size in megabytes as a float, ``None`` when a string does not
        hold a plain non-negative decimal number, or the input itself when
        it is not a string.
    """
    if not isinstance(size, str):
        return size

    text = size.replace("M", "").replace(",", "").strip()

    in_kilobytes = "k" in text or "K" in text
    if in_kilobytes:
        text = text.replace("k", "").replace("K", "")

    # Validate before calling float() in *both* branches: a word that merely
    # contains "k", or a value like "1.2.3", must yield None rather than
    # raise. Dropping at most one "." and requiring decimal digits accepts
    # exactly the inputs float() can parse as a plain number.
    if not text.replace(".", "", 1).isdecimal():
        return None

    value = float(text)
    return value / 1024 if in_kilobytes else value

# Normalise sizes to megabytes; unparseable strings become missing values.
df["Size"] = df["Size"].apply(convert_size)
# Keep only the leading "major.minor" number of the Android version text;
# rows without such a number become NaN.
df["Android Ver"] = pd.to_numeric(df["Android Ver"].str.extract(r'(\d+\.\d+)')[0], errors='coerce')

# All conditions must hold. Comparisons against NaN are False, so rows with
# missing Installs, Revenue, Android Ver or Size are dropped here as well.
df = df[(df["Installs"] > 10000) &
        (df["Revenue"] > 10000) &
        (df["Android Ver"] > 4.0) &
        (df["Size"] > 15) &
        (df["Content Rating"] == "Everyone")]

# Keep apps whose name is at most 30 characters long. The vectorised string
# length always yields a boolean mask; `Series.apply` on an empty frame
# returns an object-dtype Series that pandas would treat as a list of column
# labels, silently dropping every column.
df = df[df["App"].astype(str).str.len() <= 30]

# Restrict to the three categories with the highest total installs.
top_categories = df.groupby("Category")["Installs"].sum().nlargest(3).index
df = df[df["Category"].isin(top_categories)]

# One row per (Category, Type) pair with mean installs and mean revenue.
category_summary = df.groupby(["Category", "Type"]).agg(
    Avg_Installs=("Installs", "mean"),
    Avg_Revenue=("Revenue", "mean")
).reset_index()

# The chart is only produced during a fixed window of Indian Standard Time.
ist = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(ist).time()

# Window bounds (both inclusive) as wall-clock times.
start_time = datetime.strptime("13:00:00", "%H:%M:%S").time()
end_time = datetime.strptime("14:00:00", "%H:%M:%S").time()

save_path = r"E:\NULLCLASS\tasks\Dual_axis.html"

if start_time <= current_time <= end_time:
    # category_summary holds one row per (Category, Type). A two-level x-axis
    # keeps the Free and Paid rows of a category side by side instead of
    # stacking them on the same x position.
    x_labels = [category_summary["Category"].tolist(),
                category_summary["Type"].tolist()]

    fig = go.Figure()
    fig.add_trace(go.Bar(x=x_labels,
                         y=category_summary["Avg_Installs"],
                         name="Average Installs", marker_color="blue"))

    # Revenue is in different units from installs, so it gets its own axis
    # on the right-hand side ("y2") rather than sharing the installs scale.
    fig.add_trace(go.Scatter(x=x_labels,
                             y=category_summary["Avg_Revenue"],
                             name="Average Revenue", marker_color="red",
                             mode="lines+markers", yaxis="y2"))

    fig.update_layout(title="Comparison of Average Installs and Revenue (Free vs Paid)",
                      xaxis_title="Category / Type",
                      yaxis_title="Average Installs",
                      yaxis2=dict(title="Average Revenue", overlaying="y", side="right"),
                      legend_title="Metric")

    fig.write_html(save_path)
    print(f"Graph saved at: {save_path}")

else:
    # Outside the window, overwrite the output file with a placeholder page.
    with open(save_path, "w", encoding="utf-8") as file:
        file.write("<html><body><h1>This graph is only available between 1 PM - 2 PM IST.</h1></body></html>")
    print("Graph not available right now.")


Available Columns: Index(['App', 'Translated_Review', 'Sentiment', 'Sentiment_Polarity',
       'Sentiment_Subjectivity', 'Category', 'Rating', 'Reviews', 'Size',
       'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated',
       'Current Ver', 'Android Ver', 'Rating Group'],
      dtype='object')
Graph not available right now.
