In [26]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import webbrowser
import os
from IPython.display import display, HTML
import altair as alt
import datetime
import pytz
import os
import re

# Fetch the VADER lexicon only when it is not already available locally, so
# re-running this cell does not trigger a download attempt every time.
# nltk.data.find raises LookupError when the resource cannot be located.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\91866\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [38]:
#Task 1

def load_and_clean_data(file_path):
    """Load the apps CSV at ``file_path`` and return a cleaned DataFrame.

    Cleaning steps:
      * ``Installs``: strip ``+`` and ``,`` and convert to a number.
      * ``Size_MB``: derived from ``Size``; ``M`` is removed and ``k`` is
        rewritten as ``e-3`` so kilobyte sizes become fractions of a megabyte.
      * ``Price``: strip ``$`` and convert to a number.
      * ``Reviews``: convert to a number.
      * ``Last Updated``: parse as datetime.
      * ``Android_Ver_Clean``: first three characters of the first
        space-separated token of ``Android Ver``, converted to a number.
      * ``Content Rating`` is renamed to ``Content_Rating``.
      * ``Revenue`` = ``Price`` * ``Installs``.

    Values that cannot be converted become NaN/NaT. Rows missing any of the
    required fields are dropped, then duplicates on (``App``, ``Category``).

    Parameters
    ----------
    file_path : str
        Path to the CSV file.

    Returns
    -------
    pandas.DataFrame or None
        The cleaned frame, or ``None`` (after printing an error) when
        ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        print(f"CRITICAL ERROR: Main file not found at {file_path}")
        return None

    df = pd.read_csv(file_path)

    # astype(str) keeps the .str accessor usable even when pandas inferred a
    # numeric dtype for the column (e.g. an already-clean CSV).
    installs_text = df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True)
    df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

    # regex=False: 'M', 'k' and '$' are literal characters, not patterns.
    size_text = (
        df['Size'].astype(str)
        .str.replace('M', '', regex=False)
        .str.replace('k', 'e-3', regex=False)
    )
    df['Size_MB'] = pd.to_numeric(size_text, errors='coerce')

    price_text = df['Price'].astype(str).str.replace('$', '', regex=False)
    df['Price'] = pd.to_numeric(price_text, errors='coerce')

    df['Reviews'] = pd.to_numeric(df['Reviews'], errors='coerce')
    df['Last Updated'] = pd.to_datetime(df['Last Updated'], errors='coerce')

    # e.g. "4.0.3 and up" -> "4.0.3" -> "4.0" -> 4.0
    android_text = df['Android Ver'].astype(str).str.split(' ').str[0].str.slice(0, 3)
    df['Android_Ver_Clean'] = pd.to_numeric(android_text, errors='coerce')

    df = df.rename(columns={'Content Rating': 'Content_Rating'})
    df['Revenue'] = df['Price'] * df['Installs']

    required_columns = [
        'Installs', 'Size_MB', 'Last Updated', 'Rating',
        'Reviews', 'Type', 'Price', 'Content_Rating',
        'Android_Ver_Clean', 'App', 'Category'
    ]
    df = df.dropna(subset=required_columns)
    df = df.drop_duplicates(subset=['App', 'Category'])
    return df

def check_time(start_hour, end_hour, current_time):
    """Return True when ``current_time`` lies in ``[start_hour:00, end_hour:00)``.

    The start of the window is inclusive and the end is exclusive.
    ``current_time`` is compared against ``datetime.time`` values built from
    the two whole hours.
    """
    window_opens = datetime.time(hour=start_hour, minute=0)
    window_closes = datetime.time(hour=end_hour, minute=0)
    if current_time < window_opens:
        return False
    return current_time < window_closes

def save_plot_as_html(chart, filename):
    """Call ``chart.save(filename)`` and print whether it succeeded.

    Any exception raised while saving is caught and reported on stdout
    instead of being propagated. Returns None.
    """
    try:
        chart.save(filename)
        outcome = f"Chart successfully saved to {filename}"
    except Exception as err:
        outcome = f"Error saving chart {filename}: {err}"
    print(outcome)

def create_dashboard(charts_data, filename="dashboard.html"):
    """Write a one-page HTML dashboard with one panel per chart entry.

    Parameters
    ----------
    charts_data : iterable of dict
        Each entry must provide ``'title'`` and ``'file_path'``. When
        ``'file_path'`` is truthy it is embedded in an ``<iframe>``;
        otherwise ``'message'`` is shown inside a "Chart Unavailable"
        placeholder.
    filename : str
        Path of the HTML file to write (UTF-8).

    Notes
    -----
    Titles, messages and paths are HTML-escaped before being inserted, so
    characters such as ``<``, ``>``, ``&`` and quotes cannot break the markup.
    Failures are printed rather than raised; a failure to display the link is
    reported separately from a failure to write the file.
    """
    # Function-scope import keeps this definition self-contained.
    from html import escape

    html_content = """
    <!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>App Dashboard</title><style>
    body { font-family: 'Arial', sans-serif; background-color: #f4f4f4; margin: 0; padding: 20px; }
    h1 { text-align: center; color: #333; } .dashboard-grid { display: grid;
    grid-template-columns: repeat(auto-fit, minmax(600px, 1fr)); gap: 20px; padding: 20px; }
    .chart-container { border: 1px solid #ddd; border-radius: 8px; background-color: #ffffff;
    padding: 15px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }
    .chart-container h2 { font-size: 1.2em; color: #555; margin-top: 0; }
    iframe { width: 100%; height: 450px; border: none; }
    .unavailable { height: 450px; display: flex; align-items: center; justify-content: center;
    flex-direction: column; color: #999; }
    .unavailable h3 { font-size: 1.5em; margin: 0; }
    </style></head><body><h1>Google Play Store App Dashboard</h1><div class="dashboard-grid">
    """
    for chart in charts_data:
        title = escape(str(chart['title']))
        html_content += f"""<div class="chart-container"><h2>{title}</h2>"""
        if chart['file_path']:
            src = escape(str(chart['file_path']))
            html_content += f"""<iframe src="{src}"></iframe>"""
        else:
            message = escape(str(chart['message']))
            html_content += f"""<div class="unavailable"><h3>Chart Unavailable</h3><p>{message}</p></div>"""
        html_content += """</div>"""
    html_content += """</div></body></html>"""

    try:
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(html_content)
    except Exception as e:
        print(f"Error writing dashboard file: {e}")
        return
    print(f"\nSuccessfully generated dashboard: {filename}")

    # Showing the link is best-effort: the file is already on disk.
    try:
        display(HTML(f"<h2><a href='{escape(str(filename))}' target='_blank'>Click to View Dashboard</a></h2>"))
    except Exception as e:
        print(f"Dashboard written, but the link could not be displayed: {e}")

def task_grouped_bar(df):
    """Build the Task 1 bar chart, or return None when a filter leaves no data.

    Steps:
      1. Keep apps with ``Size_MB`` >= 10 whose ``Last Updated`` month is January.
      2. Per ``Category``: mean ``Rating``, summed ``Installs`` and ``Reviews``.
      3. Keep categories whose mean rating is >= 4.0.
      4. Take the 10 categories with the largest summed installs.
      5. Plot mean rating and summed reviews per category as Altair bars
         coloured by metric.

    A message is printed whenever a step leaves nothing to plot.
    """
    is_large_enough = df['Size_MB'] >= 10
    updated_in_january = df['Last Updated'].dt.month == 1
    january_apps = df.loc[is_large_enough & updated_in_january]
    if january_apps.empty:
        print("Task 1: No apps found matching Size and Date filters.")
        return None

    per_category = january_apps.groupby('Category', as_index=False).agg(
        Average_Rating=('Rating', 'mean'),
        Total_Installs=('Installs', 'sum'),
        Total_Reviews=('Reviews', 'sum'),
    )
    if per_category.empty:
        print("Task 1: No categories found after grouping.")
        return None

    well_rated = per_category.loc[per_category['Average_Rating'] >= 4.0]
    if well_rated.empty:
        print("Task 1: No categories found with Avg Rating >= 4.0.")
        return None

    leaders = well_rated.nlargest(10, 'Total_Installs')
    if leaders.empty:
        print("Task 1: No data left for Top 10.")
        return None

    # Long format: one row per (category, metric) pair for the colour encoding.
    long_form = leaders[['Category', 'Average_Rating', 'Total_Reviews']].melt(
        'Category', var_name='Metric', value_name='Value'
    )

    x_axis = alt.X('Category:N', sort='-y', title='App Category')
    y_axis = alt.Y('Value:Q', title='Value')
    metric_colour = alt.Color('Metric:N', title='Metric')

    bars = alt.Chart(long_form).mark_bar().encode(
        x=x_axis,
        y=y_axis,
        color=metric_colour,
        tooltip=['Category', 'Metric', 'Value'],
    )
    sized = bars.properties(
        title='Average Rating and Total Reviews by Top 10 App Categories (Filtered)',
        width=700,
        height=400,
    )
    return sized.interactive()


# --- Run configuration: dataset location and the current time in IST ---
file_path = r'C:\Users\91866\Desktop\Projects-3\googleplaystore.csv'
IST = pytz.timezone('Asia/Kolkata')
now = datetime.datetime.now(IST)
current_time = now.time()
print("--- Dashboard Generation Started ---")
print(f"Current IST time: {current_time.strftime('%H:%M:%S')}")

# --- Load the dataset; df_master stays None when the file is missing or loading fails ---
df_master = None
if os.path.exists(file_path):
    try:
        df_master = load_and_clean_data(file_path)
        print("Data loaded and cleaned successfully.")
    except Exception as load_error:
        print(f"Error loading or cleaning data: {load_error}")
else:
    print(f"Error: Dataset file not found at {file_path}")

# One dict per dashboard panel: 'title', 'file_path' (None = placeholder), 'message'.
charts_data = []

# --- Task 1: only rendered between 15:00 and 17:00 IST ---
print("\nProcessing Task 1 (Bar Chart)...")
if df_master is None:
    print("Task 1 skipped: Main data failed to load.")
elif not check_time(15, 17, current_time):
    print("Task 1 is unavailable outside 3 PM – 5 PM IST.")
    charts_data.append({
        'title': 'Task 1: Grouped Bar Chart',
        'file_path': None,
        'message': 'This chart is only available between 3 PM and 5 PM IST.'
    })
else:
    print("Time is within 3 PM - 5 PM window. Running task...")
    chart1 = task_grouped_bar(df_master)

    if chart1:
        save_plot_as_html(chart1, 'task_1_bar_chart.html')
        charts_data.append({
            'title': 'Task 1: Top 10 Categories (Jan, >4.0 Rating, >10M)',
            'file_path': 'task_1_bar_chart.html',
            'message': ''
        })
    else:
        print("Task 1 ran, but no data matched the filters.")
        charts_data.append({
            'title': 'Task 1: Grouped Bar Chart',
            'file_path': None,
            'message': 'No data available after applying filters.'
        })

# --- Assemble the dashboard page from whatever panels were collected ---
print("\n--- All tasks processed. Generating dashboard.html ---")
if charts_data:
    create_dashboard(charts_data, "dashboard.html")
else:
    print("No charts were generated to add to the dashboard.")

--- Dashboard Generation Started ---
Current IST time: 16:15:25
Data loaded and cleaned successfully.

Processing Task 1 (Bar Chart)...
Time is within 3 PM - 5 PM window. Running task...
Chart successfully saved to task_1_bar_chart.html

--- All tasks processed. Generating dashboard.html ---

Successfully generated dashboard: dashboard.html


In [28]:
#Task 2

if not check_time(18, 20, current_time):
    print("Task 2 (Choropleth Map) will not run. It is only available between 6 PM and 8 PM IST.")
else:
    print("Running Task 2: Time is within the 6 PM - 8 PM window.")

    # df_master is produced by the data-loading cell; it is absent or None if that cell failed.
    if 'df_master' in locals() and df_master is not None:
        try:
            # Drop categories starting with A, C, G or S, then keep apps above 1M installs.
            skipped_initials = ('A', 'C', 'G', 'S')
            df2 = df_master.copy()
            df2 = df2.loc[~df2['Category'].str.startswith(skipped_initials)]
            df2 = df2.loc[df2['Installs'] > 1_000_000]

            # Restrict to the five categories with the largest summed installs.
            installs_per_category = df2.groupby('Category')['Installs'].sum()
            top5 = installs_per_category.nlargest(5).index
            df2 = df2.loc[df2['Category'].isin(top5)]

            if not df2.empty:
                print(f"Found {len(df2)} apps to plot after all filters.")

                df2_agg = df2.groupby('Category')['Installs'].sum().reset_index()

                category_bars = alt.Chart(df2_agg).mark_bar().encode(
                    x=alt.X('Category', sort='-y'),
                    y=alt.Y('Installs'),
                    color='Category',
                    tooltip=['Category', 'Installs'],
                )
                chart2 = category_bars.properties(
                    title='Global Installs by Category (Top 5, Filtered)'
                ).interactive()

                print("Chart generation complete. Displaying chart...")
                chart2.display()
            else:
                print("No data found matching all filters for Task 2.")

        except Exception as task_error:
            print(f"An error occurred while generating Task 2: {task_error}")
    else:
        print("Error: Task 2 cannot run because df_master was not created.")
        print("Please check for errors in the previous cell.")

Task 2 (Choropleth Map) will not run. It is only available between 6 PM and 8 PM IST.


In [29]:
# Task 3

import plotly.graph_objects as go
from plotly.subplots import make_subplots

if not check_time(13, 14, current_time):
    print("Task 3 (Dual-Axis Chart) will not run. It is only available between 1 PM and 2 PM IST.")
else:
    print("Running Task 3: Time is within the 1 PM - 2 PM window.")

    # df_master is produced by the data-loading cell; it is absent or None if that cell failed.
    if 'df_master' in locals() and df_master is not None:
        try:
            # The three categories with the largest summed installs.
            installs_by_category = df_master.groupby('Category')['Installs'].sum()
            top_3_cats = installs_by_category.nlargest(3).index
            print(f"Top 3 Categories: {list(top_3_cats)}")

            app_level_mask = (
                df_master['Category'].isin(top_3_cats)
                & (df_master['Installs'] >= 10000)
                & (df_master['Android_Ver_Clean'] > 4.0)
                & (df_master['Size_MB'] > 15)
                & (df_master['Content_Rating'] == 'Everyone')
                & (df_master['App'].str.len() <= 30)
            )
            df_task3_filtered = df_master[app_level_mask].copy()

            # Free apps are always kept; other apps need revenue above 10,000.
            free_or_earning = (
                (df_task3_filtered['Type'] == 'Free')
                | (df_task3_filtered['Revenue'] > 10000)
            )
            df_task3_filtered = df_task3_filtered[free_or_earning]

            if not df_task3_filtered.empty:
                print(f"Found {len(df_task3_filtered)} apps to plot after all filters.")

                df_agg = df_task3_filtered.groupby(['Category', 'Type']).agg(
                    Average_Installs=('Installs', 'mean'),
                    Average_Revenue=('Revenue', 'mean'),
                ).reset_index()

                if not df_agg.empty:
                    all_cats = df_agg['Category'].unique()
                    # Reindex so both frames have one row per category, in the same order.
                    df_free = df_agg[df_agg['Type'] == 'Free'].set_index('Category').reindex(all_cats)
                    df_paid = df_agg[df_agg['Type'] == 'Paid'].set_index('Category').reindex(all_cats)

                    fig = make_subplots(specs=[[{"secondary_y": True}]])

                    # (source frame, column, legend label, colour, use secondary axis)
                    bar_specs = [
                        (df_free, 'Average_Installs', 'Avg Installs (Free)', 'blue', False),
                        (df_free, 'Average_Revenue', 'Avg Revenue (Free)', 'red', True),
                        (df_paid, 'Average_Installs', 'Avg Installs (Paid)', 'lightblue', False),
                        (df_paid, 'Average_Revenue', 'Avg Revenue (Paid)', 'orange', True),
                    ]
                    for source, column, label, colour, on_secondary in bar_specs:
                        fig.add_trace(
                            go.Bar(x=all_cats, y=source[column], name=label, marker_color=colour),
                            secondary_y=on_secondary,
                        )

                    fig.update_layout(
                        title_text="Avg Installs vs. Revenue for Top 3 Categories (Free vs. Paid)",
                        barmode='group',
                        xaxis_title="App Category",
                    )
                    fig.update_yaxes(title_text="Average Installs", secondary_y=False, tickformat=',.0f')
                    fig.update_yaxes(title_text="Average Revenue ($)", secondary_y=True, tickformat='$,.0f')

                    print("Chart generation complete. Displaying chart...")
                    fig.show()
                else:
                    print("No data to plot after aggregation.")
            else:
                print("No data found matching all filters for Task 3.")

        except Exception as task_error:
            print(f"An error occurred while generating Task 3: {task_error}")
    else:
        print("Error: Task 3 cannot run because df_master was not created.")
        print("Please check for errors in the previous cell.")

Task 3 (Dual-Axis Chart) will not run. It is only available between 1 PM and 2 PM IST.


In [30]:
# Task 4

if check_time(18, 21, current_time):
    print("Running Task 4: Time is within the 6 PM - 9 PM window.")

    # df_master is produced by the data-loading cell; it is absent or None if that cell failed.
    if 'df_master' not in locals() or df_master is None:
        print("Error: Task 4 cannot run because df_master was not created.")
        print("Please check for errors in the previous cell.")
    else:
        try:
            # Keep apps with > 500 reviews, in categories starting with E/C/B,
            # whose name does not start with x/y/z and contains no 's' (any case).
            # .copy() makes df4 an independent frame so the column assignment
            # below does not write into a slice of df_master's copy.
            df4 = df_master[
                (df_master['Reviews'] > 500) &
                (df_master['Category'].str.startswith(tuple(['E', 'C', 'B']))) &
                (~df_master['App'].str.lower().str.startswith(tuple(['x', 'y', 'z']))) &
                (~df_master['App'].str.contains('S', case=False))
            ].copy()

            if df4.empty:
                print("No data found matching all filters for Task 4.")
            else:
                print(f"Found {len(df4)} apps to plot after all filters.")

                # Category labels appear upper-case elsewhere in this notebook
                # (e.g. 'BEAUTY'), so title-case keys would never match. Keys are
                # stored upper-case and the lookup is case-insensitive.
                translation = {'BEAUTY': 'सौंदर्य', 'BUSINESS': 'வணிகம்', 'DATING': 'Verabredung'}
                df4['Category'] = df4['Category'].map(
                    lambda name: translation.get(str(name).upper(), name)
                )

                # Monthly install totals per category.
                df4_grouped = df4.groupby([pd.Grouper(key='Last Updated', freq='M'), 'Category'])['Installs'].sum().reset_index()

                if df4_grouped.empty:
                    print("No data to plot after grouping.")
                else:
                    # Change relative to the category's previous row; > 20% is highlighted.
                    df4_grouped['Growth'] = df4_grouped.groupby('Category')['Installs'].pct_change()
                    df4_grouped['Highlight'] = df4_grouped['Growth'] > 0.2

                    chart4 = alt.Chart(df4_grouped).mark_area().encode(
                        x='yearmonth(Last Updated):T',
                        y='Installs:Q',
                        color='Category:N',
                        opacity=alt.condition('datum.Highlight', alt.value(0.9), alt.value(0.4)),
                        tooltip=['Last Updated', 'Category', 'Installs', 'Growth']
                    ).properties(
                        title='Total Installs Trend by Category (Filtered)'
                    ).interactive()

                    print("Chart generation complete. Displaying chart...")
                    chart4.display()

        except Exception as e:
            print(f"An error occurred while generating Task 4: {e}")
else:
    print("Task 4 (Time Series Line Chart) will not run. It is only available between 6 PM and 9 PM IST.")

Task 4 (Time Series Line Chart) will not run. It is only available between 6 PM and 9 PM IST.


In [31]:
import pandas as pd
import altair as alt
import datetime
import pytz
import os
import re
from IPython.display import display, HTML


if check_time(17, 19, current_time):
    print("Running Task 5: Time is within the 5 PM - 7 PM window.")

    # df_master_task5 (apps joined with review sentiment) must be created by an
    # earlier cell. Check that the name exists before reading it, so a fresh
    # kernel gets the explanatory message instead of a NameError.
    if 'df_master_task5' not in globals() or df_master_task5 is None:
        print("Error: Task 5 cannot run because df_master_task5 was not created.")
    else:
        try:
            categories_to_include = [
                'GAME', 'BEAUTY', 'BUSINESS', 'COMICS', 'COMMUNICATION',
                'DATING', 'ENTERTAINMENT', 'SOCIAL', 'EVENTS'
            ]

            # App-level filters; names containing 's' (any case) are excluded.
            df_task5_filtered = df_master_task5[
                (df_master_task5['Rating'] > 3.5) &
                (df_master_task5['Category'].isin(categories_to_include)) &
                (df_master_task5['Reviews'] > 500) &
                (~df_master_task5['App'].str.lower().str.contains('s')) &
                (df_master_task5['Installs'] > 50000) &
                (df_master_task5['Sentiment_Subjectivity'] > 0.5)
            ].copy()

            if df_task5_filtered.empty:
                print("No data found matching all filters for Task 5.")
            else:
                print(f"Found {len(df_task5_filtered)} apps to plot for Task 5.")

                # Display names for three categories (Hindi, Tamil, German).
                translation_map = {
                    'BEAUTY': 'सौंदर्य',
                    'BUSINESS': 'வணிகம்',
                    'DATING': 'Partnersuche'
                }
                df_task5_filtered['Category'] = df_task5_filtered['Category'].replace(translation_map)

                # Every category is blue except GAME, which is drawn in pink.
                domain_categories = df_task5_filtered['Category'].unique().tolist()
                range_colors = [
                    'pink' if category == 'GAME' else 'blue'
                    for category in domain_categories
                ]

                bubble_chart = alt.Chart(df_task5_filtered).mark_circle().encode(
                    x=alt.X('Size_MB', title='Size (MB)', scale=alt.Scale(zero=False)),
                    y=alt.Y('Rating', title='Average Rating', scale=alt.Scale(zero=False)),
                    size=alt.Size('Installs', title='Number of Installs', legend=None),
                    color=alt.Color('Category', title='Category',
                                    scale=alt.Scale(domain=domain_categories, range=range_colors)
                                   ),
                    tooltip=['App', 'Category', 'Rating', 'Size_MB', 'Installs', 'Sentiment_Subjectivity']
                ).properties(
                    title='App Analysis: Size vs. Rating (Bubble size: Installs)',
                ).interactive()

                chart_filename = 'task_5_bubble_chart.html'
                bubble_chart.save(chart_filename)

                print(f"Successfully saved chart to {chart_filename}")
                print("Displaying chart...")
                display(HTML(filename=chart_filename))
                print("Chart generation complete.")

        except Exception as e:
            print(f"An error occurred while generating Task 5: {e}")
else:
    print("Task 5 (Bubble Chart) will not run. It is only available between 5 PM and 7 PM IST.")

Task 5 (Bubble Chart) will not run. It is only available between 5 PM and 7 PM IST.


In [32]:
# Task 6

if check_time(16, 18, current_time):
    print("Running Task 6: Time is within the 4 PM - 6 PM window.")

    # df_master is produced by the data-loading cell; it is absent or None if that cell failed.
    if 'df_master' not in locals() or df_master is None:
        print("Error: Task 6 cannot run because df_master was not created.")
        print("Please check for errors in the previous cell.")
    else:
        try:
            # Keep apps rated >= 4.2 with > 1000 reviews, sized 20-80 MB, in
            # categories starting with T/P, whose name contains no digit.
            # .copy() makes df6 an independent frame so the column assignment
            # below does not write into a slice of df_master's copy.
            df6 = df_master[
                (df_master['Rating'] >= 4.2) &
                (df_master['Reviews'] > 1000) &
                (df_master['Size_MB'].between(20, 80)) &
                (df_master['Category'].str.startswith(tuple(['T', 'P']))) &
                (~df_master['App'].str.contains(r'\d'))
            ].copy()

            if df6.empty:
                print("No data found matching all filters for Task 6.")
            else:
                print(f"Found {len(df6)} apps to plot after all filters.")

                # Category labels appear upper-case elsewhere in this notebook
                # (e.g. 'COMMUNICATION'), so title-case keys would never match.
                # NOTE(review): presumably the dataset spells the travel category
                # 'TRAVEL_AND_LOCAL' - confirm against the CSV. The lookup
                # normalises case and ' & ' so either spelling is translated.
                translation = {
                    'TRAVEL_AND_LOCAL': 'Voyage et Local',
                    'PRODUCTIVITY': 'Productividad',
                    'PHOTOGRAPHY': '写真'
                }
                df6['Category'] = df6['Category'].map(
                    lambda name: translation.get(
                        str(name).upper().replace(' & ', '_AND_'), name
                    )
                )

                # Monthly install totals per category.
                df6_grouped = df6.groupby([pd.Grouper(key='Last Updated', freq='M'), 'Category'])['Installs'].sum().reset_index()

                if df6_grouped.empty:
                    print("No data to plot after grouping.")
                else:
                    # Change relative to the category's previous row; > 25% is highlighted.
                    df6_grouped['Growth'] = df6_grouped.groupby('Category')['Installs'].pct_change()
                    df6_grouped['Highlight'] = df6_grouped['Growth'] > 0.25

                    chart6 = alt.Chart(df6_grouped).mark_area().encode(
                        x='yearmonth(Last Updated):T',
                        y='Installs:Q',
                        color=alt.Color('Category:N'),
                        opacity=alt.condition('datum.Highlight', alt.value(0.9), alt.value(0.5)),
                        tooltip=['Last Updated', 'Category', 'Installs', 'Growth']
                    ).properties(
                        title='Cumulative Installs Over Time by Category (Filtered)'
                    ).interactive()

                    print("Chart generation complete. Displaying chart...")
                    chart6.display()

        except Exception as e:
            print(f"An error occurred while generating Task 6: {e}")
else:
    print("Task 6 (Stacked Area Chart) will not run. It is only available between 4 PM and 6 PM IST.")

Task 6 (Stacked Area Chart) will not run. It is only available between 4 PM and 6 PM IST.


--- Dashboard Generation Started ---
Current IST time: 15:58:01
Data loaded and cleaned successfully.

Processing Task 1 (Bar Chart)...
Time is within 3 PM - 5 PM window. Running task...
Chart successfully saved to task_1_bar_chart.html

--- All tasks processed. Generating dashboard.html ---

Successfully generated dashboard: dashboard.html


In [None]:
import pandas as pd
import altair as alt
import datetime
import pytz
import os
import re
from IPython.display import display, HTML
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import nltk

# Download the VADER lexicon only when it cannot be found locally:
# a LookupError from nltk.data.find is what triggers the download.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

def load_and_clean_data(file_path):
    """Load the apps CSV at ``file_path`` and return a cleaned DataFrame.

    Cleaning steps:
      * ``Installs``: strip ``+`` and ``,`` and convert to a number.
      * ``Size_MB``: derived from ``Size``; ``M`` is removed and ``k`` is
        rewritten as ``e-3`` so kilobyte sizes become fractions of a megabyte.
      * ``Price``: strip ``$`` and convert to a number.
      * ``Reviews``: convert to a number.
      * ``Last Updated``: parse as datetime.
      * ``Android_Ver_Clean``: first three characters of the first
        space-separated token of ``Android Ver``, converted to a number.
      * ``Content Rating`` is renamed to ``Content_Rating``.
      * ``Revenue`` = ``Price`` * ``Installs``.

    Values that cannot be converted become NaN/NaT. Rows missing any of the
    required fields are dropped, then duplicates on (``App``, ``Category``).

    Parameters
    ----------
    file_path : str
        Path to the CSV file.

    Returns
    -------
    pandas.DataFrame or None
        The cleaned frame, or ``None`` (after printing an error) when
        ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        print(f"CRITICAL ERROR: Main file not found at {file_path}")
        return None

    df = pd.read_csv(file_path)

    # astype(str) keeps the .str accessor usable even when pandas inferred a
    # numeric dtype for the column (e.g. an already-clean CSV).
    installs_text = df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True)
    df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

    # regex=False: 'M', 'k' and '$' are literal characters, not patterns.
    size_text = (
        df['Size'].astype(str)
        .str.replace('M', '', regex=False)
        .str.replace('k', 'e-3', regex=False)
    )
    df['Size_MB'] = pd.to_numeric(size_text, errors='coerce')

    price_text = df['Price'].astype(str).str.replace('$', '', regex=False)
    df['Price'] = pd.to_numeric(price_text, errors='coerce')

    df['Reviews'] = pd.to_numeric(df['Reviews'], errors='coerce')

    df['Last Updated'] = pd.to_datetime(df['Last Updated'], errors='coerce')

    # e.g. "4.0.3 and up" -> "4.0.3" -> "4.0" -> 4.0
    android_text = df['Android Ver'].astype(str).str.split(' ').str[0].str.slice(0, 3)
    df['Android_Ver_Clean'] = pd.to_numeric(android_text, errors='coerce')

    df = df.rename(columns={'Content Rating': 'Content_Rating'})

    df['Revenue'] = df['Price'] * df['Installs']

    required_columns = [
        'Installs', 'Size_MB', 'Last Updated', 'Rating',
        'Reviews', 'Type', 'Price', 'Content_Rating',
        'Android_Ver_Clean', 'App', 'Category'
    ]
    df = df.dropna(subset=required_columns)

    df = df.drop_duplicates(subset=['App', 'Category'])
    return df

def load_and_prepare_review_data(file_path_reviews, df_apps):
    """Attach each app's mean review subjectivity to ``df_apps``.

    Reads the reviews CSV, discards rows without ``Sentiment_Subjectivity``,
    averages that column per ``App`` and inner-joins the result onto
    ``df_apps`` on ``App``.

    Returns the merged DataFrame, or ``None`` (after printing an error) when
    the file does not exist or cannot be read.
    """
    if not os.path.exists(file_path_reviews):
        print(f"Error: Reviews file not found at {file_path_reviews}")
        return None

    try:
        reviews = pd.read_csv(file_path_reviews)
    except Exception as read_error:
        print(f"Error loading review data: {read_error}")
        return None

    scored_reviews = reviews.dropna(subset=['Sentiment_Subjectivity'])
    mean_subjectivity = scored_reviews.groupby('App', as_index=False)['Sentiment_Subjectivity'].mean()
    # Inner join: apps without any scored review are left out.
    return df_apps.merge(mean_subjectivity, on='App', how='inner')

def check_time(start_hour, end_hour, current_time):
    """Return True when ``current_time`` lies in ``[start_hour:00, end_hour:00)``.

    The start of the window is inclusive and the end is exclusive.
    ``current_time`` is compared against ``datetime.time`` values built from
    the two whole hours.
    """
    window_opens = datetime.time(hour=start_hour, minute=0)
    window_closes = datetime.time(hour=end_hour, minute=0)
    if current_time < window_opens:
        return False
    return current_time < window_closes

def save_plot_as_html(chart, filename):
    """Call ``chart.save(filename)`` and print whether it succeeded.

    Any exception raised while saving is caught and reported on stdout
    instead of being propagated. Returns None.
    """
    try:
        chart.save(filename)
        outcome = f"Chart successfully saved to {filename}"
    except Exception as err:
        outcome = f"Error saving chart {filename}: {err}"
    print(outcome)

def save_plotly_chart_as_html(chart, filename):
    """Call ``chart.write_html(filename)`` and print whether it succeeded.

    Any exception raised while writing is caught and reported on stdout
    instead of being propagated. Returns None.
    """
    try:
        chart.write_html(filename)
        outcome = f"Chart successfully saved to {filename}"
    except Exception as err:
        outcome = f"Error saving chart {filename}: {err}"
    print(outcome)

def create_dashboard(charts_data, filename="dashboard.html"):
    """Write a one-page HTML dashboard with one panel per chart entry.

    Parameters
    ----------
    charts_data : iterable of dict
        Each entry must provide ``'title'`` and ``'file_path'``. When
        ``'file_path'`` is truthy it is embedded in an ``<iframe>``;
        otherwise ``'message'`` is shown inside a "Chart Unavailable"
        placeholder.
    filename : str
        Path of the HTML file to write (UTF-8).

    Notes
    -----
    Titles, messages and paths are HTML-escaped before being inserted, so
    characters such as ``<``, ``>``, ``&`` and quotes cannot break the markup.
    Failures are printed rather than raised; a failure to display the link is
    reported separately from a failure to write the file.
    """
    # Function-scope import keeps this definition self-contained.
    from html import escape

    html_content = """
    <!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>App Dashboard</title><style>
    body { font-family: 'Arial', sans-serif; background-color: #f4f4f4; margin: 0; padding: 20px; }
    h1 { text-align: center; color: #333; } .dashboard-grid { display: grid;
    grid-template-columns: repeat(auto-fit, minmax(600px, 1fr)); gap: 20px; padding: 20px; }
    .chart-container { border: 1px solid #ddd; border-radius: 8px; background-color: #ffffff;
    padding: 15px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }
    .chart-container h2 { font-size: 1.2em; color: #555; margin-top: 0; }
    iframe { width: 100%; height: 450px; border: none; }
    .unavailable { height: 450px; display: flex; align-items: center; justify-content: center;
    flex-direction: column; color: #999; }
    .unavailable h3 { font-size: 1.5em; margin: 0; }
    </style></head><body><h1>Google Play Store App Dashboard</h1><div class="dashboard-grid">
    """
    for chart in charts_data:
        title = escape(str(chart['title']))
        html_content += f"""<div class="chart-container"><h2>{title}</h2>"""
        if chart['file_path']:
            src = escape(str(chart['file_path']))
            html_content += f"""<iframe src="{src}"></iframe>"""
        else:
            message = escape(str(chart['message']))
            html_content += f"""<div class="unavailable"><h3>Chart Unavailable</h3><p>{message}</p></div>"""
        html_content += """</div>"""
    html_content += """</div></body></html>"""

    try:
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(html_content)
    except Exception as e:
        print(f"Error writing dashboard file: {e}")
        return
    print(f"\nSuccessfully generated dashboard: {filename}")

    # Showing the link is best-effort: the file is already on disk.
    try:
        display(HTML(f"<h2><a href='{escape(str(filename))}' target='_blank'>Click to View Dashboard</a></h2>"))
    except Exception as e:
        print(f"Dashboard written, but the link could not be displayed: {e}")

def clear_old_charts():
    """Delete the per-task chart HTML files left over in the working directory.

    Only the six known task output filenames are considered; each one that
    exists is removed and reported on stdout.
    """
    print("Cleaning up old chart files...")
    known_outputs = (
        'task_1_bar_chart.html',
        'task_2_map.html',
        'task_3_dual_axis.html',
        'task_4_time_series.html',
        'task_5_bubble_chart.html',
        'task_6_stacked_area.html',
    )
    for stale_path in known_outputs:
        if not os.path.exists(stale_path):
            continue
        os.remove(stale_path)
        print(f"Removed old file: {stale_path}")

def task_grouped_bar(df):
    """Build the Task 1 bar chart, or return None when a filter leaves no data.

    Steps:
      1. Keep apps with ``Size_MB`` >= 10 whose ``Last Updated`` month is January.
      2. Per ``Category``: mean ``Rating``, summed ``Installs`` and ``Reviews``.
      3. Keep categories whose mean rating is >= 4.0.
      4. Take the 10 categories with the largest summed installs.
      5. Plot mean rating and summed reviews per category as Altair bars
         coloured by metric.

    A message is printed whenever a step leaves nothing to plot.
    """
    is_large_enough = df['Size_MB'] >= 10
    updated_in_january = df['Last Updated'].dt.month == 1
    january_apps = df.loc[is_large_enough & updated_in_january]
    if january_apps.empty:
        print("Task 1: No apps found matching Size and Date filters.")
        return None

    per_category = january_apps.groupby('Category', as_index=False).agg(
        Average_Rating=('Rating', 'mean'),
        Total_Installs=('Installs', 'sum'),
        Total_Reviews=('Reviews', 'sum'),
    )
    if per_category.empty:
        print("Task 1: No categories found after grouping.")
        return None

    well_rated = per_category.loc[per_category['Average_Rating'] >= 4.0]
    if well_rated.empty:
        print("Task 1: No categories found with Avg Rating >= 4.0.")
        return None

    leaders = well_rated.nlargest(10, 'Total_Installs')
    if leaders.empty:
        print("Task 1: No data left for Top 10.")
        return None

    # Long format: one row per (category, metric) pair for the colour encoding.
    long_form = leaders[['Category', 'Average_Rating', 'Total_Reviews']].melt(
        'Category', var_name='Metric', value_name='Value'
    )

    x_axis = alt.X('Category:N', sort='-y', title='App Category')
    y_axis = alt.Y('Value:Q', title='Value')
    metric_colour = alt.Color('Metric:N', title='Metric')

    bars = alt.Chart(long_form).mark_bar().encode(
        x=x_axis,
        y=y_axis,
        color=metric_colour,
        tooltip=['Category', 'Metric', 'Value'],
    )
    sized = bars.properties(
        title='Average Rating and Total Reviews by Top 10 App Categories (Filtered)',
        width=700,
        height=400,
    )
    return sized.interactive()

def task_choropleth_map(df):
    """Build the Task 2 choropleth of the top five categories by installs.

    Categories whose name starts with A, C, G or S are excluded. Installs are
    summed per category, categories at or below 1,000,000 installs are
    dropped, and the five largest are kept.

    The dataset has no geographic column, so each of the (up to five)
    categories is attached to a fixed placeholder country purely so that it
    can be drawn on a map.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``Category`` (string) and ``Installs`` (numeric) columns.

    Returns
    -------
    plotly figure or None
        ``None`` (after printing a message) when no category survives the
        filters.
    """
    excluded_initials = ('A', 'C', 'G', 'S')
    eligible = df[~df['Category'].str.startswith(excluded_initials)]

    installs_by_category = eligible.groupby('Category')['Installs'].sum().reset_index()
    installs_by_category = installs_by_category[installs_by_category['Installs'] > 1_000_000]

    top5 = installs_by_category.nlargest(5, 'Installs')

    if top5.empty:
        print("Task 2: No data found after filters.")
        return None

    dummy_locations = {
        'iso_alpha': ['USA', 'IND', 'CHN', 'BRA', 'GBR'],
        'location': ['United States', 'India', 'China', 'Brazil', 'United Kingdom']
    }

    # assign() returns a new frame, so nothing is written into a derived slice.
    dummy_len = len(top5)
    top5 = top5.assign(
        iso_alpha=dummy_locations['iso_alpha'][:dummy_len],
        location=dummy_locations['location'][:dummy_len],
    )

    fig = px.choropleth(
        top5,
        locations="iso_alpha",
        locationmode="ISO-3",
        color="Installs",
        hover_name="Category",
        # ':,.2f' = thousands separator with two decimals; the previous
        # ':,2f' lacked the '.' before the precision and is not a valid format.
        hover_data={"location": True, "Installs": ':,.2f', "iso_alpha": False},
        title="Top 5 Categories by Global Installs (Filtered)"
    )
    fig.update_layout(
        geo=dict(showframe=False, showcoastlines=False)
    )

    return fig

def task_dual_axis(df):
    """Build the Task 3 dual-axis bar chart (installs vs. revenue, Free vs. Paid).

    The three categories with the highest total installs are picked from the
    full frame. Within those, apps are kept only if they have at least 10,000
    installs, an Android version above 4.0, a size above 15 MB, the
    'Everyone' content rating and a name of at most 30 characters. Of those,
    free apps always qualify while other apps need revenue above 10,000.

    Average installs go on the primary y-axis and average revenue on the
    secondary y-axis, one bar pair per app type.

    NOTE(review): relies on `go` and `make_subplots` already being in scope
    when called; confirm they are imported before this cell runs.

    Args:
        df: Apps DataFrame with 'Category', 'Installs', 'Android_Ver_Clean',
            'Size_MB', 'Content_Rating', 'App', 'Type' and 'Revenue'.

    Returns:
        A Plotly figure, or None (after printing a message) when nothing is
        left to plot.
    """
    leading_categories = df.groupby('Category')['Installs'].sum().nlargest(3).index

    app_filter = (
        df['Category'].isin(leading_categories)
        & (df['Installs'] >= 10000)
        & (df['Android_Ver_Clean'] > 4.0)
        & (df['Size_MB'] > 15)
        & (df['Content_Rating'] == 'Everyone')
        & (df['App'].str.len() <= 30)
    )
    candidates = df[app_filter].copy()

    # Free apps pass unconditionally; everything else must clear the revenue bar.
    free_or_earning = (candidates['Type'] == 'Free') | (candidates['Revenue'] > 10000)
    candidates = candidates[free_or_earning]

    if candidates.empty:
        print("Task 3: No data found after filters.")
        return None

    per_type = (
        candidates.groupby(['Category', 'Type'])
        .agg(
            Average_Installs=('Installs', 'mean'),
            Average_Revenue=('Revenue', 'mean'),
        )
        .reset_index()
    )

    if per_type.empty:
        print("Task 3: No data to plot after aggregation.")
        return None

    categories = per_type['Category'].unique()

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # (app type, installs bar colour, revenue bar colour), in drawing order.
    palette = (
        ('Free', 'blue', 'red'),
        ('Paid', 'lightblue', 'orange'),
    )
    for app_type, installs_colour, revenue_colour in palette:
        # Reindex so both app types expose the same category slots (NaN where absent).
        rows = (
            per_type[per_type['Type'] == app_type]
            .set_index('Category')
            .reindex(categories)
        )
        installs_bar = go.Bar(
            x=categories,
            y=rows['Average_Installs'],
            name=f'Avg Installs ({app_type})',
            marker_color=installs_colour,
        )
        revenue_bar = go.Bar(
            x=categories,
            y=rows['Average_Revenue'],
            name=f'Avg Revenue ({app_type})',
            marker_color=revenue_colour,
        )
        fig.add_trace(installs_bar, secondary_y=False)
        fig.add_trace(revenue_bar, secondary_y=True)

    fig.update_layout(
        title_text="Avg Installs vs. Revenue for Top 3 Categories (Free vs. Paid)",
        barmode='group',
        xaxis_title="App Category"
    )
    fig.update_yaxes(title_text="Average Installs", secondary_y=False, tickformat=',.0f')
    fig.update_yaxes(title_text="Average Revenue ($)", secondary_y=True, tickformat='$,.0f')

    return fig

def task_time_series(df):
    """Build the Task 4 monthly install-trend chart (Altair).

    Keeps apps with more than 500 reviews whose category starts with 'E',
    'C' or 'B', and whose name neither starts with x/y/z nor contains the
    letter 's' (both checks case-insensitive). Installs are summed per
    calendar month of 'Last Updated' and per category. A row is flagged for
    shading when its installs grew by more than 20% over the previous row of
    the same category.

    Args:
        df: Apps DataFrame with 'Reviews', 'Category', 'App', 'Installs' and
            a datetime 'Last Updated' column.

    Returns:
        A layered Altair chart (lines plus shaded growth areas), or None
        (after printing a message) when nothing is left to plot.
    """
    # na=False makes missing text count as "no match", so `~` never sees NaN.
    keep = (
        (df['Reviews'] > 500)
        & df['Category'].str.startswith(('E', 'C', 'B'), na=False)
        & ~df['App'].str.lower().str.startswith(('x', 'y', 'z'), na=False)
        & ~df['App'].str.contains('S', case=False, na=False)
    )
    # Explicit copy: the Category column is rewritten below, and writing onto
    # a filtered slice triggers pandas' SettingWithCopyWarning.
    df4 = df[keep].copy()

    if df4.empty:
        print("Task 4: No data found after filters.")
        return None

    # Legend labels: Beauty -> Hindi, Business -> Tamil, Dating -> German.
    translation = {'BEAUTY': 'सौंदर्य', 'BUSINESS': 'வணிகம்', 'DATING': 'Partnersuche'}
    df4['Category'] = df4['Category'].replace(translation)

    # NOTE(review): newer pandas releases prefer the 'ME' alias over 'M';
    # left as 'M' so older pandas versions keep working.
    df4_grouped = df4.groupby([pd.Grouper(key='Last Updated', freq='M'), 'Category'])['Installs'].sum().reset_index()

    if df4_grouped.empty:
        print("Task 4: No data to plot after grouping.")
        return None

    df4_grouped['Growth'] = df4_grouped.groupby('Category')['Installs'].pct_change()
    df4_grouped['Highlight'] = df4_grouped['Growth'] > 0.2

    base = alt.Chart(df4_grouped).encode(
        x='yearmonth(Last Updated):T',
        tooltip=['Last Updated', 'Category', 'Installs', 'Growth']
    )

    lines = base.mark_line().encode(
        y='Installs:Q',
        color='Category:N'
    )

    # Shade from the curve down to the zero baseline. The earlier
    # `y2=alt.value(0)` was a constant *pixel* position (the top edge of the
    # plot), which shaded the region above the line. stack=None stops the
    # per-category areas from being stacked on top of each other, so each
    # area stays aligned with its own line.
    shaded_areas = base.mark_area(opacity=0.3).encode(
        y=alt.Y('Installs:Q', stack=None),
        color='Category:N'
    ).transform_filter(
        'datum.Highlight'
    )

    chart4 = (lines + shaded_areas).properties(
        title='Total Installs Trend by Category (Filtered)'
    ).interactive()

    return chart4

def task_bubble_chart(df_merged):
    """Build the Task 5 bubble chart of app size vs. rating (Altair).

    Rows are kept when the rating is above 3.5, the category is one of the
    nine listed below, reviews exceed 500, the lower-cased app name has no
    's', installs exceed 50,000 and sentiment subjectivity is above 0.5.
    Three category labels are then translated for the legend. Every category
    is drawn in blue except 'GAME', which is drawn in pink.

    Args:
        df_merged: Apps DataFrame that also carries 'Sentiment_Subjectivity'.

    Returns:
        An interactive Altair chart, or None (after printing a message) when
        no row survives the filters.
    """
    allowed_categories = [
        'GAME', 'BEAUTY', 'BUSINESS', 'COMICS', 'COMMUNICATION',
        'DATING', 'ENTERTAINMENT', 'SOCIAL', 'EVENTS'
    ]
    selection = (
        (df_merged['Rating'] > 3.5)
        & df_merged['Category'].isin(allowed_categories)
        & (df_merged['Reviews'] > 500)
        & ~df_merged['App'].str.lower().str.contains('s')
        & (df_merged['Installs'] > 50000)
        & (df_merged['Sentiment_Subjectivity'] > 0.5)
    )
    bubbles = df_merged[selection].copy()

    if bubbles.empty:
        print("Task 5: No data found after filters.")
        return None

    legend_labels = {
        'BEAUTY': 'सौंदर्य',
        'BUSINESS': 'வணிகம்',
        'DATING': 'Partnersuche'
    }
    bubbles['Category'] = bubbles['Category'].replace(legend_labels)

    # Explicit domain/range pair so the colour assigned to each category is fixed.
    legend_order = bubbles['Category'].unique().tolist()
    legend_colors = ['pink' if label == 'GAME' else 'blue' for label in legend_order]

    colour_scale = alt.Scale(domain=legend_order, range=legend_colors)
    encoded = alt.Chart(bubbles).mark_circle().encode(
        x=alt.X('Size_MB', title='Size (MB)', scale=alt.Scale(zero=False)),
        y=alt.Y('Rating', title='Average Rating', scale=alt.Scale(zero=False)),
        size=alt.Size('Installs', title='Number of Installs', legend=None),
        color=alt.Color('Category', title='Category', scale=colour_scale),
        tooltip=['App', 'Category', 'Rating', 'Size_MB', 'Installs', 'Sentiment_Subjectivity']
    )
    bubble_chart = encoded.properties(
        title='App Analysis: Size vs. Rating (Bubble size: Installs)',
    ).interactive()

    return bubble_chart

def task_stacked_area(df):
    """Build the Task 6 cumulative-installs area chart (Altair).

    Keeps apps rated at least 4.2 with more than 1,000 reviews, a size
    between 20 and 80 MB (inclusive), a category starting with 'T' or 'P'
    and no digit in the app name. Installs are summed per calendar month of
    'Last Updated' and per category, then accumulated within each category.
    Rows whose monthly installs grew by more than 25% over the category's
    previous row are drawn more opaque.

    Args:
        df: Apps DataFrame with 'Rating', 'Reviews', 'Size_MB', 'Category',
            'App', 'Installs' and a datetime 'Last Updated' column.

    Returns:
        An interactive Altair chart, or None (after printing a message) when
        nothing is left to plot.
    """
    # na=False makes missing text count as "no match", so `~` never sees NaN.
    keep = (
        (df['Rating'] >= 4.2)
        & (df['Reviews'] > 1000)
        & df['Size_MB'].between(20, 80)
        & df['Category'].str.startswith(('T', 'P'), na=False)
        & ~df['App'].str.contains(r'\d', na=False)
    )
    # Explicit copy: the Category column is rewritten below, and writing onto
    # a filtered slice triggers pandas' SettingWithCopyWarning.
    df6 = df[keep].copy()

    if df6.empty:
        print("Task 6: No data found after filters.")
        return None

    # Legend labels: Travel & Local -> French, Productivity -> Spanish,
    # Photography -> Japanese. The other task functions in this file match
    # categories in upper-case form ('GAME', 'BEAUTY', ...), so the upper-case
    # keys are the ones expected to hit; the title-case spellings are kept so
    # already-prettified labels still translate.
    translation = {
        'TRAVEL_AND_LOCAL': 'Voyages et local',
        'PRODUCTIVITY': 'Productividad',
        'PHOTOGRAPHY': '写真',
        'Travel & Local': 'Voyages et local',
        'Productivity': 'Productividad',
        'Photography': '写真',
    }
    df6['Category'] = df6['Category'].replace(translation)

    df6_grouped = df6.groupby([pd.Grouper(key='Last Updated', freq='M'), 'Category'])['Installs'].sum().reset_index()

    if df6_grouped.empty:
        print("Task 6: No data to plot after grouping.")
        return None

    df6_grouped['Cumulative_Installs'] = df6_grouped.groupby('Category')['Installs'].cumsum()

    # Growth is measured on the monthly totals, not on the running total.
    df6_grouped['Growth'] = df6_grouped.groupby('Category')['Installs'].pct_change()
    df6_grouped['Highlight'] = df6_grouped['Growth'] > 0.25

    base = alt.Chart(df6_grouped).encode(
        x='yearmonth(Last Updated):T',
        tooltip=['Last Updated', 'Category', 'Installs', 'Cumulative_Installs', 'Growth']
    )

    areas = base.mark_area().encode(
        y='Cumulative_Installs:Q',
        color=alt.Color('Category:N'),
        opacity=alt.condition('datum.Highlight', alt.value(0.8), alt.value(0.5))
    )

    chart6 = areas.properties(
        title='Cumulative Installs Over Time by Category (Filtered)'
    ).interactive()

    return chart6


# --- Dashboard driver --------------------------------------------------------
# Loads the dataset once, then runs every task whose IST time window is
# currently open and collects one entry per task for create_dashboard().
# NOTE(review): the two paths below are absolute and machine-specific.
file_path = r'C:\Users\91866\Desktop\Projects-3\googleplaystore.csv'
file_path_reviews = r'C:\Users\91866\Desktop\Projects-3\googleplaystore_user_reviews.csv'
IST = pytz.timezone('Asia/Kolkata')
now = datetime.datetime.now(IST)
current_time = now.time()
print("--- Dashboard Generation Started ---")
print(f"Current IST time: {current_time.strftime('%H:%M:%S')}")

clear_old_charts()

df_master = None
if not os.path.exists(file_path):
    print(f"Error: Dataset file not found at {file_path}")
else:
    try:
        df_master = load_and_clean_data(file_path)
        # The loader can return None (it prints its own error in that case),
        # so only report success when a frame actually came back.
        if df_master is not None:
            print("Data loaded and cleaned successfully.")
    except Exception as e:
        print(f"Error loading or cleaning data: {e}")

charts_data = []


def _format_hour(hour):
    """Render a 24-hour clock hour as '3 PM'-style text (15 -> '3 PM')."""
    return f"{(hour - 1) % 12 + 1} {'AM' if hour < 12 else 'PM'}"


def _placeholder_entry(title, message):
    """Dashboard entry for a task that produced no chart file."""
    return {'title': title, 'file_path': None, 'message': message}


# One record per task. 'window' is (start_hour, end_hour) in IST as passed to
# check_time(); 'title' is used when a chart was written, 'fallback_title'
# when only a message is shown. Task 5 additionally needs the review data
# merged into the main frame before its chart can be built.
TASK_SPECS = [
    {
        'num': 1, 'label': 'Bar Chart', 'window': (15, 17),
        'build': task_grouped_bar, 'save': save_plot_as_html,
        'output_file': 'task_1_bar_chart.html',
        'title': 'Task 1: Top 10 Categories (Jan, >4.0 Rating, >10M)',
        'fallback_title': 'Task 1: Grouped Bar Chart',
    },
    {
        'num': 2, 'label': 'Choropleth Map', 'window': (18, 20),
        'build': task_choropleth_map, 'save': save_plotly_chart_as_html,
        'output_file': 'task_2_map.html',
        'title': 'Task 2: Top 5 Categories (Filtered)',
        'fallback_title': 'Task 2: Choropleth Map',
    },
    {
        'num': 3, 'label': 'Dual-Axis Chart', 'window': (13, 14),
        'build': task_dual_axis, 'save': save_plotly_chart_as_html,
        'output_file': 'task_3_dual_axis.html',
        'title': 'Task 3: Avg Installs vs. Revenue (Top 3 Categories)',
        'fallback_title': 'Task 3: Dual-Axis Chart',
    },
    {
        'num': 4, 'label': 'Time Series Chart', 'window': (18, 21),
        'build': task_time_series, 'save': save_plot_as_html,
        'output_file': 'task_4_time_series.html',
        'title': 'Task 4: Installs Trend by Category (Filtered)',
        'fallback_title': 'Task 4: Time Series Chart',
    },
    {
        'num': 5, 'label': 'Bubble Chart', 'window': (17, 19),
        'build': task_bubble_chart, 'save': save_plot_as_html,
        'output_file': 'task_5_bubble_chart.html',
        'title': 'Task 5: App Analysis (Bubble Chart)',
        'fallback_title': 'Task 5: Bubble Chart',
        'needs_reviews': True,
    },
    {
        'num': 6, 'label': 'Stacked Area Chart', 'window': (16, 18),
        'build': task_stacked_area, 'save': save_plot_as_html,
        'output_file': 'task_6_stacked_area.html',
        'title': 'Task 6: Cumulative Installs Over Time',
        'fallback_title': 'Task 6: Stacked Area Chart',
    },
]

task_charts = {}          # task number -> chart object (or None) for tasks that ran
df_merged_reviews = None  # filled in only when Task 5's window is open

for spec in TASK_SPECS:
    num = spec['num']
    print(f"\nProcessing Task {num} ({spec['label']})...")

    if df_master is None:
        # No dashboard entry in this case, only the console message.
        print(f"Task {num} skipped: Main data failed to load.")
        continue

    start_hour, end_hour = spec['window']
    window_from, window_to = _format_hour(start_hour), _format_hour(end_hour)

    if not check_time(start_hour, end_hour, current_time):
        print(f"Task {num} is unavailable outside {window_from} – {window_to} IST.")
        charts_data.append(_placeholder_entry(
            spec['fallback_title'],
            f'This chart is only available between {window_from} and {window_to} IST.'
        ))
        continue

    print(f"Time is within {window_from} - {window_to} window. Running task...")

    task_input = df_master
    if spec.get('needs_reviews'):
        df_merged_reviews = load_and_prepare_review_data(file_path_reviews, df_master)
        if df_merged_reviews is None:
            print(f"Task {num} skipped: Could not load or merge review data.")
            charts_data.append(_placeholder_entry(
                spec['fallback_title'],
                'Review data file not found or failed to merge.'
            ))
            continue
        task_input = df_merged_reviews

    chart = spec['build'](task_input)
    task_charts[num] = chart

    # Task functions signal "nothing to plot" with None; test for that
    # explicitly rather than relying on the chart object's truthiness.
    if chart is None:
        print(f"Task {num} ran, but no data matched the filters.")
        charts_data.append(_placeholder_entry(
            spec['fallback_title'],
            'No data available after applying filters.'
        ))
        continue

    spec['save'](chart, spec['output_file'])
    charts_data.append({
        'title': spec['title'],
        'file_path': spec['output_file'],
        'message': ''
    })

# Keep the historical chart1..chart6 names available for ad-hoc inspection.
chart1, chart2, chart3, chart4, chart5, chart6 = (task_charts.get(n) for n in range(1, 7))


print("\n--- All tasks processed. Generating dashboard.html ---")
if not charts_data:
    print("No charts were generated to add to the dashboard.")
else:
    create_dashboard(charts_data, "dashboard.html")

In [9]:
import os
import plotly.io as pio

# Output folder for the per-chart HTML files (relative to the working directory).
html_files_path = "./html_charts"
# exist_ok=True creates the folder only when missing, without a separate
# exists() check that another process could race against.
os.makedirs(html_files_path, exist_ok=True)

# Accumulates the HTML of every chart card that will go into the dashboard.
plot_containers = ""

# Save each Plotly figure as an HTML snippet for dashboard
def save_plot_as_html(fig, filename, insight):
    """Render a Plotly figure, add it to the dashboard and save it to disk.

    The figure is rendered as an HTML fragment with plotly.js inlined. That
    fragment is appended, wrapped in a clickable card together with the
    insight text, to the module-level ``plot_containers`` string, and is
    also written to ``html_files_path/filename``.

    NOTE(review): a later cell defines another function with this same name;
    whichever cell ran last wins.

    Args:
        fig: Plotly figure to render.
        filename: File name (inside ``html_files_path``) for the saved
            snippet; also used as the card's element id and click target.
        insight: Short text shown in the card's insight overlay.
    """
    global plot_containers
    filepath = os.path.join(html_files_path, filename)

    # Render once and reuse the markup for both the dashboard card and the
    # saved file; both were produced with identical options before, so the
    # second render only repeated the (large, plotly.js-inlined) work.
    html_content = pio.to_html(fig, full_html=False, include_plotlyjs='inline')

    plot_containers += f"""
    <div class="plot-container" id="{filename}" onclick="openPlot('{filename}')">
        <div class="plot">{html_content}</div>
        <div class="insights">{insight}</div>
    </div>
    """

    with open(filepath, "w", encoding="utf-8") as snippet_file:
        snippet_file.write(html_content)
    print(f"✅ Saved: {filename}\nInsight: {insight}")


In [10]:
# Shared look-and-feel settings for the dashboard charts:
# card size in pixels, colours, and font sizes.
plot_width, plot_height = 400, 300
plot_bg_color, text_color = 'black', 'white'
title_font = dict(size=16)
axis_font = dict(size=12)

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import os, datetime, pytz, webbrowser, re

# Folder holding the two input CSVs. Set the PLAYSTORE_DATA_DIR environment
# variable to run on another machine; the original location stays the default.
base_path = os.environ.get("PLAYSTORE_DATA_DIR", r"C:\Users\91866\Desktop\Projects-3")
# Charts and the dashboard page are written next to the data.
html_files_path = os.path.join(base_path, "html_charts")
os.makedirs(html_files_path, exist_ok=True)

# Accumulates the HTML of every dashboard card (charts and placeholders).
plot_containers = ""
# Card size in pixels; substituted into the dashboard CSS.
plot_width = 450
plot_height = 320
plot_bg_color = 'white'
text_color = 'black'
title_font = {'size':16}
axis_font = {'size':12}

def save_plot_as_html(fig, filename, insight):
    """Add a Plotly figure to the dashboard and save a standalone copy.

    Two renderings are produced: an HTML fragment with plotly.js inlined,
    appended as a clickable card to the module-level ``plot_containers``
    string, and a full HTML page (plotly.js from the CDN) written to
    ``html_files_path/filename``.

    Args:
        fig: Plotly figure to render.
        filename: Output file name; also the card's element id and the
            target opened when the card is clicked.
        insight: Text shown in the card's insight overlay.
    """
    global plot_containers

    embedded_plot = pio.to_html(fig, full_html=False, include_plotlyjs='inline')
    card_lines = [
        "",
        f"""    <div class="plot-container" id="{filename}" onclick="openPlot('{filename}')">""",
        f"""        <div class="plot">{embedded_plot}</div>""",
        f"""        <div class="insights">{insight}</div>""",
        "    </div>",
    ]
    plot_containers = plot_containers + "\n".join(card_lines)

    filepath = os.path.join(html_files_path, filename)
    with open(filepath, "w", encoding="utf-8") as page:
        page.write(pio.to_html(fig, full_html=True, include_plotlyjs='cdn'))
    print(f"Saved: {filepath}")

def no_data_box(tasknum,msg):
    """Append a chart-less placeholder card to the dashboard.

    The card shows a "Task <tasknum>" heading with ``msg`` centred beneath
    it and is appended to the module-level ``plot_containers`` string.

    Args:
        tasknum: Task number shown in the card heading.
        msg: Explanatory text shown in the card body.
    """
    global plot_containers
    card_lines = (
        "",
        '    <div class="plot-container">',
        f'        <h3 style="text-align:center;color:#555;">Task {tasknum}</h3>',
        '        <div style="display:flex;align-items:center;justify-content:center;height:100%;">',
        f'            <p style="font-size:14px;color:#777;">{msg}</p>',
        '        </div>',
        '    </div>',
    )
    plot_containers = plot_containers + "\n".join(card_lines)

# Wall-clock time in India; every task below is gated on an IST time window.
IST = pytz.timezone('Asia/Kolkata')
now = datetime.datetime.now(IST).time()

apps_df = pd.read_csv(os.path.join(base_path, "googleplaystore.csv"))
reviews_df = pd.read_csv(os.path.join(base_path, "googleplaystore_user_reviews.csv"))

# --- Numeric clean-up; anything unparseable becomes NaN (errors='coerce') ---
# Installs: strip '+' and ',' before converting.
apps_df['Installs'] = apps_df['Installs'].astype(str).str.replace(r'[+,]','',regex=True)
apps_df['Installs'] = pd.to_numeric(apps_df['Installs'], errors='coerce')
# Size: drop the 'M' suffix and turn a 'k' suffix into 'e-3' so that both end
# up on the same scale after parsing (e.g. '512k' -> '512e-3' -> 0.512).
# regex=False: these are literal replacements, independent of the pandas
# version's default for the `regex` argument.
apps_df['Size_MB'] = (
    apps_df['Size'].astype(str)
    .str.replace('M', '', regex=False)
    .str.replace('k', 'e-3', regex=False)
)
apps_df['Size_MB'] = pd.to_numeric(apps_df['Size_MB'], errors='coerce')
# Price: '$' is a regex metacharacter (end-of-string anchor), so it must be
# replaced as a literal.
apps_df['Price'] = apps_df['Price'].astype(str).str.replace('$', '', regex=False)
apps_df['Price'] = pd.to_numeric(apps_df['Price'], errors='coerce')
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')
# Android version: first whitespace-separated token, first three characters.
apps_df['Android_Ver_Clean'] = apps_df['Android Ver'].astype(str).str.split(' ').str[0].str.slice(0,3)
apps_df['Android_Ver_Clean'] = pd.to_numeric(apps_df['Android_Ver_Clean'], errors='coerce')
apps_df.rename(columns={'Content Rating':'Content_Rating'}, inplace=True)
# Revenue is computed as price x installs.
apps_df['Revenue'] = apps_df['Price'] * apps_df['Installs']

# Attach each app's mean review subjectivity (used by Task 5) when available.
if 'Sentiment_Subjectivity' in reviews_df.columns:
    sub = reviews_df.groupby('App')['Sentiment_Subjectivity'].mean().reset_index()
    apps_df = apps_df.merge(sub,on='App',how='left')

def task_placeholder(task, start, end):
    """Tell whether a task's time window is open, adding a placeholder if not.

    The module-level ``now`` is compared against the half-open interval
    ``[start, end)``. Outside that interval a "visible only between ..."
    card is added through ``no_data_box``.

    Args:
        task: Task number shown on the placeholder card.
        start: Window start (inclusive).
        end: Window end (exclusive).

    Returns:
        True when the task should run now, False otherwise.
    """
    window_open = start <= now < end
    if window_open:
        return True
    visible_from = start.strftime('%I:%M %p')
    visible_until = end.strftime('%I:%M %p')
    no_data_box(task, f"Visible only between {visible_from} – {visible_until} IST")
    return False

# TASK 1 - grouped bars: average rating and total reviews for the ten
# categories with the most installs (window: 3 PM - 5 PM IST).
if task_placeholder(1, datetime.time(15,0), datetime.time(17,0)):
    # Apps of at least 10 MB, rated 4.0 or higher, last updated in January.
    df1 = apps_df[
        (apps_df['Size_MB'] >= 10)
        & (apps_df['Rating'] >= 4.0)
        & (apps_df['Last Updated'].dt.month == 1)
    ]
    grp = df1.groupby('Category').agg({'Installs':'sum','Rating':'mean','Reviews':'sum'}).reset_index()
    top10 = grp.sort_values('Installs',ascending=False).head(10)
    if top10.empty:
        # Without this the task would silently vanish from the dashboard.
        no_data_box(1, "No data available after applying filters.")
    else:
        fig1 = go.Figure()
        fig1.add_trace(go.Bar(x=top10['Category'],y=top10['Rating'],name='Average Rating'))
        fig1.add_trace(go.Bar(x=top10['Category'],y=top10['Reviews'],name='Total Reviews'))
        fig1.update_layout(barmode='group',plot_bgcolor=plot_bg_color,paper_bgcolor=plot_bg_color,font_color=text_color)
        save_plot_as_html(fig1,"task1.html","Average rating and total reviews for top 10 categories by installs")

# TASK 2 - choropleth of installs for the five most-installed categories,
# excluding categories starting with A, C, G or S (window: 6 PM - 8 PM IST).
if task_placeholder(2, datetime.time(18,0), datetime.time(20,0)):
    d2 = apps_df[~apps_df['Category'].str.startswith(tuple(['A','C','G','S']))]
    agg = d2.groupby('Category')['Installs'].sum().reset_index()
    top5 = agg.sort_values('Installs',ascending=False).head(5)['Category'].tolist()
    # Explicit copy: a column is added below, and writing onto a filtered
    # slice triggers pandas' SettingWithCopyWarning.
    d2 = d2[d2['Category'].isin(top5)].copy()
    if d2.empty:
        # Without this the task would silently vanish from the dashboard.
        no_data_box(2, "No data available after applying filters.")
    else:
        # Countries are handed out round-robin by row position, purely so the
        # rows can be placed on a map.
        countries = ["United States","India","UK","Germany","France","Canada","Australia","Japan","Brazil","South Korea"]
        d2['Country'] = [countries[i % len(countries)] for i in range(len(d2))]
        agg_country = d2.groupby(['Country','Category'])['Installs'].sum().reset_index()
        fig2 = px.choropleth(agg_country, locations='Country', locationmode='country names',
                             color='Installs', hover_name='Category', projection='natural earth',
                             color_continuous_scale='Viridis')
        fig2.update_layout(paper_bgcolor='white', plot_bgcolor='white')
        save_plot_as_html(fig2,"task2.html","Global installs by top 5 categories")

# TASK 3 - dual-axis chart: average installs (bars, left axis) and average
# revenue (lines, right axis) per category and app type, for the three
# categories with the most installs among qualifying apps
# (window: 1 PM - 2 PM IST).
if task_placeholder(3, datetime.time(13,0), datetime.time(14,0)):
    d3 = apps_df[
        (apps_df['Installs'] >= 10000)
        & (apps_df['Revenue'] >= 10000)
        & (apps_df['Android_Ver_Clean'] > 4.0)
        & (apps_df['Size_MB'] > 15)
        & (apps_df['Content_Rating'].astype(str).str.lower() == 'everyone')
        & (apps_df['App'].astype(str).str.len() <= 30)
    ]
    top3 = d3.groupby('Category')['Installs'].sum().reset_index().sort_values('Installs',ascending=False).head(3)['Category']
    d3 = d3[d3['Category'].isin(top3)]
    if d3.empty:
        # Without this the task would silently vanish from the dashboard.
        no_data_box(3, "No data available after applying filters.")
    else:
        agg = d3.groupby(['Category','Type']).agg({'Installs':'mean','Revenue':'mean'}).reset_index()
        fig3 = make_subplots(specs=[[{"secondary_y": True}]])
        for t in agg['Type'].unique():
            # Own name for the per-type rows so the `sub` frame created while
            # merging review subjectivity is not overwritten here.
            type_rows = agg[agg['Type']==t]
            fig3.add_trace(go.Bar(x=type_rows['Category'],y=type_rows['Installs'],name=f'Avg Installs {t}'),secondary_y=False)
            fig3.add_trace(go.Scatter(x=type_rows['Category'],y=type_rows['Revenue'],name=f'Avg Revenue {t}'),secondary_y=True)
        save_plot_as_html(fig3,"task3.html","Average installs and revenue for free vs paid apps")

# TASK 4 - monthly install trend per category (window: 6 PM - 9 PM IST).
if task_placeholder(4, datetime.time(18,0), datetime.time(21,0)):
    app_names = apps_df['App'].astype(str)
    keep = (
        ~app_names.str.lower().str.startswith(tuple(['x','y','z']))
        & (apps_df['Reviews'] > 500)
        & ~app_names.str.contains('S',case=False,na=False)
        & apps_df['Category'].astype(str).str.startswith(tuple(['E','C','B']))
    )
    # Explicit copy: a column is added below, and writing onto a filtered
    # slice triggers pandas' SettingWithCopyWarning.
    d4 = apps_df[keep].copy()
    # First day of the month in which the app was last updated.
    d4['Month']=d4['Last Updated'].dt.to_period('M').dt.to_timestamp()
    grp=d4.groupby(['Month','Category'])['Installs'].sum().reset_index()
    if grp.empty:
        # Without this the task would silently vanish from the dashboard.
        no_data_box(4, "No data available after applying filters.")
    else:
        fig4 = px.line(grp,x='Month',y='Installs',color='Category')
        save_plot_as_html(fig4,"task4.html","Install trends by category over time")

# TASK 5 - bubble chart: size vs. rating, bubble area = installs
# (window: 5 PM - 7 PM IST).
if task_placeholder(5, datetime.time(17,0), datetime.time(19,0)):
    # Upper-case spellings, matching the form in which task_bubble_chart in
    # this notebook matches the same categories. The previous title-case
    # list ('Game', ..., 'Event') can never match upper-case category values.
    valid=['GAME','BEAUTY','BUSINESS','COMICS','COMMUNICATION','DATING','ENTERTAINMENT','SOCIAL','EVENTS']
    d5 = apps_df[
        (apps_df['Rating'] > 3.5)
        & (apps_df['Reviews'] > 500)
        & (apps_df['Installs'] > 50000)
        & (apps_df['Category'].isin(valid))
        & (~apps_df['App'].astype(str).str.contains('S',case=False,na=False))
    ]
    # The subjectivity column only exists when the reviews file provided it.
    if 'Sentiment_Subjectivity' in d5.columns:
        d5 = d5[d5['Sentiment_Subjectivity']>0.5]
    if d5.empty:
        # Without this the task would silently vanish from the dashboard.
        no_data_box(5, "No data available after applying filters.")
    else:
        fig5 = px.scatter(d5,x='Size_MB',y='Rating',size='Installs',color='Category',hover_name='App')
        fig5.update_layout(paper_bgcolor='white', plot_bgcolor='white')
        save_plot_as_html(fig5,"task5.html","Bubble chart of size vs rating with installs as bubble size")

# TASK 6 - stacked area of cumulative installs per category
# (window: 4 PM - 6 PM IST).
if task_placeholder(6, datetime.time(16,0), datetime.time(18,0)):
    keep = (
        (apps_df['Rating'] >= 4.2)
        & (~apps_df['App'].astype(str).str.contains(r'\d',na=False))
        & (apps_df['Category'].astype(str).str.startswith(tuple(['T','P'])))
        & (apps_df['Reviews'] > 1000)
        & (apps_df['Size_MB'].between(20,80))
    )
    # Explicit copy: a column is added below, and writing onto a filtered
    # slice triggers pandas' SettingWithCopyWarning.
    d6 = apps_df[keep].copy()
    d6['Month']=d6['Last Updated'].dt.to_period('M').dt.to_timestamp()
    monthly = d6.groupby(['Month','Category'])['Installs'].sum()
    if monthly.empty:
        # Without this the task would silently vanish from the dashboard.
        no_data_box(6, "No data available after applying filters.")
    else:
        # The chart is described as cumulative, so accumulate the monthly sums.
        # Pivoting to a Month x Category grid (missing months = 0 new installs)
        # gives every category a value for every month, so a running total
        # never drops back where a category simply had no updates.
        grp = (
            monthly.unstack(fill_value=0)
            .cumsum()
            .reset_index()
            .melt(id_vars='Month', var_name='Category', value_name='Cumulative_Installs')
        )
        fig6 = px.area(grp,x='Month',y='Cumulative_Installs',color='Category')
        fig6.update_layout(paper_bgcolor='white', plot_bgcolor='white')
        save_plot_as_html(fig6,"task6.html","Cumulative installs over time for each category")

# Page template for the dashboard. It is filled in with str.format below, so
# literal CSS/JS braces are doubled ({{ }}) and only {plots}, {plot_width}
# and {plot_height} are substituted.
dashboard_html = """<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>Google Play Store Review Analytics</title>
<style>
body{{font-family:Arial,sans-serif;background-color:#f4f4f4;color:#111;margin:0;padding:0;}}
.container{{display:grid;grid-template-columns:repeat(auto-fit,minmax(460px,1fr));gap:20px;padding:20px;justify-items:center;}}
.plot-container{{border:2px solid #bbb;margin:5px;padding:5px;width:{plot_width}px;height:{plot_height}px;overflow:hidden;position:relative;background:#fff;cursor:pointer;box-shadow:0 0 10px rgba(0,0,0,0.1);}}
.insights{{display:none;position:absolute;right:10px;top:10px;background-color:rgba(0,0,0,0.7);padding:5px;border-radius:5px;color:#fff;}}
.plot-container:hover .insights{{display:block;}}
h1{{text-align:center;padding:20px 0;background:#222;color:#fff;margin:0;}}
</style><script>
function openPlot(filename){{window.open(filename,'_blank');}}
</script></head><body>
<h1>Google Play Store Reviews Dashboard</h1>
<div class="container">{plots}</div></body></html>"""

final_html = dashboard_html.format(plots=plot_containers,plot_width=plot_width,plot_height=plot_height)
# The dashboard lives in the same folder as the per-chart pages, so the
# relative file names passed to openPlot() resolve next to it.
dashboard_path = os.path.join(html_files_path,"dashboard.html")
with open(dashboard_path,"w",encoding="utf-8") as f:
    f.write(final_html)

# Build a well-formed file URI (file:///C:/... on Windows) instead of gluing
# 'file://' onto a native path with backslashes.
from pathlib import Path
webbrowser.open(Path(dashboard_path).resolve().as_uri())
