# In [1]:
# Combined Dashboard and HTML Export for Google Play Store Analytics

import os
import webbrowser
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import pytz

# Input CSV locations (resolved relative to the working directory)
APPS_CSV = "googleplaystore.csv"
REVIEWS_CSV = "googleplaystore_user_reviews.csv"
EXTENDED_CSV = "googleplaystore14.csv"

# Read the three datasets, in the same order as the path constants above
apps_df, reviews_df, extended_df = (
    pd.read_csv(csv_path) for csv_path in (APPS_CSV, REVIEWS_CSV, EXTENDED_CSV)
)

# Coerce the apps_df columns used by the tasks to numeric / datetime dtypes.
# Anything that cannot be parsed becomes NaN / NaT (errors='coerce').
apps_df = apps_df.assign(**{
    # strip '+' and ',' before converting
    'Installs': pd.to_numeric(
        apps_df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True),
        errors='coerce',
    ),
    'Reviews': pd.to_numeric(apps_df['Reviews'], errors='coerce'),
    'Rating': pd.to_numeric(apps_df['Rating'], errors='coerce'),
    # strip '$' before converting
    'Price': pd.to_numeric(
        apps_df['Price'].astype(str).str.replace(r'[$]', '', regex=True),
        errors='coerce',
    ),
    'Last Updated': pd.to_datetime(apps_df['Last Updated'], errors='coerce'),
})

# extended_df installs: keep digits only, then store as int with
# unparseable values counted as 0
extended_df['Installs'] = (
    extended_df['Installs']
    .astype(str)
    .str.replace(r'[^\d]', '', regex=True)
    .pipe(pd.to_numeric, errors='coerce')
    .fillna(0)
    .astype(int)
)

# Take one snapshot of "now" in the Asia/Kolkata timezone. The time-of-day
# part (tz-naive) is what is_time_allowed() compares against.
tz = pytz.timezone('Asia/Kolkata')
now_ist = datetime.now(tz=tz)
current_time = now_ist.time()

# Time check utility
def is_time_allowed(start, end, now=None):
    """Return True when a time of day lies inside ``[start, end]``, inclusive.

    Args:
        start: Window opening as an "HH:MM" string (24-hour clock).
        end: Window closing as an "HH:MM" string (24-hour clock).
        now: Optional ``datetime.time`` to test. When omitted, the
            module-level ``current_time`` snapshot is used, so existing
            two-argument calls behave as before.

    Returns:
        bool: ``start <= now <= end``. A window whose start is later than
        its end (i.e. one that would cross midnight) never matches.

    Raises:
        ValueError: If ``start`` or ``end`` is not in "HH:MM" form.
    """
    fmt = "%H:%M"
    opens_at = datetime.strptime(start, fmt).time()
    closes_at = datetime.strptime(end, fmt).time()
    moment = current_time if now is None else now
    return opens_at <= moment <= closes_at

# Delete any output files left in the working directory by an earlier run.
# NOTE: despite its name, `dirs` holds file names, not directories.
dirs = [
    'app_rating_distribution.html',
    'revenue_vs_installs.html',
    'global_installs.html',
    'rating_reviews_comparison.html',
    'free_paid_comparison.html',
    'monthly_installs_trend.html',
    'monthly_installs_growth.html',
    'correlation_matrix.html',
    'bubble_size_rating.html',
    'dashboard.html',
]
# filter() is lazy, so each path is checked immediately before it is removed
for stale_output in filter(os.path.exists, dirs):
    os.remove(stale_output)

# Task 1: select the merged app/review rows that feed the sentiment chart
merged1 = pd.merge(apps_df, reviews_df, on='App')
# Keep only apps that have more than 1000 merged rows. The group size is
# computed in one vectorised pass instead of calling a Python lambda per app.
fd1 = merged1[merged1.groupby('App')['App'].transform('size') > 1000]
# Restrict to the five categories with the most remaining rows
top5 = fd1['Category'].value_counts().nlargest(5).index
# .copy() gives fd1 its own data, so adding columns to it later does not
# write into a slice of merged1 (which raises SettingWithCopyWarning).
fd1 = fd1[fd1['Category'].isin(top5)].copy()
def group_rating(r):
    """Map a numeric rating to the rating-group label used on the chart.

    Args:
        r: Rating as a number. May be NaN/None when the rating is missing.

    Returns:
        '1-2 Stars' when r <= 2, '3-4 Stars' when 2 < r <= 4, '4-5 Stars'
        when r > 4, and None when the rating is missing.
    """
    # NaN fails every comparison, so without this guard a missing rating
    # would fall through both tests and be labelled '4-5 Stars'.
    if pd.isna(r):
        return None
    if r <= 2:
        return '1-2 Stars'
    if r <= 4:
        return '3-4 Stars'
    return '4-5 Stars'
# Label every row with its rating group. assign() returns a new DataFrame,
# so this never writes into a filtered slice (avoids SettingWithCopyWarning).
fd1 = fd1.assign(Rating_Group=fd1['Rating'].apply(group_rating))
# Count rows per (category, rating group, sentiment); groupby drops rows
# whose key is missing in any of the three columns.
sc1 = (
    fd1.groupby(['Category', 'Rating_Group', 'Sentiment'])
    .size()
    .reset_index(name='Count')
)
# Stacked bars, one facet per category
fig1 = px.bar(
    sc1,
    x='Rating_Group',
    y='Count',
    color='Sentiment',
    facet_col='Category',
    barmode='stack',
    title='Sentiment Distribution',
)
# auto_open=True: this chart is also opened in the browser right away
pio.write_html(fig1, 'app_rating_distribution.html', auto_open=True)

# Task 2: revenue vs. installs scatter for paid apps.
# Only generated when the IST snapshot falls between 14:00 and 16:00.
if is_time_allowed('14:00', '16:00'):
    paid_rows = (
        (apps_df['Type'] == 'Paid')
        & (apps_df['Price'] > 0)
        & (apps_df['Installs'] > 0)
    )
    df2 = apps_df.loc[paid_rows].copy()
    # Revenue here is simply installs multiplied by the listed price
    df2['Revenue'] = df2['Installs'] * df2['Price']
    fig2 = px.scatter(
        df2,
        x='Installs',
        y='Revenue',
        color='Category',
        trendline='ols',
        hover_name='App',
        title='Revenue vs Installs',
    )
    pio.write_html(fig2, 'revenue_vs_installs.html', auto_open=False)

# Task 3: choropleth of installs per country for the top categories.
# Only generated when the IST snapshot falls between 18:00 and 20:00.
if is_time_allowed('18:00', '20:00'):
    df3 = extended_df[['Country', 'Category', 'Installs']].dropna()
    # Exclude categories whose name starts with A, C, G or S
    has_excluded_initial = df3['Category'].str.startswith(('A', 'C', 'G', 'S'))
    df3 = df3[~has_excluded_initial]
    # Of the rest, keep the five categories with the highest total installs
    installs_by_category = df3.groupby('Category')['Installs'].sum()
    top5e = installs_by_category.nlargest(5).index
    df3 = df3[df3['Category'].isin(top5e)]
    # Total installs per (country, category) pair
    g3 = df3.groupby(['Country', 'Category'])['Installs'].sum().reset_index()
    fig3 = px.choropleth(
        g3,
        locations='Country',
        locationmode='country names',
        color='Category',
        hover_data=['Installs'],
        title='Global Installs',
    )
    pio.write_html(fig3, 'global_installs.html', auto_open=False)

# Task 4: mean rating and total reviews per category, as grouped bars.
# Only generated when the IST snapshot falls between 15:00 and 17:00.
if is_time_allowed('15:00', '17:00'):
    # Apps rated below 4 that have at least 10 reviews
    df4 = apps_df[(apps_df['Rating'] < 4) & (apps_df['Reviews'] >= 10)]
    # Keep categories that contribute more than 50 such apps
    apps_per_category = df4['Category'].value_counts()
    val = apps_per_category[apps_per_category > 50].index
    df4 = df4[df4['Category'].isin(val)]
    st4 = (
        df4.groupby('Category')
        .agg(Rating=('Rating', 'mean'), Reviews=('Reviews', 'sum'))
        .reset_index()
    )
    # Long format: one row per (category, metric) so both metrics share a chart
    m4 = pd.melt(st4, id_vars='Category', var_name='Metric', value_name='Value')
    fig4 = px.bar(
        m4,
        x='Category',
        y='Value',
        color='Metric',
        barmode='group',
        title='Rating vs Reviews',
    )
    pio.write_html(fig4, 'rating_reviews_comparison.html', auto_open=False)

# Task 5: average installs (bars) and average revenue (line) for free vs paid.
# Only generated when the IST snapshot falls between 13:00 and 14:00.
if is_time_allowed('13:00', '14:00'):
    df5 = apps_df.copy()
    # Revenue is installs x price for paid apps and 0 for everything else.
    # Computed column-wise; a row-by-row apply() does the same work in a
    # Python-level loop over every app.
    df5['Revenue'] = (df5['Installs'] * df5['Price']).where(df5['Type'] == 'Paid', 0)
    sm = df5.groupby('Type').agg({'Installs': 'mean', 'Revenue': 'mean'}).reset_index()
    f5 = go.Figure()
    f5.add_trace(go.Bar(x=sm.Type, y=sm.Installs, name='Avg Installs'))
    # Revenue goes on a secondary y-axis (y2) drawn on the right-hand side
    f5.add_trace(go.Scatter(x=sm.Type, y=sm.Revenue, mode='lines+markers', name='Avg Revenue', yaxis='y2'))
    f5.update_layout(title='Free vs Paid', yaxis2=dict(overlaying='y', side='right'))
    pio.write_html(f5, 'free_paid_comparison.html', auto_open=False)

# Task 6: Installs Trend
# Monthly total installs per category, one line per category.
# Only generated when the IST snapshot falls between 18:00 and 21:00.
if is_time_allowed('18:00', '21:00'):
    df6 = apps_df.dropna(subset=['Last Updated'])
    keep_rows = (
        # app name must not start with X, Y or Z (case-insensitive)
        ~df6['App'].str[0].str.upper().isin(['X', 'Y', 'Z'])
        # category name must start with E, C or B
        & df6['Category'].str[0].isin(['E', 'C', 'B'])
        & (df6['Reviews'] > 500)
    )
    # .copy(): a Month column is added next, and assigning a column on a
    # filtered slice raises SettingWithCopyWarning.
    df6 = df6[keep_rows].copy()
    # Bucket each update date to the first day of its month
    df6['Month'] = df6['Last Updated'].dt.to_period('M').dt.to_timestamp()
    mi = (
        df6.groupby(['Month', 'Category'])['Installs']
        .sum()
        .reset_index()
        .sort_values(['Category', 'Month'])
    )
    # Month-over-month growth in percent, per category (also read by Task 7).
    # The first month of each category has no predecessor, so Growth is NaN.
    mi['Prev'] = mi.groupby('Category')['Installs'].shift(1)
    mi['Growth'] = (mi.Installs - mi.Prev) / mi.Prev * 100
    f6 = go.Figure()
    for c in mi.Category.unique():
        part = mi[mi.Category == c]
        f6.add_trace(go.Scatter(x=part.Month, y=part.Installs, mode='lines+markers', name=f'{c}'))
    pio.write_html(f6, 'monthly_installs_trend.html', auto_open=False)

# Task 7: Growth Highlights
# Same monthly lines as Task 6, plus a filled area over the months whose
# month-over-month growth exceeds 20%.
if is_time_allowed('18:00', '21:00'):
    # `mi` is built by Task 6, which is gated on the same 18:00-21:00 window
    f7 = go.Figure()
    for category in mi['Category'].unique():
        monthly = mi[mi['Category'] == category]
        surges = monthly[monthly['Growth'] > 20]
        f7.add_trace(
            go.Scatter(
                x=monthly['Month'],
                y=monthly['Installs'],
                mode='lines',
                name=f'{category}',
            )
        )
        f7.add_trace(
            go.Scatter(
                x=surges['Month'],
                y=surges['Installs'],
                fill='tozeroy',
                mode='none',
                name=f'{category} >20% Growth',
            )
        )
    f7.update_layout(title='Growth Highlights')
    pio.write_html(f7, 'monthly_installs_growth.html', auto_open=False)

# Task 8: correlation matrix of installs, rating and reviews for recently
# updated, popular apps.
# Only generated when the IST snapshot falls between 14:00 and 16:00.
if is_time_allowed('14:00', '16:00'):
    # now_ist is timezone-aware, while 'Last Updated' was parsed by
    # pd.to_datetime without utc=True (presumably tz-naive - confirm against
    # the CSV). pandas raises TypeError when the two kinds are compared, so
    # the tzinfo is dropped from the cutoff before comparing.
    one_year = now_ist.replace(tzinfo=None) - timedelta(days=365)
    updated_recently = apps_df['Last Updated'] >= one_year
    many_installs = apps_df['Installs'] >= 100000
    many_reviews = apps_df['Reviews'] > 1000
    # Genres must not start with any of the letters A, F, E, G, I, K
    genre_allowed = ~apps_df['Genres'].astype(str).str[0].isin(list('AFEGIK'))
    d8 = apps_df[updated_recently & many_installs & many_reviews & genre_allowed]
    corr = d8[['Installs', 'Rating', 'Reviews']].dropna().corr()
    fig8 = px.imshow(corr, text_auto=True, title='Correlation Matrix')
    pio.write_html(fig8, 'correlation_matrix.html', auto_open=False)

# Task 9: bubble chart of app size vs rating, bubble area = installs.
# Only generated when the IST snapshot falls between 17:00 and 19:00.
if is_time_allowed('17:00', '19:00'):
    cats = {'GAME', 'BEAUTY', 'BUSINESS', 'COMICS', 'COMMUNICATION', 'DATING', 'ENTERTAINMENT', 'SOCIAL', 'EVENTS'}

    def _size_to_mb(s):
        """Convert a size label to megabytes; return NaN if it is not numeric.

        A value containing 'M' is read as megabytes. Anything else has a
        'k' stripped and is read as kilobytes (divided by 1024).
        """
        text = str(s)
        try:
            if 'M' in text:
                return float(text.replace('M', ''))
            return float(text.replace('k', '')) / 1024
        except ValueError:
            # Non-numeric labels (e.g. 'Varies with device' - TODO confirm
            # they occur in this file) used to abort the whole task.
            return float('nan')

    d9 = (
        extended_df[extended_df.Category.isin(cats) & (extended_df.Rating > 3.5)]
        .dropna(subset=['Reviews', 'Size', 'Installs', 'Sentiment_Subjectivity'])
        # own copy, so the column assignments below do not write into a slice
        .copy()
    )
    d9['Size'] = d9['Size'].apply(_size_to_mb)
    # Rows whose size could not be parsed have no x position; drop them
    d9 = d9.dropna(subset=['Size'])
    d9['Sentiment_Subjectivity'] = d9['Sentiment_Subjectivity'].astype(float)
    d9['Reviews'] = d9['Reviews'].astype(int)
    d9 = d9[(d9.Reviews > 500) & (d9.Sentiment_Subjectivity > 0.5) & (d9.Installs > 50000)]
    fig9 = px.scatter(d9, x='Size', y='Rating', size='Installs', color='Category', hover_name='App', title='Bubble Chart')
    # auto_open=True: this chart is also opened in the browser right away
    pio.write_html(fig9, 'bubble_size_rating.html', auto_open=True)



# Generate dashboard with buttons
#
# Each button opens one of the plot files written by the tasks above, using a
# relative link, so the dashboard works from whatever directory the script
# ran in. The hour pair passed to openPlot() mirrors the is_time_allowed()
# window under which that plot is generated (Task 1 is always generated,
# hence 0-24), and the hour is evaluated in IST like the Python side.
# This is a plain (non-f) string, so the `${...}` placeholders belong to the
# JavaScript template literal and are not interpolated by Python.
html_content = """
<!DOCTYPE html>
<html>
<head>
    <title>Google Play Store Data Analytics</title>
    <style>
        body {
            background: url('https://img-s-msn-com.akamaized.net/tenant/amp/entityid/AA1mjW7J.img?w=728&h=385&m=4&q=98') no-repeat center center fixed;
            background-size: cover;
            color: white;
            text-align: center;
            font-family: Arial, sans-serif;
        }
        h1 {
            margin-top: 20px;
            font-size: 36px;
        }
        button {
            margin: 10px;
            padding: 12px 24px;
            font-size: 16px;
            color: white;
            border: none;
            border-radius: 8px;
            cursor: pointer;
            transition: background-color 0.3s ease;
        }
        button:hover {
            background-color: #0c56d0;
        }
        .sentiment-btn {
            background-color: red;
        }
        .revenue-btn {
            background-color: orange;
        }
        .global-btn {
            background-color: yellow;
            color: black;
        }
        .rating-btn {
            background-color: green;
        }
        .free-paid-btn {
            background-color: cyan;
            color: black;
        }
        .monthly-btn {
            background-color: purple;
        }
        .growth-btn {
            background-color: pink;
            color: black;
        }
        .correlation-btn {
            background-color: blue;
        }
        .bubble-btn {
            background-color: brown;
        }
    </style>
    <script>
        // Hour of day in India Standard Time (UTC+05:30), so the gate does
        // not depend on the viewer's local timezone.
        function currentIstHour() {
            const now = new Date();
            const utcMinutes = now.getUTCHours() * 60 + now.getUTCMinutes();
            return Math.floor(((utcMinutes + 330) % 1440) / 60);
        }

        function openPlot(filename, startHour, endHour) {
            const hours = currentIstHour();

            if (hours >= startHour && hours < endHour) {
                window.open(filename, '_blank');
            } else {
                alert(`This feature is only available between ${startHour}:00 and ${endHour}:00 IST.`);
            }
        }
    </script>
</head>
<body>
    <h1>
        <img src='https://tse4.mm.bing.net/th?id=OIP.aK0pFHbj6X0jqS0ZGmqGmAHaEK&pid=Api&P=0&h=180' alt='Google Play Store Logo' width='50' style='vertical-align: middle;'>
        Google Play Store Data Analytics
    </h1>

    <!-- Each button opens a plot file written next to this dashboard -->
    <button class="sentiment-btn" onclick="openPlot('app_rating_distribution.html', 0, 24)">Sentiment Distribution</button>
    <button class="revenue-btn" onclick="openPlot('revenue_vs_installs.html', 14, 16)">Revenue vs Installs</button>
    <button class="global-btn" onclick="openPlot('global_installs.html', 18, 20)">Global Installs</button>
    <button class="rating-btn" onclick="openPlot('rating_reviews_comparison.html', 15, 17)">Category Ratings &amp; Reviews</button>
    <button class="free-paid-btn" onclick="openPlot('free_paid_comparison.html', 13, 14)">Free vs Paid Apps</button>
    <button class="monthly-btn" onclick="openPlot('monthly_installs_trend.html', 18, 21)">Total Installs Over Time</button>
    <button class="growth-btn" onclick="openPlot('monthly_installs_growth.html', 18, 21)">Installs Growth Highlights</button>
    <button class="correlation-btn" onclick="openPlot('correlation_matrix.html', 14, 16)">Correlation Matrix</button>
    <button class="bubble-btn" onclick="openPlot('bubble_size_rating.html', 17, 19)">App Size vs Rating</button>
</body>
</html>
"""

# Save the HTML content to a file
with open("dashboard.html", "w", encoding="utf-8") as f:
    f.write(html_content)

# Open the dashboard in a web browser. as_uri() builds a well-formed file://
# URL on every platform (drive letters, spaces and other characters that
# need percent-encoding), which plain string concatenation does not.
webbrowser.open(Path("dashboard.html").resolve().as_uri())


# Out[1]: True