In [2]:
import os
import webbrowser
from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.io as pio

# =========================
# FOLDER SETUP
# =========================
# Directory that receives the dashboard page and every per-plot page.
# exist_ok=True makes re-running the script safe when the folder is already there.
html_files_path = "./dashboard_html"
os.makedirs(html_files_path, exist_ok=True)

# =========================
# GLOBAL CONTAINER FOR PLOTS
# =========================
# Accumulates one HTML snippet per saved figure; save_plot_as_html() appends
# to it and the dashboard template interpolates the final string.
plot_containers = ""

# =========================
# SAVE PLOT FUNCTION
# =========================
def save_plot_as_html(fig, filename, insight):
    """Save ``fig`` as its own HTML page and register it as a dashboard tile.

    Two things happen for each figure:

    * a standalone, self-contained page is written to
      ``html_files_path/filename``;
    * an HTML snippet holding the plot and its insight text is appended to
      the module-level ``plot_containers`` string.

    Args:
        fig: Plotly figure to export.
        filename: File name (not a full path) of the standalone page. It is
            also written verbatim into the tile's ``onclick`` handler, so it
            must not contain quote characters.
        insight: Caption placed in the tile's ``insights`` element; inserted
            into the markup as-is.
    """
    global plot_containers
    filepath = os.path.join(html_files_path, filename)

    # Write the standalone page first so that a failed export never leaves a
    # dashboard tile pointing at a file that does not exist. It is written as
    # a full HTML document (with <html>/<head> and a charset declaration)
    # because it is opened directly in a browser tab.
    fig.write_html(filepath, full_html=True, include_plotlyjs=True)

    # The plotly.js bundle is large (about 3 MB according to the plotly docs),
    # so embed it only in the first tile. Later tiles are appended after it in
    # the same page and reuse the already-loaded library.
    embed_plotlyjs = not plot_containers
    html_content = pio.to_html(fig, full_html=False, include_plotlyjs=embed_plotlyjs)

    plot_containers += f"""
    <div class="plot-container" onclick="openPlot('{filename}')">
        <div class="plot">{html_content}</div>
        <div class="insights">{insight}</div>
    </div>
    """

# =========================
# LOAD DATA
# =========================
# Input CSVs: the app catalogue and the user-review export.
apps_csv_path = r"C:\Users\user\OneDrive\Desktop\Null Class\googleplaystore.csv"
reviews_csv_path = r"C:\Users\user\OneDrive\Desktop\Null Class\googleplaystore_user_reviews.csv"

apps_df = pd.read_csv(apps_csv_path)
reviews_df = pd.read_csv(reviews_csv_path)

# =========================
# CLEAN DATA
# =========================
# Installs: drop ',' and '+' in one explicit-regex pass. Spelling out
# regex=True (with '+' inside a character class, where it is literal) avoids
# depending on the default of `regex`, which differs between pandas 1.x and 2.x.
# Anything that still fails to parse becomes NaN and is then counted as 0.
apps_df['Installs'] = apps_df['Installs'].astype(str).str.replace(r'[,+]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(apps_df['Installs'], errors='coerce').fillna(0)

# Price: drop '$' and ',' before converting; unparseable values become 0.
apps_df['Price'] = apps_df['Price'].astype(str).str.replace(r'[\$,]', '', regex=True)
apps_df['Price'] = pd.to_numeric(apps_df['Price'], errors='coerce').fillna(0)

# Revenue is the plain product of the two cleaned columns, so rows with a
# price of 0 contribute 0.
apps_df['Revenue'] = apps_df['Installs'] * apps_df['Price']

# Unparseable dates become NaT rather than raising.
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')

# =========================
# COMMON STYLES
# =========================
plot_bg = "black"
font_color = "white"

# Layout overrides shared by every figure: black plot/paper background and
# white text.
dark_layout = {
    "plot_bgcolor": plot_bg,
    "paper_bgcolor": plot_bg,
    "font_color": font_color,
}


def _theme_and_save(fig, filename, insight):
    """Apply the shared layout to ``fig``, then pass it to save_plot_as_html."""
    fig.update_layout(**dark_layout)
    save_plot_as_html(fig, filename, insight)


# =========================
# FIGURES
# =========================
# NOTE: the insight captions below are fixed strings; they are not computed
# from the loaded data.

# 1. Top Categories -- the ten categories with the most rows.
category_counts = apps_df['Category'].value_counts().nlargest(10)
fig1 = px.bar(
    x=category_counts.index,
    y=category_counts.values,
    color=category_counts.index,
    title="Top Categories on Play Store",
)
_theme_and_save(
    fig1,
    "Category_Graph_1.html",
    "Top categories: tools, entertainment, productivity apps dominate.",
)

# 2. App Type -- share of each value in the 'Type' column.
type_counts = apps_df['Type'].value_counts()
fig2 = px.pie(values=type_counts.values, names=type_counts.index, title="App Type Distribution")
_theme_and_save(
    fig2,
    "Type_Graph_2.html",
    "Most apps are free, monetized via ads or in-app purchases.",
)

# 3. Rating Distribution -- histogram of 'Rating' with 20 bins.
fig3 = px.histogram(apps_df, x="Rating", nbins=20, title="Rating Distribution")
_theme_and_save(
    fig3,
    "Rating_Graph_3.html",
    "Ratings skewed high, most apps are rated favorably.",
)

# 4. Sentiment Distribution -- review counts per 'Sentiment' value.
sentiment_counts = reviews_df['Sentiment'].value_counts()
fig4 = px.bar(
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    title="Sentiment Distribution",
)
_theme_and_save(
    fig4,
    "Sentiment_Graph_4.html",
    "Reviews show a mix, slightly leaning positive.",
)

# 5. Installs by Category -- ten largest install totals, as horizontal bars.
installs_by_category = apps_df.groupby('Category')['Installs'].sum().nlargest(10)
fig5 = px.bar(
    x=installs_by_category.values,
    y=installs_by_category.index,
    orientation='h',
    title="Installs by Category",
)
_theme_and_save(
    fig5,
    "Installs_Graph_5.html",
    "Social and communication apps have most installs.",
)

# 6. Updates Over Years -- rows per 'Last Updated' year, in chronological order.
update_years = apps_df['Last Updated'].dt.year
updates_per_year = update_years.value_counts().sort_index()
fig6 = px.line(
    x=updates_per_year.index,
    y=updates_per_year.values,
    title="Number of Updates Over Years",
)
_theme_and_save(
    fig6,
    "Updates_Graph_6.html",
    "Updates increasing, developers actively maintain apps.",
)

# 7. Revenue by Category -- ten largest totals of the derived 'Revenue' column.
revenue_by_category = apps_df.groupby('Category')['Revenue'].sum().nlargest(10)
fig7 = px.bar(
    x=revenue_by_category.index,
    y=revenue_by_category.values,
    title="Revenue by Category",
)
_theme_and_save(
    fig7,
    "Revenue_Graph_7.html",
    "Business and productivity apps lead in revenue generation.",
)

# 8. Top Genres -- 'Genres' is split on ';' so each listed genre is counted.
genres_per_app = apps_df['Genres'].str.split(';')
genre_counts = genres_per_app.explode().value_counts().nlargest(10)
fig8 = px.bar(x=genre_counts.index, y=genre_counts.values, title="Top Genres")
_theme_and_save(fig8, "Genre_Graph_8.html", "Action and Casual genres dominate.")

# 9. Last Update vs Rating -- one point per row, coloured by 'Type'.
fig9 = px.scatter(
    apps_df,
    x="Last Updated",
    y="Rating",
    color="Type",
    title="Last Update vs Rating",
)
_theme_and_save(
    fig9,
    "Update_Graph_9.html",
    "More frequent updates do not always result in better ratings.",
)

# 10. Paid vs Free Ratings -- rating box plot per 'Type'.
fig10 = px.box(
    apps_df,
    x="Type",
    y="Rating",
    color="Type",
    title="Rating for Paid vs Free Apps",
)
_theme_and_save(
    fig10,
    "Paid_Free_Graph_10.html",
    "Paid apps generally have higher ratings.",
)

# =========================
# DASHBOARD HTML
# =========================
# Full dashboard page. Literal CSS/JS braces are doubled because this is an
# f-string; the only interpolated value is {plot_containers}, the tiles
# accumulated by save_plot_as_html().
#
# The page is written to disk as UTF-8, so it declares that encoding
# explicitly; without the <meta charset> a browser opening the file locally
# has to guess the encoding.
#
# Layout: tiles wrap in a flex container; each tile's "insights" caption is
# hidden until the tile is hovered, and clicking a tile calls openPlot() to
# open that plot's own page in a new tab.
dashboard_html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Google Play Store Analytics</title>
<style>
body {{
    background:#111;
    color:white;
    font-family:Arial, sans-serif;
    margin:0;
    padding:0;
}}
h1 {{
    text-align:center;
    padding:20px 0;
}}
.container {{
    display:flex;
    flex-wrap:wrap;
    justify-content:center;
    gap:20px;
    padding:10px;
}}
.plot-container {{
    flex: 1 1 45%;
    min-width:350px;
    max-width:800px;
    background:#222;
    border:2px solid #555;
    padding:10px;
    position:relative;
    cursor:pointer;
}}
.insights {{
    display:none;
    position:absolute;
    top:5px;
    right:5px;
    background:rgba(0,0,0,0.7);
    padding:5px;
    font-size:12px;
    width:90%;
}}
.plot-container:hover .insights {{
    display:block;
}}
</style>
<script>
function openPlot(file) {{
    window.open(file,'_blank');
}}
</script>
</head>
<body>
<h1>Google Play Store Analytics Dashboard</h1>
<div class="container">
{plot_containers}
</div>
</body>
</html>
"""

# Write the assembled page next to the per-plot pages so the relative links
# used by openPlot() resolve.
dashboard_path = os.path.join(html_files_path, "web_page.html")
with open(dashboard_path, "w", encoding="utf-8") as f:
    f.write(dashboard_html)

# Build the URL with pathlib instead of concatenating "file://" and an OS
# path: as_uri() produces a well-formed file URL on every platform
# (file:///C:/... on Windows) and percent-encodes characters such as spaces.
webbrowser.open(Path(dashboard_path).resolve().as_uri())
print("Dashboard generated! Open web_page.html to view all plots.")

Dashboard generated! Open web_page.html to view all plots.
