In [None]:
# Imports
import os
import webbrowser
from pathlib import Path

import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
# Fetch the VADER lexicon only when it is not already installed locally, so a
# "Restart & Run All" does not need network access once the resource exists.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

In [None]:
# Step 1: Load the Dataset
# Read both CSV exports (app listings first, user reviews second) from the
# current working directory.
apps_df, reviews_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Play Store Data.csv', 'User Reviews.csv')
)

In [None]:
# Step 2: Data Cleaning
# Rows without a rating cannot be analysed, so drop them first.
apps_df = apps_df.dropna(subset=['Rating'])

# Fill the remaining gaps with each column's most frequent value.
# The result is assigned back instead of calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated and silently does
# nothing under pandas Copy-on-Write. Columns with no mode (all values missing)
# are skipped rather than raising on mode()[0].
fill_values = {}
for column in apps_df.columns:
    column_modes = apps_df[column].mode()
    if not column_modes.empty:
        fill_values[column] = column_modes.iloc[0]
apps_df = apps_df.fillna(value=fill_values)

apps_df = apps_df.drop_duplicates()
# Keep only ratings up to 5; .copy() makes the filtered frame independent so
# later column assignments do not trigger SettingWithCopyWarning.
apps_df = apps_df[apps_df['Rating'] <= 5].copy()

# Reviews without translated text are dropped.
reviews_df = reviews_df.dropna(subset=['Translated_Review'])

# Merge datasets
# NOTE(review): merged_df is built before Step 3, so it carries none of the
# derived columns (numeric Installs/Price, Sentiment_Score, ...) — confirm that
# is what downstream cells expect.
merged_df = pd.merge(apps_df, reviews_df, on='App', how='inner')

In [None]:
# Step 3: Data Transformation
# Each conversion goes through astype(str) first so the cell can be re-run on
# already-converted columns without the .str accessor failing.
apps_df['Reviews'] = apps_df['Reviews'].astype(int)

# regex=False is explicit: with the regex default of older pandas, '+' is an
# invalid pattern and '$' matches end-of-string instead of the dollar sign.
apps_df['Installs'] = (
    apps_df['Installs']
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.replace('+', '', regex=False)
    .astype(int)
)
apps_df['Price'] = (
    apps_df['Price']
    .astype(str)
    .str.replace('$', '', regex=False)
    .astype(float)
)
def convert_size(size):
    """Convert a size label to a number of megabytes.

    Parameters
    ----------
    size : str or number
        A label containing 'M' (megabytes) or 'k' (kilobytes), e.g. '19M' or
        '512k'. Numeric input is treated as already converted and returned as
        a float, which keeps the transformation cell safe to re-run.

    Returns
    -------
    float
        The size in megabytes (kilobyte values are divided by 1024), or
        ``np.nan`` when the value is missing, carries no recognised unit, or
        its numeric part cannot be parsed.
    """
    # Non-string input: missing values (NaN/None) or numbers from a prior run.
    if not isinstance(size, str):
        try:
            return float(size)
        except (TypeError, ValueError):
            return np.nan

    # Tolerate surrounding whitespace and thousands separators.
    text = size.strip().replace(',', '')
    try:
        if 'M' in text:
            return float(text.replace('M', ''))
        if 'k' in text:
            return float(text.replace('k', '')) / 1024
    except ValueError:
        # A unit letter was present but the rest is not a number.
        return np.nan
    return np.nan
# Normalise every size label to megabytes via convert_size.
apps_df['Size'] = apps_df['Size'].map(convert_size)
# Add log(1 + x) versions of the two count columns.
for log_column, raw_column in (('Log_Installs', 'Installs'), ('Log_Reviews', 'Reviews')):
    apps_df[log_column] = np.log1p(apps_df[raw_column])
def rating_group(rating):
    """Return the label of the rating band that `rating` falls into.

    Bands are checked from the highest lower bound down: 4 and above is
    'Top rated', 3 and above 'Above average', 2 and above 'Average'.
    Anything that matches no band is 'Below average'.
    """
    bands = (
        (4, 'Top rated'),
        (3, 'Above average'),
        (2, 'Average'),
    )
    for lower_bound, label in bands:
        if rating >= lower_bound:
            return label
    return 'Below average'
# Label each app with its rating band.
apps_df['Rating_Group'] = apps_df['Rating'].map(rating_group)
# Revenue column: price multiplied by install count.
apps_df['Revenue'] = apps_df['Price'].mul(apps_df['Installs'])

# Sentiment Analysis
sia = SentimentIntensityAnalyzer()


def _compound_score(review_text):
    """Return the VADER 'compound' score for one review, coerced to str."""
    return sia.polarity_scores(str(review_text))['compound']


reviews_df['Sentiment_Score'] = reviews_df['Translated_Review'].apply(_compound_score)

# Parse the update date (unparseable values become NaT) and extract its year.
last_updated = pd.to_datetime(apps_df['Last Updated'], errors='coerce')
apps_df['Last Updated'] = last_updated
apps_df['Year'] = last_updated.dt.year

In [None]:
# Step 4: Initialize HTML Dashboard
# Directory that receives the per-plot HTML files and the dashboard page.
html_files_path = './'
# exist_ok=True creates the directory when missing without a separate
# existence check (which could race with another process).
os.makedirs(html_files_path, exist_ok=True)
# Accumulates the HTML of every plot container; filled by save_plot_as_html.
plot_containers = ''
def save_plot_as_html(fig, filename, insight):
    """Render a figure, register it on the dashboard and save it to disk.

    Parameters
    ----------
    fig : plotly figure
        Figure to render.
    filename : str
        File name (joined onto ``html_files_path``) the plot is written to.
        It is also used as the container's element id and as the target that
        ``openPlot`` opens on click.
    insight : str
        Text placed in the container's 'insights' element. It is inserted
        as-is, so any markup it contains is interpreted by the browser.

    Side effects
    ------------
    Updates the global ``plot_containers`` string and writes ``filename``.
    Calling the function again with the same ``filename`` replaces that plot's
    container instead of appending a duplicate, so re-running a plot cell does
    not produce repeated plots or duplicate element ids.
    """
    global plot_containers
    filepath = os.path.join(html_files_path, filename)

    # Render once and reuse the markup for both the dashboard and the file.
    # include_plotlyjs=True is the documented way to embed plotly.js inline.
    html_content = pio.to_html(fig, full_html=False, include_plotlyjs=True)

    # Comment markers delimit this plot's section so a later call can find it.
    begin_marker = f"<!-- plot:{filename} -->"
    end_marker = f"<!-- /plot:{filename} -->"
    container = f"""
    {begin_marker}
    <div class='plot-container' id='{filename}' onclick="openPlot('{filename}')">
        <div class='plot'>{html_content}</div>
        <div class='insights'>{insight}</div>
    </div>
    {end_marker}
    """

    begin_at = plot_containers.find(begin_marker)
    end_at = plot_containers.find(end_marker, begin_at) if begin_at != -1 else -1
    if end_at != -1:
        # Same plot registered before: swap the old section for the new one.
        plot_containers = (
            plot_containers[:begin_at]
            + container.strip()
            + plot_containers[end_at + len(end_marker):]
        )
    else:
        plot_containers += container

    # The standalone file holds the same <div> fragment as the dashboard.
    with open(filepath, 'w', encoding='utf-8') as plot_file:
        plot_file.write(html_content)

In [None]:
# Step 5: Create Plots
# Example: Category Analysis Plot
# Bar chart of the ten most frequent values in the 'Category' column.
category_counts = apps_df['Category'].value_counts().nlargest(10)
top_category_names = category_counts.index
top_category_totals = category_counts.values

fig1 = px.bar(
    title='Top Categories on Play Store',
    x=top_category_names,
    y=top_category_totals,
    color=top_category_names,
    color_discrete_sequence=px.colors.sequential.Plasma,
    labels={'x': 'Category', 'y': 'Count'},
)

save_plot_as_html(
    fig1,
    'category_analysis.html',
    'Top categories are dominated by tools, entertainment, and productivity apps.',
)

In [None]:
# Step 6: Add more plots (type, ratings, installs, etc.) in separate cells
# ... repeat save_plot_as_html for fig2, fig3 ... fig10

In [None]:
# Step 7: Create the final HTML dashboard
# Page template. Literal CSS/JS braces are doubled because the string is
# filled with str.format; {plots} receives the accumulated plot containers.
dashboard_html = """
<!DOCTYPE html>
<html lang='en'>
<head>
    <meta charset='UTF-8'>
    <meta name='viewport' content='width=device-width, initial-scale=1.0'>
    <title>Google Play Store Reviews Analytics</title>
    <style>
        body {{ font-family: Arial; background-color: #333; color: #fff; margin:0; padding:0; }}
        .container {{ display:flex; flex-wrap:wrap; justify-content:center; padding:20px; }}
        .plot-container {{ border:2px solid #555; margin:10px; padding:10px; width:400px; height:300px; overflow:hidden; position:relative; cursor:pointer; }}
        .insights {{ display:none; position:absolute; right:10px; top:10px; background-color: rgba(0,0,0,0.7); padding:5px; border-radius:5px; color:#fff; }}
        .plot-container:hover .insights {{ display:block; }}
    </style>
    <script>
        function openPlot(filename) {{ window.open(filename,'_blank'); }}
    </script>
</head>
<body>
    <div class='container'>
        {plots}
    </div>
</body>
</html>
"""
final_html = dashboard_html.format(plots=plot_containers)
dashboard_path = os.path.join(html_files_path, 'dashboard.html')
with open(dashboard_path, 'w', encoding='utf-8') as f:
    f.write(final_html)
# Build the URL with Path.as_uri(): concatenating 'file://' with an OS path
# yields a malformed URI on Windows (drive letter, backslashes) and leaves
# spaces / non-ASCII characters unescaped.
webbrowser.open(Path(dashboard_path).resolve().as_uri())