In [None]:
import os
import re
import webbrowser
from pathlib import Path

import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [None]:
# VADER reads the 'vader_lexicon' NLTK resource, which is not shipped with the
# nltk package itself. Fetch it once if it is missing so that constructing the
# analyzer does not fail with LookupError on a fresh environment.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon', quiet=True)

sia = SentimentIntensityAnalyzer()

In [None]:
# Read the two input tables (app catalogue and user reviews) from CSV files
# located in the current working directory.
apps_df = pd.read_csv(filepath_or_buffer='PlayStoreData.csv')
reviews_df = pd.read_csv(filepath_or_buffer='UserReviews.csv')

In [None]:
apps_df.count()

In [None]:
apps_df.head()

In [None]:
# Turn the textual install counts (e.g. "10,000+") into integers:
#   1. drop thousands separators and the trailing plus sign,
#   2. blank out the stray "Free" value,
#   3. coerce everything to numbers (non-numeric -> NaN),
#   4. store missing counts as 0.
apps_df["Installs"] = (
    pd.to_numeric(
        apps_df["Installs"]
        .replace({"[,+]": ""}, regex=True)
        .replace({"Free": np.nan}),
        errors="coerce",
    )
    .fillna(0)
    .astype(int)
)

In [None]:
# The unwanted symbols have now been removed from the Installs column
apps_df.head()

In [None]:
# Convert each column to an appropriate datatype, starting with Price
def convert_price(price):
    """Convert a raw price entry to a numeric dollar amount.

    Parameters
    ----------
    price : object
        Raw cell value: ``'Free'``, a dollar-prefixed string such as
        ``'$4.99'``, or an already-numeric value.

    Returns
    -------
    float
        The price with its fractional part preserved (``'$4.99'`` -> ``4.99``).
        ``0.0`` is returned for ``'Free'``, missing values, strings without a
        leading ``$`` and anything that cannot be parsed as a number.
    """
    if isinstance(price, bool):
        # bool is an int subclass; it is never a meaningful price
        return 0.0
    if isinstance(price, (int, float)):
        # Already numeric: pass it through so that applying this function to
        # an already-converted column does not reset every price to 0.
        # NaN is the only value that differs from itself.
        return 0.0 if price != price else float(price)
    if not isinstance(price, str):
        return 0.0

    text = price.strip()
    if text == 'Free' or not text.startswith('$'):
        return 0.0
    try:
        # Keep the cents: truncating to int would turn '$0.99' into 0
        return float(text.replace('$', ''))
    except ValueError:
        return 0.0

In [None]:
# Replace every raw Price entry with the value returned by convert_price
apps_df['Price'] = apps_df['Price'].map(convert_price)

In [None]:
# Inspect the column dtypes to confirm the Price column was converted
apps_df.dtypes

In [None]:
def convert_to_mb(size):
    """Convert a size label such as ``'19M'`` or ``'201k'`` to megabytes.

    Parameters
    ----------
    size : object
        Raw cell value. Strings ending in ``M`` are read as megabytes and
        strings ending in ``K``/``k`` as kilobytes.

    Returns
    -------
    float or None
        The size in megabytes (kilobytes are multiplied by 0.001), or ``None``
        when the value is missing, not a string, has no recognised unit
        suffix, or its numeric part cannot be parsed.
    """
    if not isinstance(size, str):
        # Missing cells arrive as NaN (a float), which has no string methods
        return None

    label = size.strip()
    if not label:
        return None

    unit = label[-1].upper()  # accept both 'K' and 'k'
    if unit not in ("M", "K"):
        return None

    try:
        magnitude = float(label[:-1])
    except ValueError:
        # e.g. a bare 'M' or a number containing thousands separators
        return None

    return magnitude if unit == "M" else magnitude * 0.001

In [None]:
# Add a size_in_MB column holding the result of convert_to_mb for each Size entry
apps_df["size_in_MB"] = apps_df["Size"].apply(convert_to_mb)

In [None]:
apps_df.dtypes

In [None]:
def convert_android_ver(version):
    """Extract the minimum Android version as a ``major[.minor]`` string.

    Only the leading ``major`` or ``major.minor`` digits are kept, so that the
    result can always be parsed as a number afterwards: ``'4.0.3 and up'``
    gives ``'4.0'`` and ``'4.4W and up'`` gives ``'4.4'``. Returning the full
    first token instead would make such entries unparseable as numbers.

    Parameters
    ----------
    version : object
        Raw cell value, e.g. ``'4.1 and up'``, ``'5.0 - 8.0'`` or
        ``'Varies with device'``.

    Returns
    -------
    str or None
        The version prefix, or ``None`` for non-string input, empty strings,
        ``'Varies with device'`` and strings that do not start with a digit.
    """
    if not isinstance(version, str):
        return None

    version = version.strip()
    if not version or version == 'Varies with device':
        return None

    # re.match anchors at the start, so for a range like '5.0 - 8.0' only the
    # lower bound is read.
    leading_number = re.match(r'\d+(?:\.\d+)?', version)
    return leading_number.group(0) if leading_number else None

In [None]:
# Replace every raw 'Android Ver' entry with the value returned by convert_android_ver
apps_df['Android Ver'] = apps_df['Android Ver'].map(convert_android_ver)

In [None]:
print(apps_df[['App', 'Android Ver']])

In [None]:
print(apps_df['Android Ver'].count())

In [None]:
apps_df.head()

In [None]:
# Cast the extracted version strings to numbers; with errors='coerce' any
# entry that is not a valid number becomes NaN instead of raising.
apps_df['Android Ver'] = pd.to_numeric(
    apps_df['Android Ver'],
    errors='coerce',
)

In [None]:
apps_df.dtypes

In [None]:
# Revenue per app: install count multiplied by the app price
apps_df['Revenue'] = apps_df['Installs'].mul(apps_df['Price'])

In [None]:
apps_df.dtypes

In [None]:
# Keep only the rows that satisfy every criterion below. Rows with a missing
# value in any compared column evaluate to False and are dropped.
filtered_df = apps_df.loc[
    apps_df['Installs'].ge(10000)                  # installs >= 10,000
    & apps_df['Revenue'].ge(10000)                 # revenue >= 10,000
    & apps_df['Android Ver'].gt(4)                 # Android version > 4
    & apps_df['size_in_MB'].gt(15)                 # size greater than 15 MB
    & apps_df['Content Rating'].eq('Everyone')     # 'Everyone' content rating
    & apps_df['App'].str.len().le(30)              # app name length <= 30
]

In [None]:
# Non-null counts per column of the filtered dataset
filtered_df.count()

In [None]:
# Labels of the three categories with the largest total install count
top_categories = (
    filtered_df
    .groupby('Category')['Installs']
    .sum()
    .nlargest(3)
    .index
)

In [None]:
# Restrict the filtered apps to those belonging to the top categories
top_filtered_df = filtered_df.loc[filtered_df['Category'].isin(top_categories)]

In [None]:
top_filtered_df.count()

In [None]:
# Mean installs and mean revenue for every (Category, Type) pair, returned as
# a flat frame with Category and Type as ordinary columns.
agg_data = (
    top_filtered_df
    .groupby(['Category', 'Type'])
    .agg(
        Installs=('Installs', 'mean'),
        Revenue=('Revenue', 'mean'),
    )
    .reset_index()
)

In [None]:
# Distinct category and app-type labels present in the aggregated data;
# `types` drives the per-type traces of the chart built further down.
categories = pd.unique(agg_data['Category'])
types = pd.unique(agg_data['Type'])

In [None]:
import plotly.graph_objects as go

In [None]:
from IPython.display import display

In [None]:
# Directory that receives the generated per-plot HTML files.
html_files_path = "./"
# exist_ok=True creates the directory only when needed, without a separate
# exists() check that could race with another process.
os.makedirs(html_files_path, exist_ok=True)

In [None]:
# Accumulates the HTML of every plot tile; save_plot_as_html appends to it
plot_containers = str()

In [None]:
# Register a Plotly figure on the dashboard and save it to its own HTML file
def save_plot_as_html(fig, filename, insight):
    """Add ``fig`` to the dashboard markup and write it to ``filename``.

    The figure is rendered to an HTML fragment with plotly.js embedded inline,
    wrapped together with ``insight`` in a ``plot-container`` div whose
    ``onclick`` handler calls ``openPlot(filename)``, and appended to the
    module-level ``plot_containers`` string. Afterwards the same figure is
    written as an HTML fragment to ``filename`` inside ``html_files_path``.

    Parameters
    ----------
    fig : plotly figure
        Figure to render and save.
    filename : str
        File name of the standalone plot; also used as the div id.
    insight : str
        Text placed in the tile's ``insights`` div.
    """
    global plot_containers

    target_path = os.path.join(html_files_path, filename)
    rendered = pio.to_html(fig, full_html=False, include_plotlyjs='inline')

    # Same markup and whitespace as a triple-quoted block, built line by line
    tile = (
        "\n"
        f"    <div class=\"plot-container\" id=\"{filename}\" onclick=\"openPlot('{filename}')\">\n"
        f"        <div class=\"plot\">{rendered}</div>\n"
        f"        <div class=\"insights\">{insight}</div>\n"
        "    </div>\n"
        "    "
    )
    plot_containers = plot_containers + tile

    fig.write_html(target_path, full_html=False, include_plotlyjs='inline')

In [None]:
# Shared look-and-feel settings for the dashboard
plot_width, plot_height = 400, 300            # tile size in pixels
plot_bg_color, text_color = 'black', 'white'
title_font = dict(size=16)
axis_font = dict(size=12)

In [None]:
fig2 = go.Figure()

In [None]:
# Split the aggregated rows by app type once instead of re-filtering the
# frame for every x/y argument of every trace.
rows_by_type = {t: agg_data[agg_data['Type'] == t] for t in types}

# Bars: average installs per category, one bar series per app type (left axis)
for t, rows in rows_by_type.items():
    fig2.add_trace(go.Bar(
        x=rows['Category'],
        y=rows['Installs'],
        name=f'Avg Installs ({t})',
        marker=dict(opacity=0.7)
    ))

# Lines: average revenue per category, drawn against the secondary y-axis.
# Added after all bars so the trace (and legend) order stays bars-then-lines.
for t, rows in rows_by_type.items():
    fig2.add_trace(go.Scatter(
        x=rows['Category'],
        y=rows['Revenue'],
        mode='lines+markers',
        name=f'Avg Revenue ({t})',
        yaxis='y2'
    ))

# Single layout call: every property is set exactly once, and colours/fonts
# come from the shared style constants instead of repeated literals.
fig2.update_layout(
    title=dict(
        text="Comparison of Average Installs and Revenue for Free vs Paid Apps",
        font=title_font,
    ),
    xaxis=dict(title=dict(text='Category', font=axis_font)),
    yaxis=dict(title=dict(text='Average Installation', font=axis_font), side='left'),
    yaxis2=dict(title='Average Revenue', overlaying='y', side='right', showgrid=False),
    barmode='group',
    legend=dict(title='Metrics'),
    template='plotly',
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font=dict(color=text_color),
    margin=dict(l=20, r=20, t=40, b=20),
)
save_plot_as_html(fig2, "Dual-axis chart.html", "This is a dual-axis plot")

In [None]:
fig2.show()

In [None]:
# Page template for the dashboard, filled in later with str.format().
# Placeholders: {plots} (the accumulated plot tiles), {plot_width} and
# {plot_height} (tile size in pixels). Literal CSS/JS braces are doubled
# ({{ }}) so that str.format() emits them as single braces.
dashboard_html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Analytics Dashboard</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #333;
            color: #fff;
            margin: 0;
            padding: 0;
        }}
        .header {{
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 20px;
            background-color: #444;
        }}
        .header img {{
            margin: 0 10px;
            height: 50px;
        }}
        .container {{
            display: flex;
            flex-wrap: wrap;
            justify-content: center;
            padding: 20px;
        }}
        .plot-container {{
            border: 2px solid #555;
            margin: 10px;
            padding: 10px;
            width: {plot_width}px;
            height: {plot_height}px;
            overflow: hidden;
            position: relative;
            cursor: pointer;
        }}
        .insights {{
            display: none;
            position: absolute;
            right: 10px;
            top: 10px;
            background-color: rgba(0, 0, 0, 0.7);
            padding: 5px;
            border-radius: 5px;
            color: #fff;
        }}
        .plot-container:hover .insights {{
            display: block;
        }}
    </style>
    <script>
        function openPlot(filename) {{
            window.open(filename, '_blank');
        }}
    </script>
</head>
<body>
    <div class="header">
        <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Logo_2013_Google.png/800px-Logo_2013_Google.png" alt="Google Logo">
        <h1>Analytics Dashboard</h1>
        <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Google_Play_Store_badge_EN.svg/1024px-Google_Play_Store_badge_EN.svg.png" alt="Google Play Store Logo">
    </div>
    <div class="container">
        {plots}
    </div>
</body>
</html>
"""

In [None]:
import webbrowser
import os
# Make sure the output directory exists; exist_ok=True makes this a no-op
# when it is already there and avoids a separate check-then-create step.
os.makedirs(html_files_path, exist_ok=True)

In [None]:
# Fill the page template with the collected plot tiles and the tile dimensions
final_html = dashboard_html.format(
    plot_width=plot_width,
    plot_height=plot_height,
    plots=plot_containers,
)

In [None]:
# Write the dashboard into the same directory as the per-plot HTML files.
# The tiles call openPlot() with a bare file name, which the browser resolves
# relative to the dashboard page, so both must live side by side. Reusing
# html_files_path also avoids a machine-specific absolute path.
dashboard_path = os.path.join(html_files_path, 'web page.html')

In [None]:
# Persist the rendered dashboard page as UTF-8 text
with open(dashboard_path, mode="w", encoding="utf-8") as dashboard_file:
    dashboard_file.write(final_html)

In [None]:
# Open the dashboard in the default browser. as_uri() builds a well-formed
# file:// URL (percent-encoding the space in the file name and handling
# platform-specific path syntax) instead of concatenating strings by hand.
webbrowser.open(Path(dashboard_path).resolve().as_uri())