In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from datetime import datetime
import nltk
import webbrowser
import os
import pytz

In [2]:
nltk.download('vader_lexicon')


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Suhani\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
apps_df = pd.read_csv('Play Store Data.csv')
reviews_df = pd.read_csv('User Reviews.csv')
apps_df.head(),reviews_df.head()

(                                                 App        Category  Rating  \
 0     Photo Editor & Candy Camera & Grid & ScrapBook  ART_AND_DESIGN     4.1   
 1                                Coloring book moana  ART_AND_DESIGN     3.9   
 2  U Launcher Lite – FREE Live Cool Themes, Hide ...  ART_AND_DESIGN     4.7   
 3                              Sketch - Draw & Paint  ART_AND_DESIGN     4.5   
 4              Pixel Draw - Number Art Coloring Book  ART_AND_DESIGN     4.3   
 
   Reviews  Size     Installs  Type Price Content Rating  \
 0     159   19M      10,000+  Free     0       Everyone   
 1     967   14M     500,000+  Free     0       Everyone   
 2   87510  8.7M   5,000,000+  Free     0       Everyone   
 3  215644   25M  50,000,000+  Free     0           Teen   
 4     967  2.8M     100,000+  Free     0       Everyone   
 
                       Genres      Last Updated         Current Ver  \
 0               Art & Design   January 7, 2018               1.0.0   
 1  Art 

In [4]:
apps_df = apps_df.dropna(subset=['Rating'])
for column in apps_df.columns:
    apps_df[column].fillna(apps_df[column].mode()[0], inplace=True)
apps_df.drop_duplicates(inplace=True)
apps_df = apps_df[apps_df['Rating'] <= 5]
reviews_df.dropna(subset=['Translated_Review'], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  apps_df[column].fillna(apps_df[column].mode()[0], inplace=True)


In [5]:
merged_df = pd.merge(apps_df, reviews_df, on='App', how='inner')

In [6]:
apps_df['Installs'] = apps_df['Installs'].astype(str).str.replace(',', '').str.replace('+', '').astype(float)

In [7]:
apps_df['Price'] = apps_df['Price'].str.replace('$', '').astype(float)

In [8]:
apps_df['Revenue'] = apps_df['Price'] * apps_df['Installs']

In [9]:
apps_df['Revenue'] = pd.to_numeric(apps_df['Revenue'], errors='coerce')
apps_df['Size'] = apps_df['Size'].str.replace('M', '').replace('Varies with device', None)
apps_df['Size'] = pd.to_numeric(apps_df['Size'], errors='coerce')  

In [10]:
filtered_apps = apps_df[(apps_df['Installs'] >= 100000) & 
(apps_df['Revenue'] >= 100000) & 
(apps_df['Android Ver'].str.contains('4.0|4.1|4.2|4.3|4.4|5.0|5.1|6.0|7.0|7.1|8.0|8.1|9|10|11|12')) & 
(apps_df['Size'] > 15) & 
(apps_df['Content Rating'] == 'Everyone') & 
(apps_df['App'].str.len() <=30)]

In [11]:
filtered_apps.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Revenue
853,Toca Life: City,EDUCATION,4.7,31085,24.0,500000.0,Paid,3.99,Everyone,Education;Pretend Play,"July 6, 2018",1.5-play,4.4 and up,1995000.0
854,Toca Life: Hospital,EDUCATION,4.7,3528,24.0,100000.0,Paid,3.99,Everyone,Education;Pretend Play,"June 12, 2018",1.1.1-play,4.4 and up,399000.0
1831,The Game of Life,GAME,4.4,18621,63.0,100000.0,Paid,2.99,Everyone,Board,"July 4, 2018",2.1.2,4.4 and up,299000.0
1833,The Room: Old Sins,GAME,4.9,21119,48.0,100000.0,Paid,4.99,Everyone,Puzzle,"April 18, 2018",1.0.1,4.4 and up,499000.0
1836,RollerCoaster Tycoon® Classic,GAME,4.6,10795,69.0,100000.0,Paid,5.99,Everyone,Simulation,"December 21, 2017",1.2.1.1712080,4.0.3 and up,599000.0


In [12]:
top_categories = filtered_apps['Category'].value_counts().head(3).index
filtered_apps = filtered_apps[filtered_apps['Category'].isin(top_categories)]

In [13]:
filtered_apps['Type'] = filtered_apps['Type'].str.strip()
avg_metrics = filtered_apps.groupby(['Category', 'Type'])[['Installs', 'Revenue']].mean().reset_index()
avg_metrics

Unnamed: 0,Category,Type,Installs,Revenue
0,EDUCATION,Paid,300000.0,1197000.0
1,FAMILY,Paid,466666.666667,978666.7
2,GAME,Paid,228571.428571,497714.3


In [14]:
# This script generates and displays a graph using matplotlib.
# It creates a dataset, plots a graph, and labels axes.
# Running this code will display the graph visually.
ist = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(ist).hour

In [15]:
chart_path = 'dual_axis_chart.html'
display_page_path = 'display_dual_axis_chart.html'

if 13 <= current_time < 14:  
    fig = go.Figure()
    for t in avg_metrics['Type'].unique():
        df_temp = avg_metrics[avg_metrics['Type'] == t]
        fig.add_trace(go.Bar(x = df_temp['Category'], y = df_temp['Installs'], name = f'Installs ({t})', marker_color = 'blue'))
        fig.add_trace(go.Scatter(x = df_temp['Category'], y = df_temp['Revenue'], mode = 'lines+markers', name=f"Revenue ({t})", marker_color = 'red'))
    
    fig.update_layout(title = 'Comparison of Installs & Revenue for Free vs. Paid Apps',
                      xaxis_title = 'App Category', yaxis_title = 'Average Installs',
                      yaxis2 = dict(title = 'Average Revenue ($)', overlaying = 'y', side = 'right'))
    fig.write_html(chart_path)
    html_content = f'''
    <!DOCTYPE html>
    <html lang = 'en'>
    <head>
        <meta charset = 'UTF-8'>
        <meta name = 'viewport' content = 'width = device-width, initial-scale = 1.0'>
        <title> Dual-Axis Chart </title>
        <script>
            function openChart() {{
                window.open('{chart_path}', '_blank');
            }}
        </script>
    </head>
    <body>
        <h2> Dual-Axis Chart: Installs vs. Revenue </h2>
        <button onclick = 'openChart()'> Open Chart </button>
    </body>
    </html>
    '''
else:
    if os.path.exists(chart_path):
        os.remove(chart_path)
    html_content = '''
    <!DOCTYPE html>
    <html lang = 'en'>
    <head>
        <meta charset = 'UTF-8'>
        <meta name = 'viewport" content = 'width = device-width, initial-scale = 1.0'>
        <title> Dual-Axis Chart </title>
    </head>
    <body>
        <h2> Dual-Axis Chart is not available at this time. </h2>
        <p> Please check back between 1 PM - 2 PM IST. </p>
    </body>
    </html>
    '''
with open(display_page_path, 'w') as file:
    file.write(html_content)