In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import os
import webbrowser
import nltk
from datetime import datetime
import pytz

In [2]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Suhani\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
apps_df=pd.read_csv('Play Store data.csv')
reviews_df=pd.read_csv('User Reviews.csv')
apps_df.head(), reviews_df.head()

(                                                 App        Category  Rating  \
 0     Photo Editor & Candy Camera & Grid & ScrapBook  ART_AND_DESIGN     4.1   
 1                                Coloring book moana  ART_AND_DESIGN     3.9   
 2  U Launcher Lite – FREE Live Cool Themes, Hide ...  ART_AND_DESIGN     4.7   
 3                              Sketch - Draw & Paint  ART_AND_DESIGN     4.5   
 4              Pixel Draw - Number Art Coloring Book  ART_AND_DESIGN     4.3   
 
   Reviews  Size     Installs  Type Price Content Rating  \
 0     159   19M      10,000+  Free     0       Everyone   
 1     967   14M     500,000+  Free     0       Everyone   
 2   87510  8.7M   5,000,000+  Free     0       Everyone   
 3  215644   25M  50,000,000+  Free     0           Teen   
 4     967  2.8M     100,000+  Free     0       Everyone   
 
                       Genres      Last Updated         Current Ver  \
 0               Art & Design   January 7, 2018               1.0.0   
 1  Art 

In [4]:
apps_df = apps_df.dropna(subset = ['Rating'])
for column in apps_df.columns:
    apps_df[column].fillna(apps_df[column].mode()[0], inplace = True)
apps_df.drop_duplicates(inplace=True)
apps_df = apps_df[apps_df['Rating'] <= 5]
reviews_df.dropna(subset = ['Translated_Review'], inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  apps_df[column].fillna(apps_df[column].mode()[0], inplace = True)


In [5]:
apps_df['Rating'] = pd.to_numeric(apps_df['Rating'], errors = 'coerce')

In [6]:
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors = 'coerce') 

In [7]:
filtered_apps = apps_df[
(apps_df['App'].str.contains('C', case = False, na = True))&
(apps_df['Reviews'] >=10)&
(apps_df['Rating'] < 4.0)
]

In [8]:
category_counts = filtered_apps['Category'].value_counts()

In [9]:
valid_categories = category_counts[category_counts > 50].index

In [10]:
filtered_apps = filtered_apps[filtered_apps['Category'].isin(valid_categories)]

In [11]:
# This script generates and displays a graph using matplotlib.
# It creates a dataset, plots a graph, and labels axes.
# Running this code will display the graph visually.
ist = pytz.timezone('Asia/Kolkata')
current_hour = datetime.now(ist).hour

In [12]:
chart_path = 'Violin_plot.html'
display_page_path = 'display_violin_plot.html'
if 16 <= current_hour < 18:
    fig = px.violin(filtered_apps, x = 'Category', y = 'Rating', box = True, points = 'all', title = 'Rating Distribution by Category')
    fig.write_html(chart_path)
    html_content = f'''
    <!DOCKTYPE html>
    <html lang = 'en'>
    <head>
        <meta charset = 'UTF-8'>
        <meta name = 'viewport' content = 'width = device - width, initial - scale = 1.0'>
        <title> Violin Plot </title>
        <script>
            function openchart() {{
            window.open('{chart_path}', '_blank');
            }}
        </script>
    </head>
    <body>
        <h2> Violin Plot: Rating Distribution by Category </h2>
        <button onclick ='openChart()'> Open Chart </button>
    </body>
    </html>
    '''
else:
    if os.path.exists(chart_path):
        os.remove(chart_path)
    html_content = '''
    <!DOCTYPE html>
    <html lang = 'en'>
    <head>
        <meta charset = 'UTF-8'>
        <meta name = 'viewport' content = 'width = device-width, initial-scale = 1.0'>
        <title> Violin Plot </title>
    </head>
    <body>
        <h2> Violin Plot is not available at this time. </h2>
        <p> Please check back between 4 PM - 6 PM IST. </p>
    </body>
    </html>
    '''
with open(display_page_path, 'w') as file:
    file.write(html_content)