In [42]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import pytz

In [43]:
# Load the two cleaned CSV exports: the Play Store app table and the
# user-review table. Both paths are absolute paths on the author's machine.
apps_df = pd.read_csv(
    filepath_or_buffer="E:/Data Analyst Intern Tasks/Jupyter/Cleaned Datasets/Cleaned_GooglePlaystore.csv"
)
reviews_df = pd.read_csv(
    filepath_or_buffer="E:/Data Analyst Intern Tasks/Jupyter/Cleaned Datasets/Cleaned_UserReviews.csv"
)

In [44]:
# Viewing window for the chart: 17:00:00 to 19:00:00, compared against the
# current wall-clock time in the Asia/Kolkata (IST) zone.
start_time, end_time = (
    datetime.strptime(clock, '%H:%M:%S').time()
    for clock in ('17:00:00', '19:00:00')
)
ist = pytz.timezone('Asia/Kolkata')
# Captured once when this cell runs; the cell below compares against this value.
current_time = datetime.now(ist).time()

In [45]:
# Draw the matplotlib bubble chart only while the captured IST time lies inside
# the 17:00-19:00 window (both bounds inclusive); otherwise print a notice.
if start_time <= current_time <= end_time:
    # Inner join on the app name: only apps present in both tables survive.
    merged_df = pd.merge(apps_df, reviews_df, on='App', how='inner')

    # Debug: Print shape before filtering
    print(f"Initial merged data shape: {merged_df.shape}")

    # Categories eligible for the chart. 'Events' (plural) matches the category
    # list used by the Dash section of this notebook; the previous spelling
    # 'Event' disagreed with it, and isin() only matches exact values.
    chart_categories = [
        'Game', 'Beauty', 'Business', 'Comics', 'Communication',
        'Dating', 'Entertainment', 'Social', 'Events',
    ]

    # Apply all filters
    filtered_df = merged_df[
        (merged_df['Rating'] > 3.5) &
        (merged_df['Category'].isin(chart_categories)) &
        (merged_df['Reviews'] > 500) &
        # Exclude every app whose name contains the letter "s" (any case).
        (~merged_df['App'].str.contains('S', case=False)) &
        (merged_df['Sentiment_Subjectivity'] > 0.5) &
        (merged_df['Installs'] > 50000)
    ].copy()

    # Debug: Print shape after filtering
    print(f"Filtered data shape: {filtered_df.shape}")

    if filtered_df.empty:
        print("No apps match all the filtering criteria. Please relax some filters.")
    else:
        # Collapse the per-review rows into one row per (App, Category, Size),
        # averaging the numeric columns that are plotted.
        grouped_df = filtered_df.groupby(['App', 'Category', 'Size']).agg({
            'Rating': 'mean',
            'Installs': 'mean',
            'Reviews': 'mean'
        }).reset_index()

        # Display labels for the legend; categories without an entry keep
        # their original name.
        category_labels = {
            'Beauty': 'सुंदरता',    # Hindi
            'Business': 'வணிகம்',  # Tamil
            'Dating': 'Dating',     # label left identical to the English name
        }

        def translate_category(category):
            """Return the legend label for a category (unchanged if unmapped)."""
            return category_labels.get(category, category)

        grouped_df['Translated_Category'] = grouped_df['Category'].apply(translate_category)

        # Create the bubble chart
        fig, ax = plt.subplots(figsize=(14, 8))
        artists = []  # scatter handles, in legend order

        # Every category except Game gets matplotlib's automatic colour cycle.
        # sort=False keeps the categories in order of first appearance.
        non_game = grouped_df[grouped_df['Category'] != 'Game']
        for label, category_data in non_game.groupby('Translated_Category', sort=False):
            artists.append(ax.scatter(
                x=category_data['Size'],
                y=category_data['Rating'],
                s=category_data['Installs'] / 10000,  # scale installs down to a marker area
                alpha=0.6,
                label=label
            ))

        # Game is drawn last and always in pink.
        game_data = grouped_df[grouped_df['Category'] == 'Game']
        if not game_data.empty:
            artists.append(ax.scatter(
                x=game_data['Size'],
                y=game_data['Rating'],
                s=game_data['Installs'] / 10000,
                alpha=0.6,
                color='pink',
                label='Game'
            ))

        # Only add legend if we have artists
        if artists:
            ax.legend(handles=artists, title='Categories', bbox_to_anchor=(1.05, 1), loc='upper left')

        # Add chart elements
        ax.set_title('App Size vs Rating (Bubble Size = Installs)', fontsize=16)
        ax.set_xlabel('App Size (MB)', fontsize=12)
        ax.set_ylabel('Average Rating', fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.7)

        # Leave room for the legend placed outside the axes, then render.
        fig.tight_layout()
        plt.show()

else:
    print("The bubble chart is only available between 5 PM to 7 PM IST.")

The bubble chart is only available between 5 PM to 7 PM IST.


In [46]:
!pip install dash



In [47]:
import plotly.express as px
import dash
from dash import dcc, html

In [48]:
# Read both cleaned CSV exports again (the same two files that are loaded at
# the top of the notebook), rebinding apps_df and reviews_df.
apps_df, reviews_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "E:/Data Analyst Intern Tasks/Jupyter/Cleaned Datasets/Cleaned_GooglePlaystore.csv",
        "E:/Data Analyst Intern Tasks/Jupyter/Cleaned Datasets/Cleaned_UserReviews.csv",
    )
)

In [49]:
# Join the review table onto the app table by app name. No `how` is given, so
# pandas performs its default inner join: apps without reviews are dropped.
merged_df = apps_df.merge(reviews_df, on='App')

In [50]:
# Category names that are allowed through the filter below (exact matches).
categories = [
    'Game',
    'Beauty',
    'Business',
    'Comics',
    'Communication',
    'Dating',
    'Entertainment',
    'Social',
    'Events',
]

In [None]:
# Keep only the merged rows that satisfy every criterion at once:
#   rating above 3.5, category in `categories`, more than 500 reviews,
#   no letter "s" (any case) in the app name, review subjectivity above 0.5,
#   and more than 50,000 installs.
# .copy() detaches the result so later column assignments do not touch merged_df.
filtered_df = merged_df.loc[
    merged_df['Rating'].gt(3.5)
    & merged_df['Category'].isin(categories)
    & merged_df['Reviews'].gt(500)
    & ~merged_df['App'].str.contains('s', case=False)
    & merged_df['Sentiment_Subjectivity'].gt(0.5)
    & merged_df['Installs'].gt(50000)
].copy()

In [52]:
# Display names for the categories that are shown in another language;
# any category not listed here keeps its original name.
translation_map = dict(
    Beauty='सौंदर्य',        # Hindi
    Business='வணிகம்',     # Tamil
    Dating='Partnersuche',  # German
)

In [53]:
# Add the label column for the chart: the translated name where
# translation_map has an entry, the original category name otherwise.
filtered_df['Category_Display'] = filtered_df['Category'].map(
    lambda name: translation_map.get(name, name)
)

In [54]:
# Per-row colour name: 'pink' for the Game category, 'blue' for everything else.
filtered_df['Color'] = (
    filtered_df['Category']
    .eq('Game')
    .map({True: 'pink', False: 'blue'})
)

In [56]:
# Create the Plotly chart
def create_bubble_chart():
    """Build the Plotly bubble chart of app size against rating.

    Reads the module-level ``filtered_df``. Each bubble is positioned by
    ``Size_MB`` (x) and ``Rating`` (y), sized by ``Installs`` and coloured by
    ``Category_Display``: pink for 'Game', blue for every other label.

    Returns:
        The figure produced by ``plotly.express.scatter`` with its title set.
    """
    # filtered_df is derived from an app/review merge, so an app's values are
    # repeated once per matching review. Rows that agree on every plotted
    # column would render as identical stacked markers, so keep one of each.
    # NOTE(review): the matplotlib section of this notebook reads the size from
    # a column named 'Size'; confirm which name the cleaned CSV actually uses.
    plotted_columns = ['App', 'Category_Display', 'Size_MB', 'Rating', 'Installs']
    bubbles = filtered_df.drop_duplicates(subset=plotted_columns)

    color_by_label = {
        label: 'pink' if label == 'Game' else 'blue'
        for label in bubbles['Category_Display'].unique()
    }

    fig = px.scatter(
        bubbles,
        x='Size_MB',
        y='Rating',
        size='Installs',
        color='Category_Display',
        hover_name='App',
        size_max=60,
        color_discrete_map=color_by_label
    )
    fig.update_layout(title="App Size vs Rating (Bubble Size = Installs)")
    return fig

In [57]:
# Check if current IST time is between 5 PM and 7 PM
def is_within_time_window():
    """Return True when the current Asia/Kolkata time is within 17:00-19:00.

    Both ends of the window are inclusive.
    """
    window_open, window_close = (
        datetime.strptime(stamp, '%H:%M').time() for stamp in ('17:00', '19:00')
    )
    local_clock = datetime.now(pytz.timezone('Asia/Kolkata')).time()
    return window_open <= local_clock <= window_close

In [58]:
# Dash App
app = dash.Dash(__name__)


def serve_layout():
    """Build the page layout for one page load.

    Shows the bubble chart while is_within_time_window() is true and a notice
    otherwise. Because Dash calls this function for each page load, the time
    check reflects the moment of the request rather than the moment the server
    was started.
    """
    if is_within_time_window():
        body = dcc.Graph(figure=create_bubble_chart())
    else:
        body = html.H4("This chart is only available between 5 PM and 7 PM IST.")
    return html.Div([
        html.H2("Google Play Store App Analysis"),
        body
    ])


# Assign the function itself (no parentheses): a static component tree would
# freeze the chart-or-notice decision at start-up for the server's lifetime.
app.layout = serve_layout

In [59]:
# Start the Dash server, but only when this code runs as the main module
# (which is also the case inside a notebook kernel), not when it is imported.
# NOTE(review): debug=True is meant for local development; switch it off for
# any shared or deployed instance.
if __name__ == '__main__':
    app.run(debug=True)