In [None]:
"""
Create a violin plot to visualize the distribution of ratings for each app category, but only include categories with more
than 50 apps and app name should contain letter “C” and exclude apps with fewer than 10 reviews and rating should be less 4.0. 
This graph should not work between 6 PM to 11PM
"""

In [24]:
import pytz
from datetime import datetime
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, HoverTool, Select
from bokeh.layouts import column, row

In [26]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Get current time in San Francisco
sf_tz = pytz.timezone('America/Los_Angeles')
current_time = datetime.now(sf_tz)
current_hour = current_time.hour

# Check if we're in the restricted time window (18:00 - 23:00)
if 18 <= current_hour <= 23:
    print("Sorry, this visualization is not available between 6 PM and 11 PM")
else:
    # Read the data
    df = pd.read_csv('Play Store Data (2).csv')
    
    # Filter conditions:
    # 1. Apps with letter "C" in name
    # 2. More than 10 reviews
    # 3. Rating less than 4.0
    filtered_df = df[
        df['App'].str.contains('C', case=True) &
        (df['Reviews'] > 10) &
        (df['Rating'] < 4.0)
    ]
    
    # Count apps per category and filter categories with more than 50 apps
    category_counts = filtered_df['Category'].value_counts()
    valid_categories = category_counts[category_counts > 50].index
    
    # Final filtered dataframe
    final_df = filtered_df[filtered_df['Category'].isin(valid_categories)]
    
    if not final_df.empty:
        # Create the violin plot
        plt.figure(figsize=(12, 6))
        sns.violinplot(data=final_df, x='Category', y='Rating')
        plt.xticks(rotation=45, ha='right')
        plt.title('Distribution of Ratings by Category\
(Apps with "C", >10 reviews, rating <4.0)')
        plt.tight_layout()
        plt.show()
        
        print("Number of apps in each category:")
        print(final_df['Category'].value_counts())
    else:
        print("No data available after applying all filters")

print("Analysis complete")

Sorry, this visualization is not available between 6 PM and 11 PM
Analysis complete


In [32]:
import pandas as pd

# Load the dataset
data = pd.read_csv('Play Store Data (2).csv')

# Check the data types and unique values in the 'Reviews' column
data.info()
print(data['Reviews'].unique())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             10841 non-null  object 
 1   Category        10841 non-null  object 
 2   Rating          9367 non-null   float64
 3   Reviews         10841 non-null  object 
 4   Size            10841 non-null  object 
 5   Installs        10841 non-null  object 
 6   Type            10840 non-null  object 
 7   Price           10841 non-null  object 
 8   Content Rating  10840 non-null  object 
 9   Genres          10841 non-null  object 
 10  Last Updated    10841 non-null  object 
 11  Current Ver     10833 non-null  object 
 12  Android Ver     10838 non-null  object 
dtypes: float64(1), object(12)
memory usage: 1.1+ MB
['159' '967' '87510' ... '603' '1195' '398307']


In [34]:
# Convert 'Reviews' column to numeric, coercing errors to NaN
data['Reviews'] = pd.to_numeric(data['Reviews'], errors='coerce')

# Drop rows where 'Reviews' is NaN
data = data.dropna(subset=['Reviews'])

# Reapply the filtering logic
def filter_data(df):
    filtered_df = df[
        df['App'].str.contains('C', case=True, na=False) &
        (df['Reviews'] > 10) &
        (df['Rating'] < 4.0)
    ]
    category_counts = filtered_df['Category'].value_counts()
    valid_categories = category_counts[category_counts > 50].index
    return filtered_df[filtered_df['Category'].isin(valid_categories)]

# Filter the data
filtered_data = filter_data(data)

# Check the filtered data
print(filtered_data.head())
print("Filtered data shape:", filtered_data.shape)

                                 App Category  Rating  Reviews  Size  \
2033             Coloring book moana   FAMILY     3.9    974.0   14M   
2167                  Chess PRO Free   FAMILY     3.8   1123.0  5.7M   
2182     Elmo Calls by Sesame Street   FAMILY     3.9   6903.0   25M   
2195     Elmo Calls by Sesame Street   FAMILY     3.9   6903.0   25M   
2221  Ever After High™ Charmed Style   FAMILY     3.9  44062.0   54M   

        Installs  Type Price Content Rating                     Genres  \
2033    500,000+  Free     0       Everyone  Art & Design;Pretend Play   
2167    100,000+  Free     0       Everyone          Board;Brain Games   
2182  1,000,000+  Free     0       Everyone   Educational;Pretend Play   
2195  1,000,000+  Free     0       Everyone   Educational;Pretend Play   
2221  1,000,000+  Free     0       Everyone        Casual;Pretend Play   

           Last Updated Current Ver   Android Ver  
2033   January 15, 2018       2.0.0  4.0.3 and up  
2167  September 8,

In [38]:
import pytz
from datetime import datetime
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, HoverTool, Select
from bokeh.layouts import column, row

# Create timezone options
timezones = {
    'India': 'Asia/Kolkata',
    'Canada': 'America/Toronto',
    'London': 'Europe/London',
    'Russia': 'Europe/Moscow'
}

# Function to convert time to different timezone
def get_current_time(timezone_str):
    tz = pytz.timezone(timezone_str)
    return datetime.now(tz).strftime('%Y-%m-%d %H:%M:%S %Z')

# Create the interactive visualization
output_file("multi_timezone_app.html")

# Create the main figure
p = figure(width=900, height=500,
          x_range=filtered_data['Category'].unique(),
          title="Distribution of Ratings by Category",
          x_axis_label="Category", 
          y_axis_label="Rating")

source = ColumnDataSource(filtered_data)
p.circle(x='Category', y='Rating', source=source, size=8, color="navy", alpha=0.5)

# Add hover tool
hover = HoverTool(tooltips=[
    ("App", "@App"),
    ("Rating", "@Rating"),
    ("Reviews", "@Reviews"),
    ("Category", "@Category"),
    ("Current Time", "")  # Will be updated by JavaScript
])
p.add_tools(hover)

# Rotate x-axis labels
p.xaxis.major_label_orientation = 0.7

# Create timezone selector
timezone_select = Select(
    title="Select Timezone:",
    value="Asia/Kolkata",
    options=list(timezones.values()),
    width=200
)

# Add JavaScript callback for timezone updates
script = """
    const timezones = {
        'Asia/Kolkata': 'India',
        'America/Toronto': 'Canada',
        'Europe/London': 'London',
        'Europe/Moscow': 'Russia'
    };
    
    function updateTime() {
        const tz = cb_obj.value;
        const now = new Date().toLocaleString('en-US', {timeZone: tz});
        document.getElementById('current-time').textContent = 
            `Current Time (${timezones[tz]}): ${now}`;
    }
    updateTime();
"""

timezone_select.js_on_change('value', script)

# Layout
layout = column(
    row(timezone_select),
    p
)

show(layout)

print("Multi-timezone interactive visualization created and saved as 'multi_timezone_app.html'")



ValueError: not all callback values are CustomCode instances

In [40]:
from bokeh.models import CustomJS

# Define the JavaScript callback using CustomJS
script = CustomJS(args=dict(), code="""
    const timezones = {
        'Asia/Kolkata': 'India',
        'America/Toronto': 'Canada',
        'Europe/London': 'London',
        'Europe/Moscow': 'Russia'
    };
    
    function updateTime() {
        const tz = cb_obj.value;
        const now = new Date().toLocaleString('en-US', {timeZone: tz});
        document.getElementById('current-time').textContent = 
            `Current Time (${timezones[tz]}): ${now}`;
    }
    updateTime();
""")

# Attach the callback to the timezone selector
timezone_select.js_on_change('value', script)

# Layout
layout = column(
    row(timezone_select),
    p
)

# Save and show the updated visualization
output_file("multi_timezone_app.html")
show(layout)

print("Multi-timezone interactive visualization created and saved as 'multi_timezone_app.html'")

Multi-timezone interactive visualization created and saved as 'multi_timezone_app.html'
