In [8]:
pip install pandas


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: C:\Users\Lenovo\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [9]:
pip install matplotlib

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: C:\Users\Lenovo\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import pytz

In [11]:
# Load dataset
file_path = r"C:\Users\Lenovo\Desktop\NULL CLASS\Play Store Data.csv"
df = pd.read_csv(file_path)

In [13]:
# Data Cleaning
# Convert 'Installs' to numeric (remove ',' and '+')
df["Installs"] = df["Installs"].astype(str).str.extract(r'(\d+)')  # Extract numeric part
df["Installs"] = pd.to_numeric(df["Installs"], errors='coerce')  # Convert to numeric

In [14]:
# Convert 'Size' to numeric (remove 'M', ignore 'Varies with device')
df["Size"] = df["Size"].astype(str).apply(lambda x: float(x.replace("M", "")) if "M" in x else np.nan)
df.dropna(subset=["Size"], inplace=True)

In [15]:
# Convert 'Price' to numeric (remove '$')
df["Price"] = df["Price"].astype(str).str.replace("$", "", regex=False)
df["Price"] = pd.to_numeric(df["Price"], errors='coerce').fillna(0)

In [16]:
# Convert 'Android Ver' to numeric (extract the first numeric part)
df["Android Ver"] = df["Android Ver"].astype(str).str.extract(r"(\d+\.\d+)").astype(float)

In [17]:
# Apply Filters
df_filtered = df[
    (df["Installs"] >= 10000) &
    ((df["Price"] * df["Installs"] >= 10000) | (df["Type"] == "Free")) &
    (df["Android Ver"] > 4.0) &
    (df["Size"] > 15) &
    (df["Content Rating"] == "Everyone") &
    (df["App"].str.len() <= 30)
]

In [28]:
print(df_filtered.head())

Empty DataFrame
Columns: [App, Category, Rating, Reviews, Size, Installs, Type, Price, Content Rating, Genres, Last Updated, Current Ver, Android Ver]
Index: []


In [18]:
# Get Top 3 Categories
top_categories = df_filtered["Category"].value_counts().nlargest(3).index

In [19]:
# Filter Data for Top Categories
df_top = df_filtered[df_filtered["Category"].isin(top_categories)]

In [20]:
# Aggregate Data
category_stats = df_top.groupby(["Category", "Type"]).agg(
    avg_installs=("Installs", "mean"),
    avg_revenue=("Price", "sum")
).reset_index()

In [21]:
ist = pytz.timezone("Asia/Kolkata")##for current time 
current_time = datetime.datetime.now(ist)  # Ensure it's properly fetched
print(f"Current IST time: {current_time.strftime('%Y-%m-%d %H:%M:%S')}")


Current IST time: 2025-04-03 08:42:17


In [22]:
if not (13 <= current_time.hour < 14):
    print("Chart is only available between 1 PM and 2 PM IST.")
else:
    # Plot Dual-Axis Chart
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    for category in top_categories:
        df_cat = category_stats[category_stats["Category"] == category]
        ax1.bar(df_cat["Type"], df_cat["avg_installs"], alpha=0.6, label=f'{category} Installs')
        ax2.plot(df_cat["Type"], df_cat["avg_revenue"], marker='o', linestyle='-', label=f'{category} Revenue')

    ax1.set_xlabel("App Type (Free vs Paid)")
    ax1.set_ylabel("Average Installs", color='b')
    ax2.set_ylabel("Average Revenue ($)", color='r')
    ax1.set_title("Comparison of Avg Installs and Revenue for Top App Categories")
    ax1.legend(loc='upper left')
    ax2.legend(loc='upper right')

    plt.show()


Chart is only available between 1 PM and 2 PM IST.


ANALYSIS

In [None]:

# 1. Data Cleaning & Transformation
# Installs: Converted to numeric format by extracting digits.

# Size: Converted to numeric, excluding apps where size varies.

# Price: Converted to numeric after removing the "$" symbol.

# Android Version: Extracted the numeric part for proper filtering.

# 2. Filtering Conditions
# The dataset was filtered to meet the following strict criteria:

# Minimum 10,000 installs.

# Revenue must be ≥ $10,000 (for paid apps) or the app must be free.

# Android version > 4.0.

# App size > 15 MB.

# Content rating should be "Everyone".

# App name length ≤ 30 characters (including spaces and special characters).

# 3. Identifying Top 3 Categories
# The script determines the top 3 app categories based on the highest number of apps.

# Data is grouped by Category and Type (Free vs. Paid) to compute:

# Average installs

# Total revenue for each category-type combination.

# 4. Time-Based Restriction
# The chart is only displayed between 1 PM - 2 PM IST.

# If executed outside this window, it prints a message instead of showing the graph.

# 5. Visualization
# Dual-axis Chart:

# Bar chart (left axis) → Represents average installs.

# Line plot (right axis) → Represents total revenue.

# X-axis: Free vs. Paid apps.

# Y1-axis (left): Average installs (blue).

# Y2-axis (right): Total revenue in dollars (red).

# Legend Placement:

# Installs (upper left).

# Revenue (upper right).


'Analysis of the Task\nThe task aims to generate a dual-axis chart comparing average installs and average revenue for free vs. paid apps within the top 3 app categories, with strict filtering conditions.\n\n1. Data Cleaning & Transformation\nInstalls: Converted to numeric format by extracting digits.\n\nSize: Converted to numeric, excluding apps where size varies.\n\nPrice: Converted to numeric after removing the "$" symbol.\n\nAndroid Version: Extracted the numeric part for proper filtering.\n\n2. Filtering Conditions\nThe dataset was filtered to meet the following strict criteria:\n\nMinimum 10,000 installs.\n\nRevenue must be ≥ $10,000 (for paid apps) or the app must be free.\n\nAndroid version > 4.0.\n\nApp size > 15 MB.\n\nContent rating should be "Everyone".\n\nApp name length ≤ 30 characters (including spaces and special characters).\n\n3. Identifying Top 3 Categories\nThe script determines the top 3 app categories based on the highest number of apps.\n\nData is grouped by Categ