In [21]:
# flipkart_analysis.py
# A simple script to explore Flipkart product data and prepare visuals for LinkedIn

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [23]:
# — Step 2: Load data, extract & clean main_category ———————————————

# Location of the Flipkart sample CSV. Set the FLIPKART_CSV environment variable
# to point at a different copy; otherwise the original download path is used.
DATA_PATH = os.environ.get(
    "FLIPKART_CSV",
    r"C:\Users\j0653\Downloads\flipkart_com-ecommerce_sample.csv\flipkart_com-ecommerce_sample.csv",
)

# Categories with fewer listings than this are dropped from the analysis.
MIN_CATEGORY_SIZE = 10

df = pd.read_csv(DATA_PATH)

print(f"Loaded {len(df):,} rows from Flipkart data")

# The first segment of the '>>'-separated category tree is the main category;
# strip the surrounding brackets/quotes and whitespace left over from the raw text.
df['main_category'] = (
    df['product_category_tree']
      .str.split('>>').str[0]
      .str.replace(r'[\[\]"]', '', regex=True)
      .str.strip()
)

# Keep only categories with at least MIN_CATEGORY_SIZE items. The .copy() makes
# the filtered frame independent, so later column assignments do not act on a
# slice of the loaded data.
counts   = df['main_category'].value_counts()
big_cats = counts[counts >= MIN_CATEGORY_SIZE].index
df       = df[df['main_category'].isin(big_cats)].copy()
print(f"After filtering small categories: {len(df):,} products\n")

Loaded 20,000 rows from Flipkart data
After filtering small categories: 19,668 products



In [24]:
# — Step 3: Clean ratings & compute discount % ———————————————————

# Convert ratings to numbers. errors='coerce' turns every non-numeric entry
# (including the 'No rating available' placeholder) into NaN.
# Plain column assignment is used on purpose: in pandas 2.x,
# `df.loc[:, 'product_rating'] = ...` writes into the existing object-dtype
# column in place, so the column would stay `object` instead of becoming float.
df['product_rating'] = pd.to_numeric(df['product_rating'], errors='coerce')

# Discount as a percentage of the retail price, rounded to one decimal place.
df['discount_%'] = (
    (df['retail_price'] - df['discounted_price'])
    / df['retail_price'] * 100
).round(1)


In [27]:
# — Step 4: Compute key metrics —————————————————————————————
total_products = df.shape[0]

# Largest category and its share of all listings (percent, one decimal)
cat_counts  = df['main_category'].value_counts()
top_cat     = cat_counts.idxmax()
top_cat_pct = round(cat_counts.max() / total_products * 100, 1)

# Mean discount per category, and the category where that mean is highest
avg_discounts = df['discount_%'].groupby(df['main_category']).mean()
top_disc_cat  = avg_discounts.idxmax()
top_disc_val  = round(avg_discounts.max(), 1)

# Mean rating for budget (< 2000) versus premium (>= 10000) discounted prices
discounted_price = df['discounted_price']
budget_df        = df.loc[discounted_price < 2000]
premium_df       = df.loc[discounted_price >= 10000]
avg_rate_budget  = round(budget_df['product_rating'].mean(),  2)
avg_rate_premium = round(premium_df['product_rating'].mean(), 2)

# One print call, one line per argument; the trailing "\n" on the last entry
# leaves a blank line after the summary.
print(
    "Metrics:",
    f" • Top category       : {top_cat} ({top_cat_pct}%)",
    f" • Best discount in   : {top_disc_cat} ({top_disc_val}%)",
    f" • Avg rating < ₹2,000: {avg_rate_budget}/5",
    f" • Avg rating ≥ ₹10,000: {avg_rate_premium}/5\n",
    sep="\n",
)


Metrics:
 • Top category       : Clothing (31.5%)
 • Best discount in   : Sunglasses (60.6%)
 • Avg rating < ₹2,000: 3.79/5
 • Avg rating ≥ ₹10,000: 4.11/5



In [35]:
# — Step 5: Plot & save charts —————————————————————————————


def _save_category_bar_chart(series, title, ylabel, filename):
    """Draw `series` as a bar chart with category labels on the x axis and save it.

    The figure is 8x5 inches, x tick labels are rotated 45 degrees and
    right-aligned, and the figure is closed after it is written to `filename`.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    series.plot(kind='bar', ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Category')
    ax.set_ylabel(ylabel)
    plt.xticks(rotation=45, ha='right')
    fig.tight_layout()
    fig.savefig(filename)
    plt.close(fig)


# Top categories by product count
_save_category_bar_chart(
    cat_counts.head(8),
    'Top 8 Categories by Product Count',
    'Number of Products',
    'top_categories.png',
)

# Top categories by average discount
_save_category_bar_chart(
    avg_discounts.sort_values(ascending=False).head(8),
    'Top 8 Categories by Average Discount (%)',
    'Average Discount (%)',
    'discounts_by_category.png',
)

# Average rating: budget versus premium products
ratings = [avg_rate_budget, avg_rate_premium]
labels = ['Avg Rating < ₹2,000', 'Avg Rating ≥ ₹10,000']
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(labels, ratings, color=['blue', 'green'])
ax.set_title('Average Rating Comparison: Budget vs Premium')
ax.set_ylabel('Average Rating (out of 5)')
fig.tight_layout()
fig.savefig('avg_rating_by_price.png')
plt.close(fig)

print("Saved charts: top_categories.png, discounts_by_category.png, avg_rating_by_price.png\n")



Saved charts: top_categories.png, discounts_by_category.png, avg_rating_by_price.png



In [30]:
# LinkedIn post snippet: fills the metrics computed earlier into a ready-to-paste post.
# The "Dataset Used" line carries only the dataset URL; the tools list belongs
# on the "Tools & Libraries Used" line alone.

post = f""" Flipkart E‑Commerce Insights: Data Tells the Story!

Analyzed {total_products:,} Flipkart products.

Category Trends: {top_cat} leads with {top_cat_pct}% of listings
Discount Patterns: {top_disc_cat} offers an avg. {top_disc_val}% discount
Rating Insights: • Under ₹2,000 → {avg_rate_budget}/5
• Above ₹10,000 → {avg_rate_premium}/5

I’m excited to share that I have just completed the Google Advanced Data Analytics certificate—strengthening 
my expertise in Python and libraries such as Pandas and Matplotlib.

Tools & Libraries Used: • Python (Pandas, Matplotlib)

Dataset Used: • https://www.kaggle.com/datasets/PromptCloudHQ/flipkart-products

Stay tuned for my next project on brain tumour detection using explainable AI—leveraging Keras, NumPy, 
Pandas, and scikit-learn to highlight critical tumor regions in medical scans.

#DataAnalytics #Python #MachineLearning #DataScience """

# Print the post under a header so it can be copied straight from the cell output.

print("--- Copy this for LinkedIn ---\n")
print(post)



--- Copy this for LinkedIn ---

 Flipkart E‑Commerce Insights: Data Tells the Story!

Analyzed 19,668 Flipkart products.

Category Trends: Clothing leads with 31.5% of listings
Discount Patterns: Sunglasses offers an avg. 60.6% discount
Rating Insights: • Under ₹2,000 → 3.79/5
• Above ₹10,000 → 4.11/5

I’m excited to share that I have just completed the Google Advanced Data Analytics certificate—strengthening 
my expertise in Python and libraries such as Pandas and Matplotlib.

Tools & Libraries Used: • Python (Pandas, Matplotlib)

Dataset Used: • https://www.kaggle.com/datasets/PromptCloudHQ/flipkart-products • Python (Pandas, Matplotlib)

Stay tuned for my next project on brain tumour detection using explainable AI—leveraging Keras, NumPy, 
Pandas, and scikit-learn to highlight critical tumor regions in medical scans.

#DataAnalytics #Python #MachineLearning #DataScience 


In [43]:
import os

# Show the directory that relative paths (e.g. the saved chart files) resolve against.
print(f"Current working directory: {os.getcwd()}")

Current working directory: C:\Users\j0653\Documents


In [41]:
import zipfile

# Chart images to bundle, given relative to the current working directory
plot_files = ['top_categories.png', 'discounts_by_category.png', 'avg_rating_by_price.png']

# Write a fresh archive (mode 'w' overwrites any existing one) and add each chart
with zipfile.ZipFile('flipkart_charts.zip', mode='w') as archive:
    for chart_file in plot_files:
        archive.write(chart_file)

print("Created flipkart_charts.zip with all plots.")




Created flipkart_charts.zip with all plots.
