In [None]:
# Cell 1 - imports & data load
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import os
# Resolve project locations relative to the current working directory.
BASE = os.path.abspath("..")  # parent of the working directory; adjust if necessary
data_dir = os.path.abspath(
    os.path.join(os.getcwd(), "..", "data")
)  # correct when the notebook is run from notebooks/
print("Data dir:", data_dir)


def _read_table(filename):
    """Read the CSV file `filename` located in `data_dir` into a DataFrame."""
    return pd.read_csv(os.path.join(data_dir, filename))


# Load every source table; the read order matches the reporting order below.
customers = _read_table("blinkit_customers.csv")
orders = _read_table("blinkit_orders.csv")
order_items = _read_table("blinkit_order_items.csv")
feedback = _read_table("blinkit_customer_feedback.csv")
products = _read_table("blinkit_products.csv")
marketing = _read_table("blinkit_marketing_performance.csv")

# Report (rows, columns) for each table as a quick sanity check on the load.
_loaded_tables = (customers, orders, order_items, feedback, products, marketing)
print("Loaded:", *[table.shape for table in _loaded_tables])

In [None]:
# Preview the first rows of the main tables.
# A notebook cell renders only its final expression automatically, so every
# preview is passed to display() explicitly; otherwise the customers and
# orders previews would be computed and silently discarded.
display(customers.head())
display(orders.head())
display(feedback.head())

In [None]:
# Parse the order dates; errors='coerce' turns unparseable values into NaT
# instead of raising.
parsed_order_dates = pd.to_datetime(orders['order_date'], errors='coerce')
orders['order_date'] = parsed_order_dates
print(
    "Orders date range:",
    parsed_order_dates.min(),
    parsed_order_dates.max(),
)

# Number of customers in each segment.
print("Customers by segment:")
segment_counts = customers['customer_segment'].value_counts()
display(segment_counts)

# Number of feedback rows for each sentiment value.
print("Feedback sentiment counts:")
sentiment_counts = feedback['sentiment'].value_counts()
display(sentiment_counts)

In [None]:
# Count feedback rows per category as a two-column frame: category, count.
# rename_axis() names the index and reset_index(name=...) names the counts,
# so the resulting column names do not depend on how value_counts() labels
# its output. Renaming the default 'index' / '<column>' labels afterwards
# breaks on pandas >= 2.0, where reset_index() already yields
# '<column>' and 'count' and the rename would create two 'count' columns.
cat_counts = (
    feedback['feedback_category']
    .value_counts()
    .rename_axis('category')
    .reset_index(name='count')
)
fig = px.bar(cat_counts, x='category', y='count', title="Feedback categories")
fig.show()

# Count feedback rows per rating value, ordered by rating rather than by
# frequency; columns: rating, count.
rating_dist = (
    feedback['rating']
    .value_counts()
    .sort_index()
    .rename_axis('rating')
    .reset_index(name='count')
)
fig2 = px.bar(rating_dist, x='rating', y='count', title="Rating distribution")
fig2.show()

In [None]:
# Number of distinct orders placed by each customer
# (columns: customer_id, orders).
orders_per_customer = (
    orders
    .groupby('customer_id')['order_id']
    .nunique()
    .reset_index(name='orders')
)

# Distribution of the per-customer order counts.
fig = px.histogram(
    orders_per_customer,
    x='orders',
    nbins=30,
    title="Orders per customer",
)
fig.show()

In [None]:
# Reference date for recency: one day after the latest order in the data.
as_of = orders['order_date'].max() + pd.Timedelta(days=1)

# Per-customer summary: most recent order date, number of distinct orders,
# summed order_total, and whole days elapsed between the last order and as_of.
agg = (
    orders
    .groupby('customer_id')
    .agg(
        last_order_date=pd.NamedAgg(column='order_date', aggfunc='max'),
        frequency=pd.NamedAgg(column='order_id', aggfunc='nunique'),
        monetary=pd.NamedAgg(column='order_total', aggfunc='sum'),
    )
    .reset_index()
    .assign(recency_days=lambda per_customer: (as_of - per_customer['last_order_date']).dt.days)
)
agg.describe()