In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Load the raw sample CSV, asking pandas to parse the 'date' column up front
# (dayfirst=False: ambiguous dates are not treated as day-first).
df = pd.read_csv(
    '../data/raw/sample.csv',
    dayfirst=False,
    parse_dates=['date'],
)
# Quick size check: total rows and distinct non-null customer ids.
print('Rows:', df.shape[0])
print('Unique customers:', df['customer_id'].nunique())

# Top 10 products by count.
# value_counts() sorts by frequency (descending) and skips missing names by
# default, so head(10) yields the ten most frequent product names.
top = df['product_name'].value_counts().head(10)
# The header string uses explicit "\n" escapes; a single-quoted literal cannot
# span physical lines, so raw line breaks inside it are a SyntaxError.
print('\nTop products:\n', top)

# Purchases over time (count of rows per calendar day).
# errors='coerce' turns unparseable dates into NaT instead of raising, so they
# can be dropped below rather than aborting the whole cell.
df['date'] = pd.to_datetime(df['date'], errors='coerce')
# Build the grouping key from the already-filtered frame so the key and the
# grouped rows share exactly the same index (no reliance on implicit alignment
# between the filtered frame and the unfiltered df['date']).
dated_rows = df.dropna(subset=['date'])
time_series = dated_rows.groupby(dated_rows['date'].dt.date).size()
# The header string uses explicit "\n" escapes; a single-quoted literal cannot
# span physical lines, so raw line breaks inside it are a SyntaxError.
print('\nPurchases over time (rows per day):\n', time_series)

# Draw the per-day row counts as a line chart with a marker on each day.
fig, ax = plt.subplots(figsize=(8, 4))
time_series.plot(marker='o', ax=ax)
# Labels are applied after plotting so they override anything pandas set.
ax.set(
    title='Purchases over time',
    xlabel='Date',
    ylabel='Number of line items',
)
fig.tight_layout()
plt.show()