# Pandas_Advanced

## Setup and imports

In [None]:
# Optional dependencies: in Colab (or any fresh environment) uncomment the
# line below to install the extras before running the imports.
# !pip install pyarrow fastparquet seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Show the installed pandas and NumPy versions as the cell output.
pd.__version__, np.__version__

## Load sample data

In [None]:
# Read the sample dataset, asking pandas to parse the 'ts' column as datetimes.
df = pd.read_csv(
    'datasets/sample_data.csv',
    parse_dates=['ts'],
)

# Print the column summary; 'deep' makes pandas inspect object columns so the
# reported memory usage is the real footprint rather than an estimate.
df.info(memory_usage='deep')

# Preview the first five rows (rendered as the cell output).
df.head(5)

## Categorical optimization and groupby examples

In [None]:
# Store the repeated text columns as pandas categoricals. This can cut memory
# use substantially when a column has few distinct values; compare the
# summary printed below with the one from the load step.
cat_cols = ['city', 'product']
for c in cat_cols:
    df[c] = df[c].astype('category')

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info(memory_usage='deep')

# Per-city row count, mean price and total price.
# observed=True restricts the result to categories that actually occur in the
# data and avoids relying on the deprecated default for categorical groupers.
agg = (
    df.groupby('city', observed=True)['price']
    .agg(['count', 'mean', 'sum'])
    .reset_index()
)
agg

## Time-series resample and plotting

In [None]:
# Load the time series with its 'timestamp' column parsed as datetimes and
# used as the index; resample() below needs a datetime-like index.
ts = pd.read_csv('datasets/timeseries.csv', parse_dates=['timestamp'], index_col='timestamp')

# Month-end totals. An explicit offset object is used instead of the 'M'
# string alias, which pandas 2.2 deprecated in favour of 'ME'; the offset
# object yields the same month-end bins on both older and newer releases.
monthly = ts['value'].resample(pd.offsets.MonthEnd()).sum()

# Trailing 3-month mean of the monthly totals. The first two points are NaN
# because the window is not yet full.
rolling_3m = monthly.rolling(window=3).mean()

# Both series are drawn on the same (current) axes so they can be compared.
plt.figure(figsize=(10, 4))
monthly.plot(label='monthly sum')
rolling_3m.plot(label='3-month rolling mean')
plt.legend()
plt.title('Monthly aggregation and rolling mean')

## Visual analysis: price distribution by city

In [None]:
# Box plot of 'price' for each 'city', drawn on a fresh 8x5 inch figure.
plt.figure(figsize=(8, 5))
sns.boxplot(
    data=df,
    x='city',
    y='price',
)
plt.title('Price distribution by city')

## Advanced groupby + transform: rank within groups

In [None]:
# Line total for each row: unit price times quantity.
df['total'] = df['price'] * df['qty']

# Rank rows inside each city by total, largest first. 'dense' gives tied rows
# the same rank and leaves no gaps after ties.
# observed=True is passed explicitly because 'city' may be categorical at this
# point; it avoids relying on the deprecated default for categorical groupers.
df['rank_in_city'] = (
    df.groupby('city', observed=True)['total']
    .rank(method='dense', ascending=False)
)

# Show the ten first rows when ordered by city, then by rank within the city.
(
    df[['city', 'product', 'total', 'rank_in_city']]
    .sort_values(['city', 'rank_in_city'])
    .head(10)
)