# advanced_case_studies

## Advanced Case Studies — Colab-ready

### 1) Chunked CSV processing (aggregation without loading all data)

In [None]:
# Chunked aggregation: total price per city, computed one chunk at a time so the
# whole CSV never has to be held in memory (sample_data.csv is a small stand-in).
import pandas as pd

chunksize = 500
agg = {}  # city -> running sum of 'price' over every chunk read so far

# usecols limits parsing to the two columns the aggregation needs, which keeps
# each chunk as small as possible.
for chunk in pd.read_csv('datasets/sample_data.csv',
                         usecols=['city', 'price'],
                         chunksize=chunksize):
    # Group on the raw column. Casting each chunk to 'category' first does not
    # change the sums, and grouping on a categorical key without `observed=`
    # raises a FutureWarning on pandas 2.1+.
    g = chunk.groupby('city')['price'].sum()
    # Fold this chunk's partial sums into the running totals.
    for k, v in g.items():
        agg[k] = agg.get(k, 0) + v
agg

### 2) Feature engineering: lag and rolling features for time-series

In [None]:
# Lag and rolling-mean features for the 'value' column of a timestamp-indexed series.
ts = pd.read_csv('datasets/timeseries.csv', parse_dates=['timestamp'], index_col='timestamp')

# shift() and rolling() work on row order, not on the timestamps themselves, so
# put the rows in chronological order first. sort_index returns a new frame
# (`ts` stays exactly as loaded); mergesort is stable, so rows sharing a
# timestamp keep their file order.
df_ts = ts.sort_index(kind='mergesort')

# Value of the previous row (the first row has no predecessor -> NaN).
df_ts['lag_1'] = df_ts['value'].shift(1)

# NOTE(review): window=24 counts 24 *rows*; this is a 24-hour mean only if the
# data is hourly with no gaps — TODO confirm against timeseries.csv.
df_ts['rolling_24h_mean'] = df_ts['value'].rolling(window=24).mean()

# dropna() removes the warm-up rows where the lag or the rolling mean is undefined.
df_ts[['value','lag_1','rolling_24h_mean']].dropna().head()

### 3) Join optimization: merging on a categorical key (example)

In [None]:
# Attach a city_code to each row with a left join on 'city', keeping the join
# key categorical on both sides.
import pandas as pd

left = pd.read_csv('datasets/sample_data.csv', usecols=['id','city','ts'])
right = pd.DataFrame({'city': ['Karachi','Lahore'], 'city_code': [1,2]})

# A categorical merge key only stays categorical when BOTH sides have exactly
# the same categorical dtype (same categories, same ordering). Casting each
# column with a bare astype('category') gives each side its own category set,
# so build one shared dtype from the union of the cities seen on either side.
city_dtype = pd.CategoricalDtype(
    pd.concat([left['city'], right['city']]).dropna().unique()
)
left['city'] = left['city'].astype(city_dtype)
right['city'] = right['city'].astype(city_dtype)

# how='left' keeps every row of `left`; cities with no entry in the lookup get
# a missing city_code.
joined = left.merge(right, on='city', how='left')
joined.head()