# Implementation (5)

**1. Work with time-indexed data: resample and interpolate missing values.**

In [1]:
import pandas as pd
# Ten consecutive daily timestamps starting 2023-01-01 form the index.
rng = pd.date_range(start='2023-01-01', periods=10, freq='D')

# Single 'value' column; the None entries mark the missing observations.
df = pd.Series(
    [1, None, 3, None, 5, 6, None, 8, 9, 10],
    index=rng,
    name='value',
).to_frame()

# Downsample into 2-day bins and average whatever values each bin contains.
df_resampled = df.resample(rule='2D').mean()

# Fill the gaps by interpolating along the datetime index, so spacing between
# timestamps (not row position) determines the filled-in values.
df_interpolated = df.interpolate(method='time')


**2. Pivot a DataFrame from long to wide format and back.**


In [2]:
# Long format: one row per (date, product) observation.
df = pd.DataFrame(
    {
        'date': ['2023-01', '2023-01', '2023-02', '2023-02'],
        'product': ['A', 'B', 'A', 'B'],
        'sales': [100, 200, 150, 250],
    }
)

# Long -> wide: one row per date, one column per product.
df_wide = pd.pivot(df, index='date', columns='product', values='sales')

# Wide -> long: turn 'date' back into a column, then unpivot the product
# columns. No var_name is passed, so melt names the variable column after the
# column index left behind by the pivot ('product').
df_long = pd.melt(df_wide.reset_index(), id_vars='date', value_name='sales')


**3. Create and query a multi-index DataFrame.**

In [3]:
# Parallel label lists: position i of each list describes row i.
regions = ['North', 'North', 'South', 'South']
years = ['2023', '2024', '2023', '2024']
arrays = [regions, years]

# Two-level row index: Region is the outer level, Year the inner level.
index = pd.MultiIndex.from_arrays(arrays, names=('Region', 'Year'))
df = pd.DataFrame(data=[250, 300, 220, 270], index=index, columns=['Sales'])

# Selecting an outer-level label with .loc drops that level from the result.
north_rows = df.loc['North']
print(north_rows)  # Filter by first index level

# .xs takes a cross-section on any named level, here the inner one.
rows_2023 = df.xs('2023', level='Year')
print(rows_2023)  # Filter by second index level


      Sales
Year       
2023    250
2024    300
        Sales
Region       
North     250
South     220


**4. Convert columns to categorical and measure memory savings.**

In [4]:
df = pd.DataFrame(
    {
        'City': ['Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Delhi'],
        'Rating': [4, 3, 5, 4, 3],
    }
)

# Baseline per-column footprint; deep=True asks pandas to also account for the
# memory held by the string values themselves, not just the references.
print(df.memory_usage(deep=True))

# Re-encode the repeated city names as a categorical column; 'Rating' is left
# untouched.
df = df.astype({'City': 'category'})
print(df.memory_usage(deep=True))  # Observe reduced usage

Index     132
City      312
Rating     40
dtype: int64
Index     132
City      238
Rating     40
dtype: int64


**5. Read/write Parquet files and compare performance with CSV.**

In [5]:
import pandas as pd
import time

# Benchmark frame: one million rows with an integer column and a constant
# single-character string column.
df = pd.DataFrame({'A': range(1000000), 'B': ['X'] * 1000000})

# Elapsed time is measured with time.perf_counter(): it is monotonic and uses
# the highest-resolution clock available, whereas time.time() follows the wall
# clock and can jump if the system clock is adjusted during the run.
# Each figure below covers one full write + read round trip.

# CSV
start = time.perf_counter()
df.to_csv('data.csv', index=False)
df_csv = pd.read_csv('data.csv')
print("CSV time:", time.perf_counter() - start)

# Parquet (requires a Parquet engine such as pyarrow or fastparquet)
start = time.perf_counter()
df.to_parquet('data.parquet', index=False)
df_parquet = pd.read_parquet('data.parquet')
print("Parquet time:", time.perf_counter() - start)

CSV time: 1.9433872699737549
Parquet time: 0.6122832298278809
