# Data Manipulation and Visualization Example

This notebook demonstrates:
- Creating synthetic test data
- Performing data manipulation with pandas
- Visualizing the results with matplotlib

In [None]:
# Notebook setup: configure matplotlib's config directory, load libraries,
# and seed the random number generator.
import os

# MPLCONFIGDIR is exported, and the directory it names is created, ahead of
# the matplotlib import further down.
MPL_CONFIG_DIR = "/tmp/.config/matplotlib"
os.environ["MPLCONFIGDIR"] = MPL_CONFIG_DIR
os.makedirs(MPL_CONFIG_DIR, exist_ok=True)

# Third-party libraries used by the rest of the notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seed NumPy's global generator so the random draws below are repeatable
SEED = 42
np.random.seed(SEED)

In [None]:
# Build the synthetic dataset: 100 rows, each with a date, a category label
# drawn from A/B/C, and a normally distributed value (mean 50, std 10).
n_rows = 100
categories = ['A', 'B', 'C']
dates = pd.date_range(start='2023-01-01', periods=n_rows)

# The labels are drawn before the values; both use NumPy's global generator,
# so this order determines which random numbers each column receives.
category_draws = np.random.choice(categories, size=n_rows)
value_draws = np.random.normal(loc=50, scale=10, size=n_rows)

data = pd.DataFrame({
    'Date': dates,
    'Category': category_draws,
    'Value': value_draws,
})

# Preview the first rows as the cell output
data.head()

In [None]:
# Data manipulation
# Number of consecutive rows (within one category) averaged per window.
ROLLING_WINDOW = 7

# 1. Add a rolling average column, computed separately within each category.
#    A plain data['Value'].rolling(...) would average across neighbouring rows
#    of *all* categories, so slicing the result by category afterwards would
#    not give a per-category statistic. Grouping first keeps every window
#    inside a single category; transform() returns a Series aligned to the
#    original index, so it can be assigned straight back.
#    Note: the window counts rows of the category, not calendar days, and the
#    first ROLLING_WINDOW - 1 rows of each category are NaN because rolling()
#    requires a full window by default.
data['RollingAvg'] = (
    data.groupby('Category')['Value']
    .transform(lambda values: values.rolling(window=ROLLING_WINDOW).mean())
)

# 2. Group by Category and calculate mean value
category_means = data.groupby('Category')['Value'].mean()

# Show the per-category means as the cell output
category_means

In [None]:
# Visualization: one figure with, for every category, the raw values (faded)
# and the RollingAvg column stored in `data`.
fig, ax = plt.subplots(figsize=(12, 6))

# sort=False makes groupby yield categories in order of first appearance, so
# lines are drawn (and colours assigned) in that order.
for cat, subset in data.groupby('Category', sort=False):
    ax.plot(subset['Date'], subset['Value'], label=f'{cat} Value', alpha=0.3)
    ax.plot(subset['Date'], subset['RollingAvg'], label=f'{cat} RollingAvg')

# Axis labelling and figure decoration
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.set_title('Value and Rolling Average by Category')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()