## Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import os
import shutil
from pathlib import Path
import warnings
# Install a global "ignore" filter so no Python warnings are shown for the
# rest of the session.
# NOTE(review): this also hides deprecation and dtype warnings from pandas —
# consider narrowing the filter to a specific category if that matters.
warnings.filterwarnings('ignore')

## Step 2: Load the Dataset

In [None]:
# Read the raw CSV into the working DataFrame `df`. The path is relative to
# the directory the notebook is run from.
df = pd.read_csv('data/finalAPData.csv')

# Report how much data was loaded (row count uses thousands separators).
print("Dataset loaded successfully!")
print(f"Shape: {len(df):,} rows × {len(df.columns)} columns")

## Step 3: Explore Data Structure

In [None]:
# List every column together with the dtype pandas inferred for it.
print("Column Names and Types:")
print(df.dtypes)

# Visual divider: blank line, fifty '=' characters, blank line.
print(f"\n{'=' * 50}\n")

# Preview the top of the table. This is the cell's last expression, so a
# notebook front end displays the returned DataFrame.
print("First 5 rows:")
df.head(5)

## Step 4: Check Missing Values

In [None]:
# Count null entries per column; `missing` is a Series indexed by column name.
missing = df.isna().sum()

# Every count is non-negative, so "all counts are zero" is the same condition
# as "the total is not greater than zero".
if missing.eq(0).all():
    print("✓ No missing values found!")
else:
    # Show only the columns that actually contain nulls.
    print("Missing Values:")
    print(missing.loc[missing > 0])

## Step 5: Statistical Summary

In [None]:
# Statistical summary: describe() returns per-column descriptive statistics
# (for numeric columns: count, mean, std, min, quartiles and max).
# It is the cell's last expression, so a notebook front end displays the
# resulting table.
df.describe()

## Step 6: Date Range Analysis

In [None]:
# Parse the 'Date' column into datetime values so that date arithmetic and
# the `.dt` accessor used below are available.
# NOTE(review): no explicit format/dayfirst is passed, so pandas infers the
# layout — confirm the CSV's date strings are interpreted as intended.
df['Date'] = pd.to_datetime(df['Date'])

# Compute the bounds once and reuse them for both the report and the span.
first_date = df['Date'].min()
last_date = df['Date'].max()

print("Date Range:")
print(f"Start: {first_date}")
print(f"End: {last_date}")
# Difference of the two timestamps is a Timedelta; `.days` is its whole-day part.
print(f"Total Days: {(last_date - first_date).days:,}")
# Number of distinct calendar years present in the data.
print(f"Years: {df['Date'].dt.year.nunique()}")

## Step 7: Energy Demand Analysis

In [None]:
# Name of the column holding the energy demand figures.
energy_col = 'Energy Required (MU)'
energy = df[energy_col]

print(f"Energy Demand Statistics ({energy_col}):")

# Each entry pairs a display label with the Series method that produces it;
# the method is called inside the loop so statistics are computed (and
# printed) in the listed order, each formatted to two decimals.
for label, stat in (
    ("Mean", energy.mean),
    ("Median", energy.median),
    ("Min", energy.min),
    ("Max", energy.max),
    ("Std Dev", energy.std),
):
    print(f"  {label}: {stat():.2f} MU")

## Step 8: Data Preparation

In [None]:
# Build the prepared frame in one chain:
#   1. order rows by 'Date',
#   2. move 'Date' from a column to the index,
#   3. rename the energy column to the shorter name 'demand'.
df = (
    df.sort_values('Date')
    .set_index('Date')
    .rename(columns={'Energy Required (MU)': 'demand'})
)

print("✓ Data preparation complete!")
print(f"Final shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")

## Step 9: Save Prepared Data

In [None]:
# Make sure the output directory exists (no error if it is already there),
# then write the prepared frame — including its index — as CSV.
output_dir = Path('data')
output_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(output_dir / 'prepared_data.csv')

print("✓ Saved prepared data to: data/prepared_data.csv")
print("\nData is ready for EDA and forecasting!")
print(f"\nData is ready for EDA and forecasting!")