# Wind Correlation Analysis - Example Notebook

This notebook demonstrates how to use the wind correlation analysis framework interactively.

In [None]:
import sys
sys.path.insert(0, '../src')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

%matplotlib inline
%load_ext autoreload
%autoreload 2

## 1. Load Configuration and Initialize Components

In [None]:
import yaml

# Load station configuration.
# The encoding is passed explicitly so the YAML file is decoded the same way
# on every platform instead of falling back to the locale default.
with open('../config/stations.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# safe_load returns None for an empty file; fail early with a clear message
# rather than an opaque TypeError/KeyError on the lookup below.
if not isinstance(config, dict) or 'stations' not in config:
    raise ValueError("'../config/stations.yaml' does not define a 'stations' section")

# One row per entry under the 'stations' key
stations_df = pd.DataFrame(config['stations'])
print("Configured stations:")
print(stations_df)

## 2. Download METAR Data

In [None]:
from data_acquisition.metar_downloader import METARDownloader

# Create the downloader, pointing it at the raw METAR data directory
metar = METARDownloader(output_dir='../data/raw/metar')

# Keep the window short (Jan 1 to Jan 7, 2024) so the example runs quickly.
# NOTE(review): whether end_date is inclusive depends on METARDownloader - confirm.
start_date = datetime(2024, 1, 1)
end_date = start_date + timedelta(days=6)

# One dict per configured station, built from the rows of stations_df
station_records = stations_df.to_dict(orient='records')

wind_data = metar.download_multiple_stations(
    stations=station_records,
    start_date=start_date,
    end_date=end_date,
    resample=True,
)

print(f"\nDownloaded data for {len(wind_data)} stations")

## 3. Examine Wind Speed Data

In [None]:
# Plot wind speed time series, one panel per station
n_stations = len(wind_data)

if n_stations == 0:
    # plt.subplots() rejects a zero-row grid, so skip plotting with a clear hint
    print("No wind data to plot - check the METAR download step above.")
else:
    # squeeze=False always returns a 2-D array of axes, so a single station
    # needs no special-casing.
    fig, axes = plt.subplots(
        n_stations, 1, figsize=(12, 3 * n_stations), squeeze=False
    )

    for ax, (station_id, df) in zip(axes[:, 0], wind_data.items()):
        ax.plot(df.index, df['wind_speed_ms'], label=station_id)
        ax.set_ylabel('Wind Speed (m/s)')
        ax.set_title(f'Station {station_id}')
        ax.grid(True, alpha=0.3)
        ax.legend()

    plt.tight_layout()
    plt.show()

## 4. Generate Synthetic ERA5 Data

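For demonstration purposes, this section generates synthetic ERA5-like geostrophic wind data, so the notebook can be run without Copernicus Climate Data Store (CDS) credentials.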

In [None]:
# Create synthetic ERA5 data
# A fixed seed makes the synthetic series identical on every Restart & Run All.
rng = np.random.default_rng(42)

era5_data = {}
# 'h' is the hourly alias; the uppercase 'H' spelling is deprecated in recent pandas.
date_range = pd.date_range(start=start_date, end=end_date, freq='h')
n_hours = len(date_range)
hours = np.arange(n_hours)

# The deterministic components do not depend on the station, so compute them once.
base_direction = 270  # Westerly
direction_var = 30 * np.sin(hours * 2 * np.pi / (24 * 7))  # 7-day period
base_speed = 15  # m/s
speed_var = 5 * np.sin(hours * 2 * np.pi / 24)  # 24-hour period

# NOTE(review): the index here is timezone-naive; confirm it matches the
# index of the METAR frames in wind_data before relying on alignment.
for station_id in wind_data.keys():
    # Synthetic geostrophic wind: each station gets its own noise realisation
    geostrophic_direction = (
        base_direction + direction_var + rng.normal(0, 15, n_hours)
    ) % 360

    # Floor the speed at 1 m/s so the noise can never push it to zero or below
    geostrophic_speed = np.maximum(
        base_speed + speed_var + rng.normal(0, 3, n_hours), 1.0
    )

    df = pd.DataFrame({
        'timestamp': date_range,
        'geostrophic_speed': geostrophic_speed,
        'geostrophic_direction': geostrophic_direction
    }).set_index('timestamp')

    era5_data[station_id] = df

print(f"Generated ERA5 data for {len(era5_data)} stations")

## 5. Calculate Station Pair Relationships

In [None]:
from analysis.spatial_utils import SpatialCalculations

# Build the table of station-pair relationships from the configured stations
station_pairs = SpatialCalculations.calculate_station_pairs(stations_df)

# Show only the columns naming each pair plus its distance and bearing
pair_columns = ['station1_name', 'station2_name', 'distance_km', 'bearing_deg']
print("\nStation pairs:")
print(station_pairs[pair_columns])

## 6. Perform Cross-Correlation Analysis

In [None]:
from analysis.correlation_analysis import CrossCorrelationAnalysis

# Analysis settings, gathered here so they are easy to tweak.
# NOTE(review): bin width is presumably in degrees of wind direction - confirm
# against CrossCorrelationAnalysis.
BIN_WIDTH = 30.0
MAX_LAG_HOURS = 12.0

analyzer = CrossCorrelationAnalysis(
    bin_width=BIN_WIDTH,
    max_lag_hours=MAX_LAG_HOURS,
)

# Run the analysis over every station pair using the station and ERA5 series
results = analyzer.analyze_all_pairs(
    wind_data=wind_data,
    era5_data=era5_data,
    station_pairs=station_pairs,
)

print(f"\nCompleted analysis for {len(results)} pairs")

## 7. Visualize Results

In [None]:
from visualization.polar_plots import PolarCorrelationPlot

plotter = PolarCorrelationPlot(figsize=(10, 10))

# Draw one polar plot per analysed station pair
for pair_key, pair_results in results.items():
    first_id, second_id = pair_key

    # Find the station_pairs row whose two ids match this result key
    is_this_pair = (
        (station_pairs['station1_id'] == first_id)
        & (station_pairs['station2_id'] == second_id)
    )
    pair_info = station_pairs.loc[is_this_pair].iloc[0]

    fig = plotter.plot_correlation_polar(
        results_df=pair_results,
        station1_name=pair_info['station1_name'],
        station2_name=pair_info['station2_name'],
        distance_km=pair_info['distance_km'],
        bearing_deg=pair_info['bearing_deg'],
        show_plot=True,
    )

    plt.show()

## 8. Examine Detailed Results

In [None]:
# Show results table for first pair.
# next(iter(...), None) yields the first key without building a list and
# returns None instead of raising when the analysis produced no pairs.
first_pair = next(iter(results), None)

if first_pair is None:
    print("No pair results to display - the correlation analysis returned nothing.")
else:
    summary_columns = ['bin_center_deg', 'n_samples', 'tau_mean_hours',
                       'max_correlation', 'lag_at_max_hours']
    print(f"Results for {first_pair[0]} - {first_pair[1]}:")
    print(results[first_pair][summary_columns])

## 9. Direction Bin Statistics

In [None]:
from visualization.polar_plots import CorrelationSummaryPlots

# Draw the direction-bin statistics figure for every analysed station pair
for pair_key, pair_results in results.items():
    first_id, second_id = pair_key

    # Find the station_pairs row whose two ids match this result key
    is_this_pair = (
        (station_pairs['station1_id'] == first_id)
        & (station_pairs['station2_id'] == second_id)
    )
    pair_info = station_pairs.loc[is_this_pair].iloc[0]

    fig = CorrelationSummaryPlots.plot_direction_bin_statistics(
        results_df=pair_results,
        station1_name=pair_info['station1_name'],
        station2_name=pair_info['station2_name'],
    )

    plt.show()

## Next Steps

1. Try downloading real ERA5 data (requires CDS credentials)
2. Extend the analysis period
3. Add more stations
4. Experiment with different bin widths
5. Analyze seasonal variations