# Wind Correlation Analysis - Example Notebook

This notebook demonstrates how to use the wind correlation analysis framework interactively.

In [None]:
import sys
sys.path.insert(0, '../src')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

%matplotlib inline
%load_ext autoreload
%autoreload 2

## 1. Load Configuration and Initialize Components

In [None]:
import yaml

# Load the station configuration. The encoding is pinned to UTF-8 so the file
# is decoded the same way on every platform instead of depending on the
# locale's default encoding.
with open('../config/stations.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Tabulate the entries under the top-level `stations` key; later cells read
# the `id`, `lat` and `lon` columns of this frame.
stations_df = pd.DataFrame(config['stations'])
print("Configured stations:")
print(stations_df)

## 2. Download METAR Data

In [None]:
from data_acquisition.metar_downloader import METARDownloader

# Downloader configured with ../data/raw/metar as its output directory
metar = METARDownloader(output_dir='../data/raw/metar')

# Short sample window (1-7 January 2024) keeps the example quick to run
start_date = datetime(2024, 1, 1)
end_date = start_date + timedelta(days=6)

# One plain dict per configured station
station_records = stations_df.to_dict('records')

wind_data = metar.download_multiple_stations(
    stations=station_records,
    start_date=start_date,
    end_date=end_date,
    resample=True,
)

n_downloaded = len(wind_data)
print(f"\nDownloaded data for {n_downloaded} stations")

## 3. Examine Wind Speed Data

In [None]:
# Plot the wind-speed time series of every downloaded station, one panel per
# station stacked vertically. Each frame in `wind_data` is expected to carry a
# `wind_speed_ms` column.
n_stations = len(wind_data)

if n_stations == 0:
    # plt.subplots() rejects a grid with zero rows, so report the problem
    # instead of failing with an unrelated-looking ValueError.
    print("No METAR data available to plot - check the download step above.")
else:
    # squeeze=False always yields a 2-D array of Axes, which removes the need
    # to special-case a single station; ravel() flattens it to one Axes per row.
    fig, axes_grid = plt.subplots(
        n_stations, 1, figsize=(12, 3 * n_stations), squeeze=False
    )
    axes = axes_grid.ravel()

    for ax, (station_id, df) in zip(axes, wind_data.items()):
        ax.plot(df.index, df['wind_speed_ms'], label=station_id)
        ax.set_ylabel('Wind Speed (m/s)')
        ax.set_title(f'Station {station_id}')
        ax.grid(True, alpha=0.3)
        ax.legend()

    plt.tight_layout()
    plt.show()

## 4. Download ERA5 Data

Download real ERA5 reanalysis data and use it to calculate the geostrophic wind at each station.

In [None]:
from data_acquisition.era5_downloader import ERA5Downloader

# ERA5 downloader configured with ../data/raw/era5 as its output directory
era5_downloader = ERA5Downloader(output_dir='../data/raw/era5')

# All ERA5 request settings live under the `era5` section of the config
era5_cfg = config['era5']

# Copy the four bounding-box edges out of the config
bbox = {edge: era5_cfg['bbox'][edge] for edge in ('north', 'south', 'east', 'west')}

print(f"Downloading ERA5 data for bounding box:")
print(f"  North: {bbox['north']}°, South: {bbox['south']}°")
print(f"  East: {bbox['east']}°, West: {bbox['west']}°")
print(f"  Time range: {start_date} to {end_date}")
print("\nThis may take a few minutes on first download...")
print("Subsequent runs will use cached data.\n")

# Use the configured variable list only when it includes geopotential;
# otherwise request geopotential alone.
if 'geopotential' in era5_cfg['variables']:
    era5_variables = era5_cfg['variables']
else:
    era5_variables = ['geopotential']

# Pressure levels are passed to the downloader as strings
era5_levels = list(map(str, era5_cfg['pressure_levels']))

# Pressure-level download that feeds the geostrophic wind calculation
era5_file = era5_downloader.download_pressure_level_data(
    start_date=start_date,
    end_date=end_date,
    bbox=bbox,
    variables=era5_variables,
    pressure_levels=era5_levels,
)

print(f"\n✓ ERA5 data downloaded: {era5_file}")

# Build a geostrophic wind result for every station that has METAR data,
# keyed by station id
era5_data = {}

for station_id in wind_data:
    # First configured row whose id matches this station
    id_matches = stations_df['id'] == station_id
    station_row = stations_df[id_matches].iloc[0]
    station_lat = station_row['lat']
    station_lon = station_row['lon']

    # Pull the ERA5 series at the station's coordinates...
    station_series = era5_downloader.extract_station_timeseries(
        era5_file, lat=station_lat, lon=station_lon
    )

    # ...and turn it into geostrophic wind at the station's latitude
    era5_data[station_id] = era5_downloader.calculate_geostrophic_wind(
        station_series, lat=station_lat
    )
    print(f"✓ Processed ERA5 data for {station_id}")

print(f"\n✓ Generated geostrophic wind data for {len(era5_data)} stations")

## 5. Calculate Station Pair Relationships

In [None]:
from analysis.spatial_utils import SpatialCalculations

# Build the table of station pairs from the configured stations
station_pairs = SpatialCalculations.calculate_station_pairs(stations_df)

# Columns shown in the summary printout
pair_summary_columns = ['station1_name', 'station2_name', 'distance_km', 'bearing_deg']

print("\nStation pairs:")
print(station_pairs[pair_summary_columns])

## 6. Perform Cross-Correlation Analysis

In [None]:
from analysis.correlation_analysis import CrossCorrelationAnalysis

# Analysis settings, gathered in one place so they are easy to tweak.
# NOTE(review): bin_width is presumably a direction-bin width in degrees -
# confirm against CrossCorrelationAnalysis.
analysis_settings = {'bin_width': 30.0, 'max_lag_hours': 12.0}
analyzer = CrossCorrelationAnalysis(**analysis_settings)

# Run the analysis over every station pair
results = analyzer.analyze_all_pairs(
    wind_data=wind_data,
    era5_data=era5_data,
    station_pairs=station_pairs,
)

n_pairs = len(results)
print(f"\nCompleted analysis for {n_pairs} pairs")

## 7. Visualize Results

In [None]:
from visualization.polar_plots import PolarCorrelationPlot

plotter = PolarCorrelationPlot(figsize=(10, 10))

# One polar plot per analysed station pair
for pair_key, results_df in results.items():
    station1_id, station2_id = pair_key

    # Metadata row for this pair: first row matching both station ids
    is_first = station_pairs['station1_id'] == station1_id
    is_second = station_pairs['station2_id'] == station2_id
    pair_info = station_pairs[is_first & is_second].iloc[0]

    # The plotter's keyword names coincide with the pair-table column names
    pair_fields = {
        column: pair_info[column]
        for column in ('station1_name', 'station2_name', 'distance_km', 'bearing_deg')
    }

    fig = plotter.plot_correlation_polar(
        results_df=results_df,
        show_plot=True,
        **pair_fields,
    )

    plt.show()

## 8. Examine Detailed Results

In [None]:
# Show the results table for the first analysed pair. `results` can be empty
# (for example when fewer than two stations returned data), so guard against
# that instead of failing with an IndexError.
first_pair = next(iter(results), None)

if first_pair is None:
    print("No correlation results to display - check the analysis step above.")
else:
    # Each key is a (station1_id, station2_id) tuple
    print(f"Results for {first_pair[0]} - {first_pair[1]}:")
    print(results[first_pair][['bin_center_deg', 'n_samples', 'tau_mean_hours',
                                'max_correlation', 'lag_at_max_hours']])

## 9. Direction Bin Statistics

In [None]:
from visualization.polar_plots import CorrelationSummaryPlots

# One direction-bin statistics figure per analysed station pair
for pair_key, results_df in results.items():
    station1_id, station2_id = pair_key

    # Metadata row for this pair: first row matching both station ids
    is_first = station_pairs['station1_id'] == station1_id
    is_second = station_pairs['station2_id'] == station2_id
    pair_info = station_pairs[is_first & is_second].iloc[0]

    # The plot's keyword names coincide with the pair-table column names
    name_fields = {
        column: pair_info[column]
        for column in ('station1_name', 'station2_name')
    }

    fig = CorrelationSummaryPlots.plot_direction_bin_statistics(
        results_df=results_df,
        **name_fields,
    )

    plt.show()

## Next Steps

1. Vary the real ERA5 download from section 4 (requires CDS credentials), e.g. a different bounding box or pressure levels
2. Extend the analysis period
3. Add more stations
4. Experiment with different bin widths
5. Analyze seasonal variations