In [1]:
import pandas as pd
import numpy as np
import datetime
import random

def generate_master_intensity_profile(
    avg_lifetime_hours=2.0, 
    time_step_minutes=5,
    base_intensity_dbz=40, 
    intensity_peak_factor=1.8, 
    intensity_min_dbz=5, 
    intensity_decay_rate=0.8
):
    """Build one deterministic, noise-free intensity profile shared by all storm cells.

    The profile is sampled every ``time_step_minutes`` from formation (t = 0) to
    ``avg_lifetime_hours`` inclusive and has two phases:

    * Growth (first half of the lifetime): a logistic curve that rises towards
      the peak intensity ``base_intensity_dbz * intensity_peak_factor``.
    * Decay (second half): an exponential fall-off from that same peak
      intensity, governed by ``intensity_decay_rate``.

    Both phases are floored at ``intensity_min_dbz``. The growth phase is
    scaled to the peak intensity (not to ``base_intensity_dbz``) so that it
    meets the start of the decay phase instead of jumping upwards at the
    halfway point.

    Args:
        avg_lifetime_hours: Total duration covered by the profile, in hours.
        time_step_minutes: Sampling interval, in minutes.
        base_intensity_dbz: Base intensity that is multiplied by
            ``intensity_peak_factor`` to obtain the peak.
        intensity_peak_factor: Multiplier applied to the base intensity.
        intensity_min_dbz: Lower bound applied to every growth/decay sample.
        intensity_decay_rate: Exponential rate used over the decay phase,
            expressed per unit of normalised decay progress (0..1).

    Returns:
        A list of ``int(lifetime / step) + 1`` intensity values.
    """
    lifetime_seconds = datetime.timedelta(hours=avg_lifetime_hours).total_seconds()
    step_seconds = time_step_minutes * 60
    total_steps = int(lifetime_seconds / step_seconds)

    peak_intensity_dbz = base_intensity_dbz * intensity_peak_factor

    # Fixed peak at the halfway point
    peak_relative_time = 0.5
    peak_time_seconds = lifetime_seconds * peak_relative_time
    decay_phase_duration = lifetime_seconds - peak_time_seconds

    profile = []
    for step in range(total_steps + 1):
        time_elapsed_seconds = step * step_seconds

        if time_elapsed_seconds <= peak_time_seconds:
            if peak_time_seconds == 0:
                # Degenerate zero-length growth phase: the cell is at its peak immediately.
                intensity_dbz = peak_intensity_dbz
            else:
                progress_to_peak = time_elapsed_seconds / peak_time_seconds
                # Logistic rise centred on the middle of the growth phase.
                logistic = 1 / (1 + np.exp(-10 * (progress_to_peak - 0.5)))
                intensity_dbz = float(max(intensity_min_dbz, peak_intensity_dbz * logistic))
        elif decay_phase_duration == 0:
            intensity_dbz = intensity_min_dbz
        else:
            progress_through_decay = (time_elapsed_seconds - peak_time_seconds) / decay_phase_duration
            decayed = peak_intensity_dbz * np.exp(-intensity_decay_rate * progress_through_decay)
            intensity_dbz = float(max(intensity_min_dbz, decayed))

        profile.append(intensity_dbz)  # No noise added to ensure identical profile

    return profile

def generate_storm_cell_lifecycle_with_fixed_intensity(
    cell_id,
    start_date,
    end_date,
    master_intensity_profile,
    time_step_minutes=5,
    avg_lifetime_hours=2.0,
    lifetime_std_hours=0.3,
    base_size_pixels=120,
    size_factor_range=(0.3, 3.0),
    intensity_min_dbz=5,
    intensity_peak_factor=1.8
):
    """Simulate one storm cell and return its observations as a list of dicts.

    The cell forms at a uniformly random instant between ``start_date`` and
    ``end_date``, lives for a normally distributed lifetime clamped to
    [0.5, 2.0] hours, and is observed every ``time_step_minutes`` from
    formation until dissipation. Its intensity at each observation is read
    from ``master_intensity_profile`` by stretching the profile over the
    cell's own lifetime and taking the sample at or just before the current
    progress (a floor lookup, not a true interpolation).

    Randomness comes from both the ``random`` module and ``np.random``; seed
    both for reproducible output.

    Args:
        cell_id: Identifier copied into every record.
        start_date: Earliest possible formation time (``datetime``).
        end_date: Latest possible formation time (``datetime``).
        master_intensity_profile: Non-empty sequence of intensity values
            covering a full lifecycle from formation to dissipation.
        time_step_minutes: Interval between observations, in minutes.
        avg_lifetime_hours: Mean of the lifetime distribution, in hours.
        lifetime_std_hours: Standard deviation of the lifetime distribution.
        base_size_pixels: Size that is scaled by the intensity-dependent
            multiplier and by a random factor in [0.8, 1.2].
        size_factor_range: ``(low, high)`` multipliers mapped linearly onto
            the intensity range ``[intensity_min_dbz, 40 * intensity_peak_factor]``.
        intensity_min_dbz: Intensity mapped to the low size multiplier.
        intensity_peak_factor: Factor that sets the intensity mapped to the
            high size multiplier.

    Returns:
        A list with one dict per observation, keyed by ``cell_id``,
        ``timestamp_utc``, ``formation_time_utc``, ``dissipation_time_utc``,
        ``lifetime_hours``, ``time_since_formation_hours``, ``x_position``,
        ``y_position``, ``size_pixels``, ``intensity_dbz``,
        ``rainfall_mm_per_hr`` and ``intensity_change_rate``.

    Raises:
        ValueError: If ``master_intensity_profile`` is empty.
    """
    if len(master_intensity_profile) == 0:
        raise ValueError("master_intensity_profile must contain at least one value")

    records = []
    time_range_seconds = (end_date - start_date).total_seconds()
    formation_time_seconds = random.uniform(0, time_range_seconds)
    formation_time = start_date + datetime.timedelta(seconds=formation_time_seconds)

    # Variable lifetime with slightly increased randomness, capped at 2 hours
    lifetime_hours = min(2.0, max(0.5, np.random.normal(avg_lifetime_hours, lifetime_std_hours)))
    lifetime_delta = datetime.timedelta(hours=lifetime_hours)
    lifetime_seconds = lifetime_delta.total_seconds()
    dissipation_time = formation_time + lifetime_delta

    # Simulate movement with slightly increased randomness
    x_position, y_position = 0, 0
    vx, vy = np.random.normal(0, 2, 2)

    # Loop-invariant quantities.
    step_hours = time_step_minutes / 60
    step_delta = datetime.timedelta(minutes=time_step_minutes)
    num_master_steps = len(master_intensity_profile) - 1
    # 40 matches the default base intensity of generate_master_intensity_profile.
    base_intensity_dbz = 40
    intensity_range = [intensity_min_dbz, base_intensity_dbz * intensity_peak_factor]

    # The first change rate is measured against an intensity of 0.
    previous_intensity = 0

    current_time = formation_time
    while current_time <= dissipation_time:
        time_elapsed_seconds = (current_time - formation_time).total_seconds()

        # Map lifetime progress (0..1) onto the master profile and take the
        # sample at or before that point, clamped to the last entry.
        progress_to_end = time_elapsed_seconds / lifetime_seconds
        master_index = min(int(progress_to_end * num_master_steps), num_master_steps)
        intensity_dbz = master_intensity_profile[master_index]

        # Compute derived features
        intensity_change_rate = (intensity_dbz - previous_intensity) / step_hours
        size_multiplier = np.interp(intensity_dbz, intensity_range, size_factor_range)
        # Honour the caller-supplied base size instead of a hard-coded 120.
        size_pixels = max(10, int(base_size_pixels * size_multiplier * random.uniform(0.8, 1.2)))
        rainfall_mmhr = max(0.0, 0.07 * (intensity_dbz ** 1.6) + random.uniform(-1.0, 1.0))

        # Update position
        x_position += vx * step_hours
        y_position += vy * step_hours

        records.append({
            'cell_id': cell_id,
            'timestamp_utc': current_time,
            'formation_time_utc': formation_time,
            'dissipation_time_utc': dissipation_time,
            'lifetime_hours': lifetime_hours,
            'time_since_formation_hours': time_elapsed_seconds / 3600,
            'x_position': x_position,
            'y_position': y_position,
            'size_pixels': size_pixels,
            'intensity_dbz': intensity_dbz,
            'rainfall_mm_per_hr': rainfall_mmhr,
            'intensity_change_rate': intensity_change_rate
        })
        previous_intensity = intensity_dbz
        current_time += step_delta
    return records

# --- SCENARIO: Fixed Intensity Profile, Slightly Increased Variability ---
# Simulates every storm cell against one shared intensity profile, collects
# all observations in a single DataFrame and writes them to CSV.
print("--- Generating data with a fixed intensity profile and max 2hr lifetime ---")

# Simulation window and population size.
num_simulated_cells = 50000
overall_start_date = datetime.datetime(2024, 8, 1, 0, 0, 0)
overall_end_date = datetime.datetime(2024, 8, 5, 23, 59, 59)

# Step 1: one master intensity profile, built for a 2-hour storm.
avg_lifetime_hours = 2.0
master_intensity_profile = generate_master_intensity_profile(avg_lifetime_hours)

# Step 2: simulate the cells one after another, reusing that profile.
all_storm_data = []
for i in range(num_simulated_cells):
    cell_records = generate_storm_cell_lifecycle_with_fixed_intensity(
        cell_id=f'StormCell_{i+1:05d}',
        start_date=overall_start_date,
        end_date=overall_end_date,
        master_intensity_profile=master_intensity_profile,
        time_step_minutes=5,
        avg_lifetime_hours=avg_lifetime_hours,
        lifetime_std_hours=0.3,
        base_size_pixels=120,
        size_factor_range=(0.3, 3.0),
        intensity_peak_factor=1.8,
        intensity_min_dbz=5,
    )
    all_storm_data += cell_records

# Order observations by cell and then chronologically within each cell.
df_scenario = (
    pd.DataFrame(all_storm_data)
    .sort_values(by=['cell_id', 'timestamp_utc'])
    .reset_index(drop=True)
)

# Persist the result; timestamps keep microsecond precision.
output_file = 'scenario1_fixed_intensity_data_50000.csv'
df_scenario.to_csv(output_file, index=False, date_format='%Y-%m-%d %H:%M:%S.%f')

total_observations = len(df_scenario)
unique_cell_count = df_scenario['cell_id'].nunique()
print(f"Generated {total_observations} total observations and saved to {output_file}.")
print(f"Number of unique storm cells: {unique_cell_count}")


--- Generating data with a fixed intensity profile and max 2hr lifetime ---
Generated 1164799 total observations and saved to scenario1_fixed_intensity_data_50000.csv.
Number of unique storm cells: 50000


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# -------------------- Load and Inspect Data --------------------
# The CSV is looked up relative to the current working directory.
# If it is missing, a hint is printed and the script stops with a
# non-zero exit status.
file_path = "scenario1_fixed_intensity_data_50000.csv"
try:
    storm_df = pd.read_csv(file_path)
    print("Data loaded successfully.")
except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found. Please ensure the file is in the same directory as this script.")
    # exit() is an interactive-shell helper; raise SystemExit to stop reliably
    # and to signal failure through the exit status.
    raise SystemExit(1)

# -------------------- 2. & 3. Visualize Two Unique Storm Lifecycles --------------------
print("\n--- Visualizing Two Unique Storm Lifecycles ---")

# Pick the cells with the shortest and the longest lifetime
all_lifetimes = storm_df.groupby('cell_id')['lifetime_hours'].first()
short_storm_id = all_lifetimes.idxmin()
long_storm_id = all_lifetimes.idxmax()

# Filter the data for these two storms
storm1_df = storm_df[storm_df['cell_id'] == short_storm_id]
storm2_df = storm_df[storm_df['cell_id'] == long_storm_id]

# Create a figure with a 2x1 grid of subplots
fig, axes = plt.subplots(2, 1, figsize=(12, 10), sharex=False)
fig.suptitle('Comparison of Two Storm Cell Lifecycles', fontsize=16)

# Draw both storms on both panels: intensity on top, rainfall below.
# The short-lived storm is drawn first on each panel.
for storm_id, cell_df in ((short_storm_id, storm1_df), (long_storm_id, storm2_df)):
    elapsed_hours = cell_df['time_since_formation_hours']
    lifetime = cell_df["lifetime_hours"].iloc[0]
    axes[0].plot(elapsed_hours, cell_df['intensity_dbz'], label=f'Storm ID: {storm_id} (Lifetime: {lifetime:.2f} hrs)')
    axes[1].plot(elapsed_hours, cell_df['rainfall_mm_per_hr'], label=f'Storm ID: {storm_id}')

# Titles, axis labels, legend and grid for each panel.
panel_text = (
    ('Intensity vs. Time Since Formation', 'Intensity (dBZ)'),
    ('Rainfall vs. Time Since Formation', 'Rainfall (mm/hr)'),
)
for ax, (title, y_label) in zip(axes, panel_text):
    ax.set_title(title)
    ax.set_xlabel('Time Since Formation (hours)')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True)

# Leave room at the top for the figure-level title.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])

# Use one variable for the output path so the message always names the file
# that was actually written.
plot_output_path = 'storm_lifecycles.png'
plt.savefig(plot_output_path)
plt.close()
print(f"Plots saved to '{plot_output_path}'.")


Data loaded successfully.

--- Visualizing Two Unique Storm Lifecycles ---
Plots saved to 'storm_lifecycles_exp_1.png'.
