In [1]:
# =========================================================
# Scenario 3: All variables variable
# =========================================================
import pandas as pd
import numpy as np
import datetime
import random

def generate_variable_intensity_profile(
    lifetime_hours,
    time_step_minutes=5,
    base_intensity_range=(30, 50),
    intensity_peak_factor_range=(1.3, 2.5),
    intensity_min_dbz=5,
    intensity_decay_rate_range=(0.5, 1.2),
    noise_level_range=(0.5, 3.0),
    peak_time_relative_range=(0.3, 0.7)
):
    """Generate a randomised storm intensity profile sampled at a fixed time step.

    The profile rises along a logistic curve until a randomly placed peak
    time, then decays exponentially until the end of the storm's lifetime.
    Uniform noise is added to every sample.

    Args:
        lifetime_hours: Total storm lifetime in hours.
        time_step_minutes: Spacing between consecutive samples, in minutes.
        base_intensity_range: (low, high) bounds for the base intensity draw.
        intensity_peak_factor_range: (low, high) bounds for the multiplier
            applied to the base intensity to obtain the peak intensity.
        intensity_min_dbz: Floor applied to the decay-phase intensity before
            noise is added. The growth phase is not floored.
        intensity_decay_rate_range: (low, high) bounds for the exponential
            decay rate.
        noise_level_range: (low, high) bounds for the noise amplitude; each
            sample receives noise drawn from [-amplitude, +amplitude].
        peak_time_relative_range: (low, high) bounds for the peak position as
            a fraction of the lifetime.

    Returns:
        A list of ``int(lifetime_seconds / step_seconds) + 1`` noisy intensity
        samples, the first one at time zero.
    """
    profile = []

    # The order of the random draws is kept fixed so that seeded runs
    # reproduce the same profiles.
    base_intensity_dbz = random.uniform(*base_intensity_range)
    intensity_peak_factor = random.uniform(*intensity_peak_factor_range)
    intensity_decay_rate = random.uniform(*intensity_decay_rate_range)
    noise_level = random.uniform(*noise_level_range)

    lifetime_seconds = datetime.timedelta(hours=lifetime_hours).total_seconds()
    step_seconds = time_step_minutes * 60
    total_steps = int(lifetime_seconds / step_seconds)
    peak_relative_time = random.uniform(*peak_time_relative_range)
    peak_time_seconds = lifetime_seconds * peak_relative_time

    # Loop invariants.
    peak_intensity_dbz = base_intensity_dbz * intensity_peak_factor
    # Clamp to at least one second so the decay progress never divides by zero.
    decay_phase_duration = max(lifetime_seconds - peak_time_seconds, 1)

    for i in range(total_steps + 1):
        time_elapsed_seconds = i * step_seconds

        if time_elapsed_seconds <= peak_time_seconds:
            # Growth phase: logistic ramp centred halfway to the peak.
            if peak_time_seconds > 0:
                progress_to_peak = time_elapsed_seconds / peak_time_seconds
            else:
                # Zero-length growth phase (zero lifetime or peak at t=0):
                # the storm is already at its peak, so avoid dividing by zero.
                progress_to_peak = 1.0
            intensity_dbz = peak_intensity_dbz / (1 + np.exp(-10 * (progress_to_peak - 0.5)))
        else:
            # Decay phase: exponential fall-off from the peak, floored at the minimum.
            progress_through_decay = (time_elapsed_seconds - peak_time_seconds) / decay_phase_duration
            intensity_dbz = peak_intensity_dbz * np.exp(-intensity_decay_rate * progress_through_decay)
            intensity_dbz = max(intensity_min_dbz, intensity_dbz)

        # Add noise (applied after the floor, so samples may dip below it).
        noise = random.uniform(-noise_level, noise_level)
        profile.append(intensity_dbz + noise)

    return profile

def generate_storm_cell_lifecycle_all_variable(
    cell_id,
    start_date,
    end_date,
    time_step_minutes=5,
    lifetime_range=(0.5, 2.5),
    base_size_pixels=120,
    size_factor_range=(0.3, 3.0),
    intensity_min_dbz=5
):
    """Simulate one storm cell and return one record per time step.

    The cell forms at a random instant between ``start_date`` and
    ``end_date``, lives for a random lifetime, follows its own random
    intensity profile, and drifts with a constant random velocity.

    Args:
        cell_id: Identifier copied into every record.
        start_date: Earliest possible formation time (``datetime.datetime``).
        end_date: Latest possible formation time (``datetime.datetime``).
        time_step_minutes: Spacing between consecutive records, in minutes.
            Also used as the sampling step of the intensity profile.
        lifetime_range: (low, high) bounds in hours for the lifetime draw.
        base_size_pixels: Size that the intensity-dependent multiplier scales.
        size_factor_range: Size multipliers at ``intensity_min_dbz`` and at
            55 dBZ; intensities outside that span use the nearest end value.
        intensity_min_dbz: Lower anchor of the size interpolation and the
            decay-phase floor passed to the intensity profile.

    Returns:
        A list of dicts with keys ``cell_id``, ``timestamp_utc``,
        ``formation_time_utc``, ``dissipation_time_utc``, ``lifetime_hours``,
        ``time_since_formation_hours``, ``x_position``, ``y_position``,
        ``size_pixels``, ``intensity_dbz``, ``rainfall_mm_per_hr`` and
        ``intensity_change_rate``.
    """
    records = []
    time_range_seconds = (end_date - start_date).total_seconds()
    formation_time_seconds = random.uniform(0, time_range_seconds)
    formation_time = start_date + datetime.timedelta(seconds=formation_time_seconds)

    # Fully variable lifetime
    lifetime_hours = random.uniform(*lifetime_range)
    lifetime_delta = datetime.timedelta(hours=lifetime_hours)
    lifetime_seconds = lifetime_delta.total_seconds()
    dissipation_time = formation_time + lifetime_delta

    # Generate a unique intensity profile for this storm. The caller's time
    # step and intensity floor are forwarded so that non-default values are
    # honoured by the profile instead of being silently ignored.
    master_intensity_profile = generate_variable_intensity_profile(
        lifetime_hours,
        time_step_minutes=time_step_minutes,
        intensity_min_dbz=intensity_min_dbz,
    )

    # Variable movement speeds: both velocity components share one randomly
    # drawn standard deviation and stay constant for the whole lifetime.
    x_position, y_position = 0, 0
    vx, vy = np.random.normal(0, random.uniform(0.2, 3.0), 2)

    num_master_steps = len(master_intensity_profile) - 1
    # The first record's change rate is measured against this 0 baseline.
    previous_intensity = 0

    # Loop invariants.
    step_hours = time_step_minutes / 60
    step_delta = datetime.timedelta(minutes=time_step_minutes)

    current_time = formation_time
    while current_time <= dissipation_time:
        time_elapsed_seconds = (current_time - formation_time).total_seconds()

        # Map the elapsed fraction of the lifetime onto the profile index.
        progress_to_end = time_elapsed_seconds / lifetime_seconds
        master_index = int(progress_to_end * num_master_steps)
        intensity_dbz = master_intensity_profile[min(master_index, num_master_steps)]

        # Intensity change rate (per hour) relative to the previous record
        intensity_change_rate = (intensity_dbz - previous_intensity) / step_hours

        # Size scaling: interpolate the multiplier from intensity, add +/-20%
        # jitter, and never go below 10 pixels.
        size_multiplier = np.interp(intensity_dbz, [intensity_min_dbz, 55], size_factor_range)
        size_pixels = int(base_size_pixels * size_multiplier * random.uniform(0.8, 1.2))
        size_pixels = max(10, size_pixels)

        # Rainfall formula - variable relationship: a power law in intensity
        # with a coefficient and exponent redrawn at every step, plus noise.
        rainfall_exp = random.uniform(1.3, 1.7)
        rainfall_coeff = random.uniform(0.05, 0.1)
        rainfall_mmhr = rainfall_coeff * (max(intensity_dbz, 0) ** rainfall_exp) + random.uniform(-1.5, 1.5)
        rainfall_mmhr = max(0.0, rainfall_mmhr)

        # Update position (velocity is applied per hour of elapsed time)
        x_position += vx * step_hours
        y_position += vy * step_hours

        records.append({
            'cell_id': cell_id,
            'timestamp_utc': current_time,
            'formation_time_utc': formation_time,
            'dissipation_time_utc': dissipation_time,
            'lifetime_hours': lifetime_hours,
            'time_since_formation_hours': time_elapsed_seconds / 3600,
            'x_position': x_position,
            'y_position': y_position,
            'size_pixels': size_pixels,
            'intensity_dbz': intensity_dbz,
            'rainfall_mm_per_hr': rainfall_mmhr,
            'intensity_change_rate': intensity_change_rate
        })

        previous_intensity = intensity_dbz
        current_time += step_delta

    return records

# === MAIN SCRIPT ===
if __name__ == "__main__":
    print("--- Generating fully variable storm data (Scenario 3) ---")

    # Simulation window and number of storm cells to generate.
    overall_start_date = datetime.datetime(2024, 8, 1, 0, 0, 0)
    overall_end_date = datetime.datetime(2024, 8, 5, 23, 59, 59)
    num_simulated_cells = 50000

    # Flat list holding the per-time-step records of every simulated cell,
    # in cell order (cell numbers are 1-based and zero-padded to 5 digits).
    all_storm_data = [
        record
        for cell_number in range(1, num_simulated_cells + 1)
        for record in generate_storm_cell_lifecycle_all_variable(
            cell_id=f'StormCell_{cell_number:05d}',
            start_date=overall_start_date,
            end_date=overall_end_date
        )
    ]

    # Order rows by cell and then chronologically before writing them out.
    df = (
        pd.DataFrame(all_storm_data)
        .sort_values(by=['cell_id', 'timestamp_utc'])
        .reset_index(drop=True)
    )

    output_file = 'scenario3_all_variable_data_50000.csv'
    df.to_csv(output_file, index=False, date_format='%Y-%m-%d %H:%M:%S.%f')

    print(f"Generated {len(df)} total observations and saved to {output_file}.")
    print(f"Number of unique storm cells: {df['cell_id'].nunique()}")


--- Generating fully variable storm data (Scenario 3) ---
Generated 924494 total observations and saved to scenario3_all_variable_data_50000.csv.
Number of unique storm cells: 50000


In [2]:
import pandas as pd
import matplotlib.pyplot as plt

# ------------------ LOAD THE SIMULATED DATA ------------------
csv_file = "scenario3_all_variable_data_50000.csv"
storm_df = pd.read_csv(
    csv_file,
    parse_dates=['timestamp_utc', 'formation_time_utc', 'dissipation_time_utc'],
)

# -------------------- 3. Visualize Two Storm Lifecycles --------------------
print("\n--- Visualizing Two Unique Storms with Different Peak Intensities ---")

# Highest intensity each storm cell reaches over its recorded lifecycle.
peak_intensities = storm_df.groupby("cell_id")["intensity_dbz"].max()

# The two storms to compare: the weakest and the strongest peak.
min_peak_storm_id = peak_intensities.idxmin()
max_peak_storm_id = peak_intensities.idxmax()
low_intensity_storm_df = storm_df[storm_df['cell_id'] == min_peak_storm_id]
high_intensity_storm_df = storm_df[storm_df['cell_id'] == max_peak_storm_id]

# Plotting order (low first, then high) fixes the line colours and legend order.
storms_to_plot = (
    ('Low Intensity Storm', low_intensity_storm_df),
    ('High Intensity Storm', high_intensity_storm_df),
)

# Two stacked panels sharing the time axis.
fig, axes = plt.subplots(2, 1, figsize=(12, 10), sharex=True)
fig.suptitle('Comparison of Storms with Different Peak Intensities and Lifetimes', fontsize=16)

# (axis, column to plot, panel title, y-axis label, show peak value in legend?)
panels = (
    (axes[0], 'intensity_dbz', 'Intensity vs. Time Since Formation', 'Intensity (dBZ)', True),
    (axes[1], 'rainfall_mm_per_hr', 'Rainfall vs. Time Since Formation', 'Rainfall (mm/hr)', False),
)

for ax, column, panel_title, y_label, show_peak in panels:
    for storm_name, storm_data in storms_to_plot:
        if show_peak:
            legend_label = f'{storm_name} (Peak: {storm_data["intensity_dbz"].max():.2f} dBZ)'
        else:
            legend_label = storm_name
        ax.plot(storm_data['time_since_formation_hours'], storm_data[column], label=legend_label)
    ax.set_title(panel_title)
    ax.set_xlabel('Time Since Formation (hours)')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True)

# Leave room at the top for the figure-level title.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.savefig('storm_lifecycle_comparison.png')
plt.close()

print("\nVisualization saved to 'storm_lifecycle_comparison.png'.")


--- Visualizing Two Unique Storms with Different Peak Intensities ---

Visualization saved to 'storm_lifecycle_comparison.png'.
