In [1]:
import pandas as pd
import numpy as np
import datetime
import random


def generate_master_intensity_profile(
    avg_lifetime_hours,
    time_step_minutes=5,
    base_intensity_dbz=40,
    intensity_peak_factor_range=(1.5, 2.0),
    intensity_min_dbz=5,
    intensity_decay_rate=0.8,
    noise_level=1.5
):
    """
    Generates a single intensity profile with more predictable link
    between early growth and later peak values.
    """
    profile = []
    lifetime_delta = datetime.timedelta(hours=avg_lifetime_hours)
    total_steps = int(lifetime_delta.total_seconds() / (time_step_minutes * 60))

    # Randomized peak timing, but narrower range for predictability
    peak_relative_time = random.uniform(0.45, 0.55)
    peak_time_seconds = lifetime_delta.total_seconds() * peak_relative_time

    # Make peak height depend partly on base intensity
    early_growth_factor = random.uniform(1.1, 1.4)
    intensity_peak_factor = early_growth_factor * random.uniform(*intensity_peak_factor_range)

    for i in range(total_steps + 1):
        time_elapsed_seconds = i * (time_step_minutes * 60)

        if time_elapsed_seconds <= peak_time_seconds:
            # Growth phase with smoother curve
            growth_phase_duration = peak_time_seconds
            progress_to_peak = time_elapsed_seconds / growth_phase_duration
            # Sigmoid growth: ties early slope to peak height
            intensity_dbz = (
                base_intensity_dbz * intensity_peak_factor
                / (1 + np.exp(-8 * (progress_to_peak - 0.5)))
            )
        else:
            # Decay phase
            decay_phase_duration = lifetime_delta.total_seconds() - peak_time_seconds
            progress_through_decay = (time_elapsed_seconds - peak_time_seconds) / decay_phase_duration
            intensity_dbz = base_intensity_dbz * intensity_peak_factor * np.exp(
                -intensity_decay_rate * progress_through_decay
            )
            intensity_dbz = max(intensity_min_dbz, intensity_dbz)

        # Add low noise for more signal
        noise = random.uniform(-noise_level, noise_level)
        profile.append(intensity_dbz + noise)

    return profile


def generate_storm_cell_lifecycle_with_fixed_lifetime(
    cell_id,
    start_date,
    end_date,
    fixed_lifetime_hours,
    time_step_minutes=5,
    base_size_pixels=120,
    size_factor_range=(0.3, 3.0),
    intensity_min_dbz=5
):
    records = []
    time_range_seconds = (end_date - start_date).total_seconds()
    formation_time_seconds = random.uniform(0, time_range_seconds)
    formation_time = start_date + datetime.timedelta(seconds=formation_time_seconds)

    lifetime_hours = fixed_lifetime_hours
    lifetime_delta = datetime.timedelta(hours=lifetime_hours)
    dissipation_time = formation_time + lifetime_delta

    # Base intensity variation
    base_intensity = random.uniform(35, 45)
    master_intensity_profile = generate_master_intensity_profile(
        avg_lifetime_hours=lifetime_hours,
        base_intensity_dbz=base_intensity
    )

    # Movement simulation
    x_position, y_position = 0, 0
    vx, vy = np.random.normal(0, 0.5, 2)

    num_master_steps = len(master_intensity_profile) - 1
    previous_intensity = 0

    current_time = formation_time
    while current_time <= dissipation_time:
        time_elapsed_seconds = (current_time - formation_time).total_seconds()

        progress_to_end = time_elapsed_seconds / lifetime_delta.total_seconds()
        master_index = int(progress_to_end * num_master_steps)
        intensity_dbz = master_intensity_profile[min(master_index, num_master_steps)]

        # Intensity change rate
        intensity_change_rate = (intensity_dbz - previous_intensity) / (time_step_minutes / 60)

        # Size scaling with intensity
        size_multiplier = np.interp(intensity_dbz, [intensity_min_dbz, 50], size_factor_range)
        size_pixels = int(base_size_pixels * size_multiplier * random.uniform(0.95, 1.05))
        size_pixels = max(10, size_pixels)

        # Rainfall formula tied closely to intensity for predictability
        rainfall_mmhr = 0.08 * (intensity_dbz ** 1.5) + random.uniform(-0.1, 0.1)
        rainfall_mmhr = max(0.0, rainfall_mmhr)

        # Position update
        x_position += vx * (time_step_minutes / 60)
        y_position += vy * (time_step_minutes / 60)

        records.append({
            'cell_id': cell_id,
            'timestamp_utc': current_time,
            'formation_time_utc': formation_time,
            'dissipation_time_utc': dissipation_time,
            'lifetime_hours': lifetime_hours,
            'time_since_formation_hours': time_elapsed_seconds / 3600,
            'x_position': x_position,
            'y_position': y_position,
            'size_pixels': size_pixels,
            'intensity_dbz': intensity_dbz,
            'rainfall_mm_per_hr': rainfall_mmhr,
            'intensity_change_rate': intensity_change_rate
        })

        previous_intensity = intensity_dbz
        current_time += datetime.timedelta(minutes=time_step_minutes)

    return records


# === MAIN SCRIPT ===
if __name__ == "__main__":
    print("--- Generating improved correlated data with fixed lifetime ---")
    all_storm_data = []
    num_simulated_cells = 50000
    overall_start_date = datetime.datetime(2024, 8, 1, 0, 0, 0)
    overall_end_date = datetime.datetime(2024, 8, 5, 23, 59, 59)
    fixed_lifetime_hours = 1.5

    for i in range(num_simulated_cells):
        cell_records = generate_storm_cell_lifecycle_with_fixed_lifetime(
            cell_id=f'StormCell_{i+1:05d}',
            start_date=overall_start_date,
            end_date=overall_end_date,
            fixed_lifetime_hours=fixed_lifetime_hours
        )
        all_storm_data.extend(cell_records)

    df = pd.DataFrame(all_storm_data)
    df = df.sort_values(by=['cell_id', 'timestamp_utc']).reset_index(drop=True)

    output_file = 'scenario2_fixed_lifetime_data_50000.csv'
    df.to_csv(output_file, index=False, date_format='%Y-%m-%d %H:%M:%S.%f')

    print(f"Generated {len(df)} total observations and saved to {output_file}.")
    print(f"Number of unique storm cells: {df['cell_id'].nunique()}")


--- Generating improved correlated data with fixed lifetime ---


  rainfall_mmhr = 0.08 * (intensity_dbz ** 1.5) + random.uniform(-0.1, 0.1)


Generated 950000 total observations and saved to scenario2_fixed_lifetime_data_50000.csv.
Number of unique storm cells: 50000


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# -------------------- Load and Inspect Data --------------------
# This script assumes the CSV file is in the same directory.
# If you get a FileNotFoundError, please ensure the file is in the same directory as this script.
file_path = "scenario2_fixed_lifetime_data_50000.csv"
try:
    storm_df = pd.read_csv(file_path)
    print("Data loaded successfully.")
except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found. Please ensure the file is in the correct directory.")
    # Exit the script gracefully if the file is not found
    exit()

# Convert relevant columns to appropriate data types
storm_df['timestamp_utc'] = pd.to_datetime(storm_df['timestamp_utc'])
storm_df['formation_time_utc'] = pd.to_datetime(storm_df['formation_time_utc'])
storm_df['dissipation_time_utc'] = pd.to_datetime(storm_df['dissipation_time_utc'])
# -------------------- 3. Visualize Two Storm Lifecycles --------------------
# -------------------- 3. Visualize Two Storm Lifecycles --------------------
print("\n--- Visualizing Two Unique Storms with Different Peak Intensities ---")

# Compute peak intensity per storm
peak_intensities = storm_df.groupby("cell_id")["intensity_dbz"].max()

# Get the storm with the lowest and highest peak intensity
min_peak_storm_id = peak_intensities.idxmin()
max_peak_storm_id = peak_intensities.idxmax()

# Filter the data for these two storms
low_intensity_storm_df = storm_df[storm_df['cell_id'] == min_peak_storm_id]
high_intensity_storm_df = storm_df[storm_df['cell_id'] == max_peak_storm_id]

# Create a figure with a 2x1 grid of subplots
fig, axes = plt.subplots(2, 1, figsize=(12, 10), sharex=True)
fig.suptitle('Comparison of Storms with Different Peak Intensities (Fixed Lifetime)', fontsize=16)

# Plot Intensity vs. Lifetime
axes[0].plot(low_intensity_storm_df['time_since_formation_hours'],
             low_intensity_storm_df['intensity_dbz'],
             label=f'Low Intensity Storm (Peak: {low_intensity_storm_df["intensity_dbz"].max():.2f} dBZ)')
axes[0].plot(high_intensity_storm_df['time_since_formation_hours'],
             high_intensity_storm_df['intensity_dbz'],
             label=f'High Intensity Storm (Peak: {high_intensity_storm_df["intensity_dbz"].max():.2f} dBZ)')
axes[0].set_title('Intensity vs. Time Since Formation')
axes[0].set_xlabel('Time Since Formation (hours)')
axes[0].set_ylabel('Intensity (dBZ)')
axes[0].legend()
axes[0].grid(True)
axes[0].set_xlim(0, 1.5)  # Fixed lifetime window

# Plot Rainfall vs. Lifetime
axes[1].plot(low_intensity_storm_df['time_since_formation_hours'],
             low_intensity_storm_df['rainfall_mm_per_hr'],
             label='Low Intensity Storm')
axes[1].plot(high_intensity_storm_df['time_since_formation_hours'],
             high_intensity_storm_df['rainfall_mm_per_hr'],
             label='High Intensity Storm')
axes[1].set_title('Rainfall vs. Time Since Formation')
axes[1].set_xlabel('Time Since Formation (hours)')
axes[1].set_ylabel('Rainfall (mm/hr)')
axes[1].legend()
axes[1].grid(True)
axes[1].set_xlim(0, 1.5)

plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.savefig('storm_lifecycle_comparison.png')
plt.close()

print("\nVisualization saved to 'storm_lifecycle_exp2.png'.")

Data loaded successfully.

--- Visualizing Two Unique Storms with Different Peak Intensities ---

Visualization saved to 'storm_lifecycle_exp2.png'.
