# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click the 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Function to generate realistic storm data for Nosy Be
def generate_synthetic_data(
    num_samples: int,
    start_date: str = "2024-01-01",
    end_date: str = "2024-12-31",
    *,
    seed: "int | None" = None,
) -> pd.DataFrame:
    """Build a table of synthetic storm observations located at Nosy Be.

    Observation times are taken from a randomly ordered 15-minute grid
    spanning ``start_date`` to ``end_date``.  While ``num_samples`` does not
    exceed the number of grid points every time (and therefore every
    ``storm_id``) is distinct; beyond that the shuffled grid is reused from
    its beginning, so times and ids repeat.

    Args:
        num_samples: Number of rows to generate.  Zero or a negative value
            yields an empty frame.
        start_date: Start of the 15-minute sampling grid.
        end_date: End of the 15-minute sampling grid.
        seed: Optional seed for a private ``numpy.random.RandomState``.
            When omitted, NumPy's global random state is used.

    Returns:
        DataFrame with the columns ``storm_id``, ``latitude``,
        ``longitude``, ``time``, ``storm_intensity``, ``storm_size``,
        ``storm_distance``, ``Storm_NosyBe_1h`` and ``Storm_NosyBe_3h``.

    Raises:
        ValueError: If rows are requested but the date range contains no
            15-minute slot (e.g. ``end_date`` precedes ``start_date``).
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    count = max(num_samples, 0)

    # "15min" is the supported spelling of the deprecated "15T" alias.
    slots = pd.date_range(start=start_date, end=end_date, freq="15min")
    if count and len(slots) == 0:
        raise ValueError(
            f"no 15-minute slots between {start_date!r} and {end_date!r}"
        )

    # Shuffle integer positions instead of the index's backing array: an
    # Index is meant to be immutable and writing through ``.values`` is not
    # something to rely on.
    order = rng.permutation(len(slots))
    # Wrap around the shuffled grid when more rows than slots are requested.
    picks = order[np.arange(count) % len(slots)] if count else order[:0]
    times = slots[picks]

    # One (count, 3) draw; the columns are storm intensity, storm size
    # (radius in km) and distance to the nearest storm (km).
    features = rng.uniform([30, 20, 0], [80, 150, 250], size=(count, 3))
    intensity, size, distance = features.T

    # Storm probability at 1 h / 3 h lead time: grows with intensity,
    # shrinks with distance, clipped to the valid [0, 1] range.
    prob_1h = np.clip(0.2 + intensity / 100 - distance / 250, 0, 1)
    prob_3h = np.clip(0.1 + intensity / 100 - distance / 250, 0, 1)

    # The identifier is derived from the observation time (minute resolution).
    storm_ids = [f"storm_{stamp}" for stamp in times.strftime("%Y%m%d%H%M")]

    # Nosy Be coordinates (approximate); identical for every row.
    latitude = -13.3333
    longitude = 48.2500

    return pd.DataFrame({
        'storm_id': storm_ids,
        'latitude': np.full(count, latitude),
        'longitude': np.full(count, longitude),
        'time': times,
        'storm_intensity': intensity,
        'storm_size': size,
        'storm_distance': distance,
        'Storm_NosyBe_1h': prob_1h,
        'Storm_NosyBe_3h': prob_3h,
    })

# Build the two splits in order: 1000 training rows, then 500 test rows.
train_data, test_data = (generate_synthetic_data(rows) for rows in (1000, 500))

# Write each split to its own CSV file, leaving out the row index.
for _frame, _csv_name in ((train_data, 'train_data.csv'), (test_data, 'test_data.csv')):
    _frame.to_csv(_csv_name, index=False)

print("Synthetic data for train_data.csv and test_data.csv generated successfully.")
