# In [None]:
import pandas as pd
import numpy as np

# Function to create a forecasting dataset with additional features
def create_forecasting_dataset_with_additional_features(data, start_date, end_date, history_window_days, forecast_window_days):
    """Build a per-event forecasting table from an earthquake catalog.

    For every row of ``data`` whose ``Time`` lies in ``[start_date, end_date]``
    the events of the preceding ``history_window_days`` days are summarised
    into features, and the largest magnitude observed in the following
    ``forecast_window_days`` days is attached as the target.

    Args:
        data: DataFrame with a datetime ``Time`` column and numeric
            ``Magnitude`` and ``Depth`` columns. ``Energy`` and ``b_value``
            are optional; the features derived from a missing column are NaN.
        start_date: First reference time to process (inclusive).
        end_date: Last reference time to process (inclusive).
        history_window_days: Length of the look-back window in days. The
            window is ``[t - history, t)``, so the reference event itself is
            excluded.
        forecast_window_days: Length of the look-ahead window in days. The
            window is ``(t, t + forecast]``.

    Returns:
        DataFrame with one row per reference event that has at least one
        event in both windows. ``Elapsed_Time`` is the number of hours since
        the most recent event in the history window. The frame is empty (but
        keeps its columns) when no event qualifies.
    """
    columns = [
        'Time', 'Max_Magnitude', 'Mean_Magnitude', 'Std_Magnitude', 'Mean_Depth',
        'Earthquake_Frequency', 'Earthquake_Numbers', 'Mean_Energy', 'Std_Energy',
        'Mean_b_value', 'Std_b_value', 'b_value_ratio', 'Total_Energy', 'Energy_Density',
        'Elapsed_Time', 'Future_Max_Magnitude',
    ]

    # Loop-invariant values, computed once instead of once per event.
    times = data['Time']
    has_energy = 'Energy' in data.columns
    has_b_value = 'b_value' in data.columns
    history_span = pd.Timedelta(days=history_window_days)
    forecast_span = pd.Timedelta(days=forecast_window_days)

    rows = []
    for current_day in times[(times >= start_date) & (times <= end_date)]:
        history_window = data[(times >= current_day - history_span) & (times < current_day)]
        forecast_window = data[(times > current_day) & (times <= current_day + forecast_span)]

        # A row is only meaningful when there is something to summarise and
        # something to predict.
        if history_window.empty or forecast_window.empty:
            continue

        magnitudes = history_window['Magnitude']
        event_count = len(history_window)  # > 0 because of the guard above

        if has_energy:
            energies = history_window['Energy']
            mean_energy = energies.mean()
            std_energy = energies.std()
            total_energy = energies.sum()
        else:
            mean_energy = std_energy = total_energy = np.nan

        if has_b_value:
            b_values = history_window['b_value']
            mean_b_value = b_values.mean()
            std_b_value = b_values.std()
        else:
            mean_b_value = std_b_value = np.nan

        # Only an exact zero mean is guarded; a NaN mean propagates as NaN.
        b_value_ratio = std_b_value / mean_b_value if mean_b_value != 0 else np.nan

        # Hours between the reference event and the latest preceding event.
        elapsed_time = (current_day - history_window['Time'].max()).total_seconds() / 3600

        rows.append({
            'Time': current_day,
            'Max_Magnitude': magnitudes.max(),
            'Mean_Magnitude': magnitudes.mean(),
            'Std_Magnitude': magnitudes.std(),
            'Mean_Depth': history_window['Depth'].mean(),
            'Earthquake_Frequency': event_count / history_window_days,
            'Earthquake_Numbers': event_count,
            'Mean_Energy': mean_energy,
            'Std_Energy': std_energy,
            'Mean_b_value': mean_b_value,
            'Std_b_value': std_b_value,
            'b_value_ratio': b_value_ratio,
            'Total_Energy': total_energy,
            'Energy_Density': total_energy / event_count,
            'Elapsed_Time': elapsed_time,
            'Future_Max_Magnitude': forecast_window['Magnitude'].max(),
        })

    # Passing ``columns`` keeps the schema stable even when ``rows`` is empty.
    return pd.DataFrame(rows, columns=columns)

# Load the earthquake catalog
data = pd.read_csv('Marmara1990V1.csv')

# Clean extra spaces in column names
data.columns = data.columns.str.strip()

# The rolling statistics and window features below read 'b_value';
# create it as an all-NaN column when the catalog does not provide one
if 'b_value' not in data.columns:
    data['b_value'] = np.nan

# Convert 'Time' column to datetime format
data['Time'] = pd.to_datetime(data['Time'])

# Sort DataFrame by time so that diff() measures gaps between consecutive events
data = data.sort_values(by='Time')

# Time difference between consecutive earthquakes, in hours
data['Time_Difference'] = data['Time'].diff().dt.total_seconds() / 3600
# The first row has no predecessor; fill it with the mean gap.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: that chained form does not update `data` under pandas Copy-on-Write.
data['Time_Difference'] = data['Time_Difference'].fillna(data['Time_Difference'].mean())

# Rolling mean / standard deviation over the last 100 rows (events, not days).
# These columns stay on `data`; the feature builder below does not read them.
data['Time_Diff_MA'] = data['Time_Difference'].rolling(window=100, min_periods=1).mean()
data['Time_Diff_Std'] = data['Time_Difference'].rolling(window=100, min_periods=1).std()
data['b_value_MA'] = data['b_value'].rolling(window=100, min_periods=1).mean()
data['b_value_Std'] = data['b_value'].rolling(window=100, min_periods=1).std()

# History and forecast window days
history_window_days = 100
forecast_window_days = 100

# Leave one history window after the first record and one forecast window
# before the last record, so the date range follows the window lengths
start_date = data['Time'].min() + pd.Timedelta(days=history_window_days)
end_date = data['Time'].max() - pd.Timedelta(days=forecast_window_days)

# Create the forecasting dataset
forecasting_data = create_forecasting_dataset_with_additional_features(
    data, start_date, end_date, history_window_days, forecast_window_days
)

# Save the new dataset to CSV
output_file = "forecasting_dataset_with_all_features.csv"
forecasting_data.to_csv(output_file, index=False)

print(f"The new forecasting dataset has been saved to '{output_file}'.")
