In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import  ReduceLROnPlateau
from torch.cuda.amp import GradScaler, autocast

import seaborn as sns
import matplotlib.pyplot  as plt

# sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from earthquake_prediction.src.helpers.datapi import url, url_data_call
from earthquake_prediction.src.model.model import Early_Stopping, ModelCheckPoint, EarthquakeModel
from earthquake_prediction.src.preprocessing.data_preprocessing import *

KeyboardInterrupt: 

In [None]:
df = url_data_call(url)
df = data_preprocessing(df, ts=True)
df = imput_encode(df)
df = CyclicTimeTransform(df)
df

Unnamed: 0_level_0,mag,dmin,rms,magType,longitude,latitude,elevation,Hour sin,Hour cos,Day sin,Day cos,Month sin,Month cos,day_of_year,month
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-12-31 23:18:09.731,4.20,6.2370,0.53,0,143.560200,-4.259000,133.548,-0.258819,0.965926,0.433884,-0.900969,-2.449294e-16,1.000000,365,12
2023-12-31 22:52:31.316,4.20,9.1640,0.68,0,141.420800,-2.688600,10.000,-0.500000,0.866025,0.433884,-0.900969,-2.449294e-16,1.000000,365,12
2023-12-31 22:48:19.016,4.40,3.4550,0.54,0,-178.332700,-17.506500,540.654,-0.500000,0.866025,0.433884,-0.900969,-2.449294e-16,1.000000,365,12
2023-12-31 22:13:49.174,4.20,0.4520,0.53,0,144.589200,13.952600,193.928,-0.500000,0.866025,0.433884,-0.900969,-2.449294e-16,1.000000,365,12
2023-12-31 21:42:27.330,2.56,0.1071,0.17,2,-66.816000,18.064167,14.760,-0.707107,0.707107,0.433884,-0.900969,-2.449294e-16,1.000000,365,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-01 00:49:00.850,3.09,0.5998,0.21,2,-66.786167,18.948833,14.100,0.000000,1.000000,0.781831,0.623490,5.000000e-01,-0.866025,121,5
2023-05-01 00:46:53.270,3.13,0.6209,0.26,2,-66.760833,18.970833,8.300,0.000000,1.000000,0.781831,0.623490,5.000000e-01,-0.866025,121,5
2023-05-01 00:16:20.641,5.10,0.6850,0.66,12,-177.433500,-29.802000,28.056,0.000000,1.000000,0.781831,0.623490,5.000000e-01,-0.866025,121,5
2023-05-01 00:13:39.447,4.10,2.7470,0.74,0,56.695500,27.667900,10.000,0.000000,1.000000,0.781831,0.623490,5.000000e-01,-0.866025,121,5


In [None]:
# Defining the Window size and Target Predictions
window_size = 24
target_column = df.columns[:7]

In [None]:
# Defining the Input and Output Features
X1 = df
Y1 = df[target_column]

In [None]:
# Scaling the Dataset
scaled_X, scaler_X = scaler_dataset(X1)
scaled_Y, scaler_Y = scaler_dataset(Y1)

In [None]:
# Creting the Window Sequences
X, Y = SingleStepMultiVARS_SeperateSampler(scaled_X, scaled_Y, window_size, target_column)
X, Y = np.array(X), np.array(Y)

In [None]:
X_train, y_train, X_val, y_val, X_test, y_test = split_dataset(X, Y)

In [None]:
# Converting the dataset to Torch DataLoader format
BATCH_SIZE = 64

train_tensor = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
valid_tensor = TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val))
test_tensor = TensorDataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test))

train_dataloader = DataLoader(
    train_tensor, 
    batch_size=BATCH_SIZE, 
    shuffle=False
)

valid_dataloader = DataLoader(
    valid_tensor,
    batch_size=BATCH_SIZE,
    shuffle=False
)

test_dataloader = DataLoader(
    test_tensor,
    batch_size=BATCH_SIZE,
    shuffle=False
)

In [None]:
# Hyperparameters
input_size = X.shape[-1]
hidden_size = 128
num_layers = 4
dropout_prob = 0.45
output_size = len(target_column)
n_epochs = 20
learning_rate = 0.001

early_stopping = Early_Stopping(patience=20, verbose=True)
checkpoint = ModelCheckPoint(file_path=r'C:\Projs\COde\Meteo\MetP\src\model\new_best_lstm_model.pth', verbose=True)

In [None]:
model = EarthquakeModel(input_size, hidden_size, num_layers, output_size, dropout_prob).to("cuda")
criterion = nn.HuberLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=True)

AssertionError: Torch not compiled with CUDA enabled

In [None]:
scaler = GradScaler()
def train_model(model, train_loader, val_loader, criterion, optimizer, lscheduler, num_epochs, early_stopping, checkpoint):
    train_losses, val_losses = [], []
    
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to("cuda"), targets.to("cuda")
            optimizer.zero_grad()
            
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, targets)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            
            train_loss += loss.item()
        
        train_loss /= len(train_loader)
        train_losses.append(train_loss)
        
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to("cuda"), targets.to("cuda")
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
        
        val_loss /= len(val_loader)
        val_losses.append(val_loss)
        
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        
        scheduler.step(val_loss)
        early_stopping(val_loss)
        checkpoint(model, val_loss)
        
        # if early_stopping.early_stop:
        #     print("Early stopping triggered")
        #     break
    
    return train_losses, val_losses

train_losses, val_losses = train_model(model, train_dataloader, valid_dataloader, criterion, optimizer, scheduler, n_epochs, early_stopping, checkpoint)

NameError: name 'model' is not defined

In [None]:
## Validation Step for the Enhanced model.
def test_step():
    # Evaluate on test set
    model.load_state_dict(torch.load(model))
    model.eval()
    test_loss = 0
    predictions = []
    actuals = []

    with torch.no_grad():
        for inputs, targets in test_dataloader:
            inputs, targets = inputs.to("cuda"), targets.to("cuda")
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            predictions.extend(outputs.cpu().numpy())
            actuals.extend(targets.cpu().numpy())

    test_loss /= len(test_dataloader)
    print(f"Test Loss: {test_loss:.4f}")

    # Denormalize predictions and actuals
    predictions = scaler_Y.inverse_transform(np.array(predictions))
    actuals = scaler_Y.inverse_transform(np.array(actuals))

    # Calculate RMSE for each target variable
    for i, col in enumerate(target_column):
        rmse = np.sqrt(np.mean((predictions[:, i] - actuals[:, i])**2))
        print(f"RMSE for {col}: {rmse:.4f}")


---

# Data Manipulation 2

In [1]:
import os
import sys
sys.path.append(os.path.abspath('..'))

import pandas as pd
import numpy as np

from src.helpers.datapi import datas, url_data_call
from src.model.model import Early_Stopping, ModelCheckPoint, EarthquakeModel
from src.preprocessing.data_preprocessing import *

import datetime
from datetime import timedelta


def data_prep() -> pd.DataFrame:
    df = pd.DataFrame()

    for key, values in datas.items():
        # print(f"{key} with value: {values}")
        pseudo_df = url_data_call(datas[key])

        df = pd.concat([df, pseudo_df])

    return df


def prep_D(data_Frame: pd.DataFrame):
    df = data_preprocessing(data_Frame, ts=True)

    df = imput_encode(df)

    df2_ffill = df.copy()
    df2_ffill['dmin'] = df2_ffill['dmin'].ffill()    
    df2_ffill = CyclicTimeTransform(df2_ffill)

    X1, Y1, target_columns = var_and_tar(df2_ffill)
    scaled_X, scaler_X = scaler_dataset(X1)
    scaled_Y, scaler_Y = scaler_dataset(Y1)

    return scaled_X, scaled_Y, scaler_X, scaler_Y


In [4]:
dicg = df.isna().any().to_dict()
dicg
    

{'mag': False,
 'dmin': False,
 'rms': False,
 'magType': False,
 'longitude': False,
 'latitude': False,
 'elevation': False}

In [2]:
df = data_prep()
# scaled_X, scaled_Y, scaler_X, scaler_Y = prep_D(data_Frame=df)

#### Data Manipulation.. Nuff Said

In [5]:
# Testing the Data and its missing value randomness and bias.
'''
from statsmodels.imputation.mice import test_mcar

# Perform Little's MCAR test
statistic, p_value = test_mcar(df)

print(f"Chi-square statistic: {statistic}")
print(f"P-value: {p_value}")
'''

'\nfrom statsmodels.imputation.mice import test_mcar\n\n# Perform Little\'s MCAR test\nstatistic, p_value = test_mcar(df)\n\nprint(f"Chi-square statistic: {statistic}")\nprint(f"P-value: {p_value}")\n'

In [3]:
df = data_preprocessing(df, ts=True)
df = imput_encode(df)
df

Unnamed: 0_level_0,mag,dmin,rms,magType,longitude,latitude,elevation
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-12-31 23:54:48.008,3.60,1.0600,0.51,4,-166.619200,52.694100,35.000
2022-12-31 23:04:12.381,4.90,1.0960,0.75,13,-69.659000,-16.501300,190.386
2022-12-31 22:44:54.316,4.10,3.7780,0.46,0,-179.737700,-17.770500,613.983
2022-12-31 21:14:58.931,3.00,3.7780,0.67,4,-151.154700,62.404900,6.600
2022-12-31 21:00:24.164,2.60,0.6190,0.42,4,-176.029300,51.290000,10.000
...,...,...,...,...,...,...,...
2023-05-01 00:49:00.850,3.09,0.5998,0.21,2,-66.786167,18.948833,14.100
2023-05-01 00:46:53.270,3.13,0.6209,0.26,2,-66.760833,18.970833,8.300
2023-05-01 00:16:20.641,5.10,0.6850,0.66,13,-177.433500,-29.802000,28.056
2023-05-01 00:13:39.447,4.10,2.7470,0.74,0,56.695500,27.667900,10.000


In [13]:
np.ptp(df['magType'], axis=0)

13

In [8]:
df2 = df.copy()
df2.reset_index(inplace=True)

df2['time_since_last_event'] = df2['time'].diff().dt.total_seconds()
df2['time_since_last_event'] = df2['time_since_last_event'].fillna(0)
df2

Unnamed: 0,time,mag,dmin,rms,magType,longitude,latitude,elevation,time_since_last_event
0,2022-12-31 23:54:48.008,3.60,1.0600,0.51,4,-166.619200,52.694100,35.000,0.000
1,2022-12-31 23:04:12.381,4.90,1.0960,0.75,13,-69.659000,-16.501300,190.386,-3035.627
2,2022-12-31 22:44:54.316,4.10,3.7780,0.46,0,-179.737700,-17.770500,613.983,-1158.065
3,2022-12-31 21:14:58.931,3.00,,0.67,4,-151.154700,62.404900,6.600,-5395.385
4,2022-12-31 21:00:24.164,2.60,0.6190,0.42,4,-176.029300,51.290000,10.000,-874.767
...,...,...,...,...,...,...,...,...,...
44563,2023-05-01 00:49:00.850,3.09,0.5998,0.21,2,-66.786167,18.948833,14.100,-191.400
44564,2023-05-01 00:46:53.270,3.13,0.6209,0.26,2,-66.760833,18.970833,8.300,-127.580
44565,2023-05-01 00:16:20.641,5.10,0.6850,0.66,13,-177.433500,-29.802000,28.056,-1832.629
44566,2023-05-01 00:13:39.447,4.10,2.7470,0.74,0,56.695500,27.667900,10.000,-161.194


In [9]:
# Create spatial-temporal grid
df2['time_bin'] = pd.to_datetime(df2['time']).dt.to_period('D')  # Daily bins
df2['lat_bin'] = pd.cut(df2['latitude'], bins=50)  # Adjust bin number as needed
df2['lon_bin'] = pd.cut(df2['longitude'], bins=50)

In [17]:
df2.mag.describe()

count    44568.000000
mean         3.874214
std          0.857750
min          2.500000
25%          3.000000
50%          4.200000
75%          4.500000
max          7.800000
Name: mag, dtype: float64

In [14]:
# Aggregate features
agg_df = df2.groupby(['time_bin', 'lat_bin', 'lon_bin']).agg({
    'mag': ['count', 'max', 'mean'],
    'time': lambda x: (x.max() - x.min()).total_seconds()  # Time span
}).reset_index()

  agg_df = df2.groupby(['time_bin', 'lat_bin', 'lon_bin']).agg({


In [18]:
agg_df.columns = ['time_bin', 'lat_bin', 'lon_bin', 'eq_count', 'max_magnitude', 'avg_magnitude', 'time_span']

In [19]:
agg_df

Unnamed: 0,time_bin,lat_bin,lon_bin,eq_count,max_magnitude,avg_magnitude,time_span
0,2022-05-01,"(-66.002, -62.8]","(-180.359, -172.799]",1,4.7,4.7,0.0
1,2022-05-01,"(-66.002, -62.8]","(-172.799, -165.599]",0,,,
2,2022-05-01,"(-66.002, -62.8]","(-165.599, -158.399]",0,,,
3,2022-05-01,"(-66.002, -62.8]","(-158.399, -151.199]",0,,,
4,2022-05-01,"(-66.002, -62.8]","(-151.199, -143.999]",0,,,
...,...,...,...,...,...,...,...
1524995,2023-12-31,"(83.598, 86.648]","(144.0, 151.2]",0,,,
1524996,2023-12-31,"(83.598, 86.648]","(151.2, 158.4]",0,,,
1524997,2023-12-31,"(83.598, 86.648]","(158.4, 165.599]",0,,,
1524998,2023-12-31,"(83.598, 86.648]","(165.599, 172.799]",0,,,


In [20]:
# Create target variables
agg_df['eq_occurred'] = (agg_df['eq_count'] > 0).astype(int)
agg_df

Unnamed: 0,time_bin,lat_bin,lon_bin,eq_count,max_magnitude,avg_magnitude,time_span,eq_occurred
0,2022-05-01,"(-66.002, -62.8]","(-180.359, -172.799]",1,4.7,4.7,0.0,1
1,2022-05-01,"(-66.002, -62.8]","(-172.799, -165.599]",0,,,,0
2,2022-05-01,"(-66.002, -62.8]","(-165.599, -158.399]",0,,,,0
3,2022-05-01,"(-66.002, -62.8]","(-158.399, -151.199]",0,,,,0
4,2022-05-01,"(-66.002, -62.8]","(-151.199, -143.999]",0,,,,0
...,...,...,...,...,...,...,...,...
1524995,2023-12-31,"(83.598, 86.648]","(144.0, 151.2]",0,,,,0
1524996,2023-12-31,"(83.598, 86.648]","(151.2, 158.4]",0,,,,0
1524997,2023-12-31,"(83.598, 86.648]","(158.4, 165.599]",0,,,,0
1524998,2023-12-31,"(83.598, 86.648]","(165.599, 172.799]",0,,,,0


In [15]:
# df.set_index('timestamp', inplace=True)
scaled_X2 = scaled_X.copy()
scaled_X2.reset_index(inplace=True)
scaled_X2['time']

# Time since last event
scaled_X2['time_since_last_event'] = scaled_X2['time'].diff().dt.total_seconds()

In [None]:
# Create spatial-temporal grid
scaled_X2['time_bin'] = pd.to_datetime(scaled_X2['timestamp']).dt.to_period('D')  # Daily bins
scaled_X2['lat_bin'] = pd.cut(scaled_X2['latitude'], bins=50)  # Adjust bin number as needed
scaled_X2['lon_bin'] = pd.cut(scaled_X2['longitude'], bins=50)

In [16]:
scaled_X2.describe()

Unnamed: 0,time,mag,dmin,rms,magType,longitude,latitude,elevation,Hour sin,Hour cos,Day sin,Day cos,Month sin,Month cos,day_of_year,month,time_since_last_event
count,44568,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44568.0,44567.0
mean,2023-03-06 07:22:12.514709760,0.259286,0.050399,0.300489,0.171227,0.432719,0.55482,0.100025,0.503513,0.506551,0.536587,0.4675826,0.421292,0.492095,0.569425,0.578534,232.6581
min,2022-05-01 00:02:55.593000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-14770.55
25%,2022-10-01 19:14:15.496750080,0.09434,0.009312,0.196721,0.0,0.071912,0.394231,0.020059,0.146447,0.146447,0.099031,6.661338e-16,0.066987,0.066987,0.373626,0.363636,-1613.947
50%,2023-03-02 22:35:14.872999936,0.320755,0.026928,0.306011,0.0,0.314093,0.558105,0.033198,0.5,0.5,0.5,0.3568959,0.5,0.5,0.590659,0.636364,-766.683
75%,2023-08-06 19:52:42.012000,0.377358,0.058793,0.393443,0.307692,0.850201,0.73693,0.097876,0.853553,0.853553,0.900969,0.8019377,0.75,0.933013,0.802198,0.818182,-302.381
max,2023-12-31 23:18:09.731000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,31535720.0
std,,0.16184,0.078753,0.144171,0.252986,0.357661,0.199926,0.166572,0.353436,0.353601,0.365372,0.3671948,0.328462,0.368593,0.273326,0.295512,211249.0


In [17]:
scaled_X2 = scaled_X2.fillna(0)
scaled_X2
# scaled_X2.isna().sum()

Unnamed: 0,time,mag,dmin,rms,magType,longitude,latitude,elevation,Hour sin,Hour cos,Day sin,Day cos,Month sin,Month cos,day_of_year,month,time_since_last_event
0,2022-12-31 23:54:48.008,0.207547,0.020031,0.278689,0.307692,0.037165,0.777350,0.056557,0.370590,0.982963,0.722521,7.216450e-16,0.50,1.000000,1.00000,1.000000,0.000
1,2022-12-31 23:04:12.381,0.452830,0.020711,0.409836,1.000000,0.306501,0.323602,0.283405,0.370590,0.982963,0.722521,7.216450e-16,0.50,1.000000,1.00000,1.000000,-3035.627
2,2022-12-31 22:44:54.316,0.301887,0.071392,0.251366,0.000000,0.000725,0.315279,0.901814,0.250000,0.933013,0.722521,7.216450e-16,0.50,1.000000,1.00000,1.000000,-1158.065
3,2022-12-31 21:14:58.931,0.094340,0.071392,0.366120,0.307692,0.080123,0.841028,0.015095,0.146447,0.853553,0.722521,7.216450e-16,0.50,1.000000,1.00000,1.000000,-5395.385
4,2022-12-31 21:00:24.164,0.018868,0.011697,0.229508,0.307692,0.011026,0.768142,0.020059,0.146447,0.853553,0.722521,7.216450e-16,0.50,1.000000,1.00000,1.000000,-874.767
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44563,2023-05-01 00:49:00.850,0.111321,0.011334,0.114754,0.153846,0.314481,0.556065,0.026045,0.500000,1.000000,0.900969,8.019377e-01,0.75,0.066987,0.32967,0.363636,-191.400
44564,2023-05-01 00:46:53.270,0.118868,0.011733,0.142076,0.153846,0.314551,0.556210,0.017577,0.500000,1.000000,0.900969,8.019377e-01,0.75,0.066987,0.32967,0.363636,-127.580
44565,2023-05-01 00:16:20.641,0.490566,0.012944,0.360656,1.000000,0.007126,0.236382,0.046419,0.500000,1.000000,0.900969,8.019377e-01,0.75,0.066987,0.32967,0.363636,-1832.629
44566,2023-05-01 00:13:39.447,0.301887,0.051910,0.404372,0.000000,0.657487,0.613241,0.020059,0.500000,1.000000,0.900969,8.019377e-01,0.75,0.066987,0.32967,0.363636,-161.194


In [18]:
# Event counts in different windows
for window in [1, 7, 30]:
    scaled_X2[f'events_last_{window}'] = scaled_X2['time'].rolling(window).count()

In [11]:
scaled_X2['events_last_7d'] = scaled_X2['time'].rolling(window=7).count()

In [16]:
# Resampled daily features
daily_df = scaled_X2.resample('D', on='time').agg({
    'mag': ['count', 'max', 'mean'],
    'time_since_last_event': 'mean'
})
daily_df.columns = ['_'.join(col).strip() for col in daily_df.columns.values]

In [1]:
daily_df

NameError: name 'daily_df' is not defined

In [20]:
# Merge daily features back to original DataFrame
new_scaled_X2 = pd.merge_asof(scaled_X2, daily_df, left_on='time', right_index=True)

# Add cumulative features
new_scaled_X2['cumulative_events'] = range(1, len(new_scaled_X2) + 1)
new_scaled_X2s['cumulative_magnitude'] = new_scaled_X2['magnitude'].cumsum()

MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False

In [17]:
daily_df

Unnamed: 0_level_0,mag_count,mag_max,mag_mean,time_since_last_event_mean
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-05-01,64,0.566038,0.259611,-1422.193328
2022-05-02,64,0.584906,0.222406,-1277.756125
2022-05-03,64,0.471698,0.221728,-1415.068313
2022-05-04,65,0.584906,0.265544,-1267.787062
2022-05-05,74,0.660377,0.277384,-1164.871432
...,...,...,...,...
2023-12-27,76,0.622642,0.216509,-1048.242474
2023-12-28,74,0.754717,0.281515,-1162.970689
2023-12-29,77,0.603774,0.263293,-1119.976351
2023-12-30,86,0.716981,0.264063,-1007.776872


In [8]:
import os
import sys

import pandas as pd
import numpy as np

sys.path.append(os.path.abspath('..'))

from src.helpers.datapi import datas, url_data_call
from src.model.model import Early_Stopping, ModelCheckPoint, EarthquakeModel
from src.preprocessing.data_preprocessing import *

def raw_data_prep() -> pd.DataFrame:
    """
    Calls and defines the data and returns a Pandas DataFrame with basic preprocssing.
    """
    df = pd.DataFrame()

    for key, values in datas.items():
        # print(f"{key} with value: {values}")
        pseudo_df = url_data_call(datas[key])

        df = pd.concat([df, pseudo_df])

    df = data_preprocessing(df, ts=True) ## This function performs basic proecprocessing with an option of Timeseries or not.
    df = imput_encode(df) ## This function encodes and imputs the input data and fills the empty values.

    return df

def prep_D(data_Frame: pd.DataFrame):
    """
    Takes in the raw dataframe and returns a curated and scaled dataframe
    """
    df2_ffill = df.copy()
    df2_ffill = CyclicTimeTransform(df2_ffill)

    X1, Y1, target_columns = var_and_tar(df2_ffill)
    scaled_X, scaler_X = scaler_dataset(X1)
    scaled_Y, scaler_Y = scaler_dataset(Y1)

    return scaled_X, scaled_Y, scaler_X, scaler_Y

def event_counts_for_diff_window(dataFrame: pd.DataFrame) -> pd.DataFrame:
    new_df = dataFrame.copy()

    new_df.reset_index(inplace=True)
    new_df.sort_values('time', inplace=True)

    new_df['time_since_last_event'] = new_df['time'].diff().dt.total_seconds()
    # new_df['time_since_last_event'] = new_df['time_since_last_event'].fillna(0)
    
    # Event counts in different windows
    for window in [1, 7, 30]:
        new_df[f'events_last_{window}'] = new_df['time'].rolling(window).count()

    new_df = new_df.fillna(0)
    daily_df = new_df.resample('D', on='time').agg({
    'mag': ['count', 'max', 'mean'],
    'time_since_last_event': 'mean'
    })

    daily_df.columns = ['_'.join(col).strip() for col in daily_df.columns.values]
    daily_df = daily_df.reset_index()

    # # Merge daily features back to original DataFrame
    new_df = pd.merge_asof(df, daily_df, left_on='time', right_on='time', 
                   tolerance=pd.Timedelta('1D'), direction='backward')
    return new_df


df  = raw_data_prep() # Getting the Data
df2 = df.copy()

# agg_df = event_counts_for_diff_window(dataFrame=df2)

event_counts_for_diff_window(dataFrame=df2)

Unnamed: 0,time,mag,dmin,rms,magType,longitude,latitude,elevation,mag_count,mag_max,mag_mean,time_since_last_event_mean
0,2022-05-01 00:02:55.593,4.40,20.9710,0.64,0,27.6375,-52.770900,10.000,64,5.5,3.875938,1341.188766
1,2022-05-01 01:22:46.818,4.70,2.5540,0.77,0,142.3848,29.653000,10.000,64,5.5,3.875938,1341.188766
2,2022-05-01 01:33:35.201,4.20,1.3460,1.06,0,24.1134,39.603900,10.000,64,5.5,3.875938,1341.188766
3,2022-05-01 01:52:26.244,4.50,1.6090,0.83,0,126.4725,26.794400,10.000,64,5.5,3.875938,1341.188766
4,2022-05-01 02:10:52.717,4.30,2.4160,0.65,0,127.9058,3.142900,106.400,64,5.5,3.875938,1341.188766
...,...,...,...,...,...,...,...,...,...,...,...,...
44563,2023-12-31 21:42:27.330,2.56,0.1071,0.17,2,-66.8160,18.064167,14.760,73,5.3,3.782055,1152.619986
44564,2023-12-31 22:13:49.174,4.20,0.4520,0.53,0,144.5892,13.952600,193.928,73,5.3,3.782055,1152.619986
44565,2023-12-31 22:48:19.016,4.40,3.4550,0.54,0,-178.3327,-17.506500,540.654,73,5.3,3.782055,1152.619986
44566,2023-12-31 22:52:31.316,4.20,9.1640,0.68,0,141.4208,-2.688600,10.000,73,5.3,3.782055,1152.619986
