# May have found a simpler TimeSeries example

https://www.kaggle.com/code/iamleonie/intro-to-time-series-forecasting

In [1]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
import numpy as np

data = pd.read_csv('data.txt', delim_whitespace=True)
data['Timestamp'] = data['Timestamp'].apply(lambda x: pd.to_datetime(x, format='%Y-%m-%d:%H:%M:%S.%f'))
#data['Timestamp'] = data['Timestamp'].apply(lambda x: pd.Timestamp.timestamp(x))
data.set_index('Timestamp', inplace=True)
data.drop('Source', axis=1, inplace=True)


for c in data.columns:
    data[c].replace(to_replace=-99999.000000, value=float("NaN"), inplace=True)
   


  data = pd.read_csv('data.txt', delim_whitespace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[c].replace(to_replace=-99999.000000, value=float("NaN"), inplace=True)


Option 1: replace with the mean value of the column

In [2]:
# for c in data.columns:
#     data[c].fillna(value=data[c].mean(), inplace=True)

Option 2: try to interpolate

In [3]:
for c in data.columns:
    data[c].interpolate(inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[c].interpolate(inplace=True)


Take only data of medians

In [4]:
data.columns

Index(['Bt_med', 'Bt_min', 'Bt_max', 'Bx_med', 'Bx_min', 'Bx_max', 'By_med',
       'By_min', 'By_max', 'Bz_med', 'Bz_min', 'Bz_max', 'Phi_mean', 'Phi_min',
       'Phi_max', 'Theta_med', 'Theta_min', 'Theta_max', 'Dens_med',
       'Dens_min', 'Dens_max', 'Speed_med', 'Speed_min', 'Speed_max',
       'Temp_med', 'Temp_min', 'Temp_max'],
      dtype='object')

In [5]:
datatouse = data.drop(['Bt_min', 'Bt_max',  'Bx_min', 'Bx_max',
       'By_min', 'By_max', 'Bz_min', 'Bz_max',  'Phi_min',
       'Phi_max',  'Theta_min', 'Theta_max', 
       'Dens_min', 'Dens_max',  'Speed_min', 'Speed_max',
        'Temp_min', 'Temp_max'], axis=1)
datatouse

Unnamed: 0_level_0,Bt_med,Bx_med,By_med,Bz_med,Phi_mean,Theta_med,Dens_med,Speed_med,Temp_med
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-03-13 00:00:00,7.21,2.60,-0.74,-6.64,346.63,-68.61,0.18,555.24,136135.0
2025-03-13 00:05:00,6.98,2.03,-0.43,-6.68,337.12,-72.67,0.16,558.27,127127.0
2025-03-13 00:10:00,6.64,3.81,-0.55,-5.37,350.20,-54.55,0.11,536.44,118924.0
2025-03-13 00:15:00,6.27,3.85,-3.39,-2.40,332.22,-22.92,0.10,548.70,176311.0
2025-03-13 00:20:00,6.55,5.86,-2.40,1.04,338.21,9.21,0.10,534.14,246684.0
...,...,...,...,...,...,...,...,...,...
2025-03-19 18:10:00,6.45,5.12,-3.51,-2.28,319.36,-20.69,12.27,501.60,250832.0
2025-03-19 18:15:00,7.08,5.77,-1.22,-3.26,350.09,-28.73,12.00,500.80,249696.0
2025-03-19 18:20:00,7.59,4.50,-1.47,-5.70,339.30,-48.82,11.81,500.70,237126.0
2025-03-19 18:25:00,7.84,4.72,-0.92,-6.04,348.99,-52.18,12.41,496.50,227496.0


In [6]:
target = 'Bt-med'
features = [col for col in datatouse.columns if col != target]
datatouse[datatouse.Bt_med > 7.0]



Unnamed: 0_level_0,Bt_med,Bx_med,By_med,Bz_med,Phi_mean,Theta_med,Dens_med,Speed_med,Temp_med
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-03-13 00:00:00,7.21,2.60,-0.74,-6.64,346.63,-68.61,0.18,555.24,136135.0
2025-03-13 00:30:00,7.03,3.49,-5.40,2.59,306.43,21.33,0.10,543.47,180501.0
2025-03-13 00:35:00,7.04,3.81,-5.70,1.90,303.71,15.68,0.10,538.52,158357.0
2025-03-13 00:40:00,7.11,4.31,-5.34,1.86,309.01,15.14,0.10,534.86,183631.0
2025-03-13 00:45:00,7.10,4.73,-5.11,1.59,314.07,12.81,0.10,532.62,250947.0
...,...,...,...,...,...,...,...,...,...
2025-03-19 17:15:00,7.24,3.22,-3.68,-5.46,312.00,-48.24,11.89,489.70,234755.0
2025-03-19 18:15:00,7.08,5.77,-1.22,-3.26,350.09,-28.73,12.00,500.80,249696.0
2025-03-19 18:20:00,7.59,4.50,-1.47,-5.70,339.30,-48.82,11.81,500.70,237126.0
2025-03-19 18:25:00,7.84,4.72,-0.92,-6.04,348.99,-52.18,12.41,496.50,227496.0


In [8]:
len(data.columns)

27

LSTM or GRU

# Try an RNN