In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [2]:

# Build a reproducible synthetic daily series covering calendar year 2022.
# The legacy global seed is fixed so the random draws repeat on every run.
np.random.seed(42)
date_range = pd.date_range(start='2022-01-01', end='2022-12-31', freq='D')
noise = np.random.randn(len(date_range))
# One standard-normal draw per day, indexed by the 'Date' column.
data = pd.DataFrame({'Date': date_range, 'Value': noise}).set_index('Date')
data

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
2022-01-01,0.496714
2022-01-02,-0.138264
2022-01-03,0.647689
2022-01-04,1.523030
2022-01-05,-0.234153
...,...
2022-12-27,0.519347
2022-12-28,1.532739
2022-12-29,-0.108760
2022-12-30,0.401712


In [4]:
# 1. Lag Features
# Add lag_1 .. lag_7: 'Value' shifted down by one to seven rows. The first k
# rows of lag_k are NaN because no earlier observation exists for them.
value_series = data['Value']
for periods_back in range(1, 8):
    data[f'lag_{periods_back}'] = value_series.shift(periods_back)
data

Unnamed: 0_level_0,Value,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-01,0.496714,,,,,,,
2022-01-02,-0.138264,0.496714,,,,,,
2022-01-03,0.647689,-0.138264,0.496714,,,,,
2022-01-04,1.523030,0.647689,-0.138264,0.496714,,,,
2022-01-05,-0.234153,1.523030,0.647689,-0.138264,0.496714,,,
...,...,...,...,...,...,...,...,...
2022-12-27,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,-0.159939
2022-12-28,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016
2022-12-29,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529
2022-12-30,0.401712,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513


In [5]:
# 2. Rolling Window Statistics
# Mean and sample standard deviation over a trailing 7-row window; the first
# six rows are NaN because the window is not yet full.
# NOTE(review): the window includes the current row's 'Value'. If 'Value' is
# the prediction target, consider shifting by one row first - confirm intent.
weekly_window = data['Value'].rolling(window=7)
data['rolling_mean'] = weekly_window.mean()
data['rolling_std'] = weekly_window.std()
data

Unnamed: 0_level_0,Value,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,rolling_mean,rolling_std
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-01-01,0.496714,,,,,,,,,
2022-01-02,-0.138264,0.496714,,,,,,,,
2022-01-03,0.647689,-0.138264,0.496714,,,,,,,
2022-01-04,1.523030,0.647689,-0.138264,0.496714,,,,,,
2022-01-05,-0.234153,1.523030,0.647689,-0.138264,0.496714,,,,,
...,...,...,...,...,...,...,...,...,...,...
2022-12-27,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,-0.159939,-0.187698,0.562829
2022-12-28,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,0.033982,0.864881
2022-12-29,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,0.161663,0.743866
2022-12-30,0.401712,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,0.221695,0.743858


In [7]:
# 3. Expanding Window Statistics
# Running mean and sample standard deviation over every row seen so far.
# The std of the very first row is NaN (a single observation has no spread).
# NOTE(review): each statistic includes the current row's 'Value' - confirm
# that is acceptable if 'Value' is later used as the prediction target.
history_so_far = data['Value'].expanding()
data['expanding_mean'] = history_so_far.mean()
data['expanding_std'] = history_so_far.std()
data

Unnamed: 0_level_0,Value,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,rolling_mean,rolling_std,expanding_mean,expanding_std
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-01-01,0.496714,,,,,,,,,,0.496714,
2022-01-02,-0.138264,0.496714,,,,,,,,,0.179225,0.448998
2022-01-03,0.647689,-0.138264,0.496714,,,,,,,,0.335379,0.417076
2022-01-04,1.523030,0.647689,-0.138264,0.496714,,,,,,,0.632292,0.684541
2022-01-05,-0.234153,1.523030,0.647689,-0.138264,0.496714,,,,,,0.459003,0.708232
...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-27,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,-0.159939,-0.187698,0.562829,0.003088,0.948972
2022-12-28,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,0.033982,0.864881,0.007313,0.951061
2022-12-29,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,0.161663,0.743866,0.006993,0.949766
2022-12-30,0.401712,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,0.221695,0.743858,0.008078,0.948683


In [8]:
# 4. Datetime Features
# Calendar attributes read straight off the DatetimeIndex.
timestamps = data.index
data['day_of_week'] = timestamps.dayofweek  # 0 = Monday ... 6 = Sunday
data['month'] = timestamps.month
data['quarter'] = timestamps.quarter
data

Unnamed: 0_level_0,Value,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,rolling_mean,rolling_std,expanding_mean,expanding_std,day_of_week,month,quarter
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2022-01-01,0.496714,,,,,,,,,,0.496714,,5,1,1
2022-01-02,-0.138264,0.496714,,,,,,,,,0.179225,0.448998,6,1,1
2022-01-03,0.647689,-0.138264,0.496714,,,,,,,,0.335379,0.417076,0,1,1
2022-01-04,1.523030,0.647689,-0.138264,0.496714,,,,,,,0.632292,0.684541,1,1,1
2022-01-05,-0.234153,1.523030,0.647689,-0.138264,0.496714,,,,,,0.459003,0.708232,2,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-27,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,-0.159939,-0.187698,0.562829,0.003088,0.948972,1,12,4
2022-12-28,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,0.033982,0.864881,0.007313,0.951061,2,12,4
2022-12-29,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,0.161663,0.743866,0.006993,0.949766,3,12,4
2022-12-30,0.401712,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,0.221695,0.743858,0.008078,0.948683,4,12,4


In [9]:
# 5. Seasonal Features (Example: Fourier terms)
# Each fourier_k column is the k-th sine harmonic of an annual cycle
# (period 365.25 days), evaluated at the row's day of year and then
# standardised to zero mean / unit variance over the whole frame.
# NOTE(review): only sine terms are generated; a full Fourier basis usually
# pairs each sine with a cosine so phase can be represented - confirm intent.
fourier_terms = 5  # Number of harmonics to generate
day_of_year = data.index.dayofyear
for harmonic in range(1, fourier_terms + 1):
    column = f'fourier_{harmonic}'
    data[column] = np.sin(2 * np.pi * harmonic * day_of_year / 365.25)
    # The scaler is fit per column on the single-column frame it transforms.
    data[column] = StandardScaler().fit_transform(data[[column]])
data

Unnamed: 0_level_0,Value,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,rolling_mean,rolling_std,expanding_mean,expanding_std,day_of_week,month,quarter,fourier_1,fourier_2,fourier_3,fourier_4,fourier_5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2022-01-01,0.496714,,,,,,,,,,0.496714,,5,1,1,0.024325,0.048642,0.072945,0.097227,0.121479
2022-01-02,-0.138264,0.496714,,,,,,,,,0.179225,0.448998,6,1,1,0.048636,0.097214,0.145677,0.193968,0.242029
2022-01-03,0.647689,-0.138264,0.496714,,,,,,,,0.335379,0.417076,0,1,1,0.072933,0.145671,0.218022,0.289792,0.360790
2022-01-04,1.523030,0.647689,-0.138264,0.496714,,,,,,,0.632292,0.684541,1,1,1,0.097208,0.193956,0.289785,0.384244,0.476884
2022-01-05,-0.234153,1.523030,0.647689,-0.138264,0.496714,,,,,,0.459003,0.708232,2,1,1,0.121454,0.242011,0.360778,0.476878,0.589452
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-27,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,-0.159939,-0.187698,0.562829,0.003088,0.948972,1,12,4,-0.103260,-0.205968,-0.307576,-0.407540,-0.505327
2022-12-28,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,0.033982,0.864881,0.007313,0.951061,2,12,4,-0.078991,-0.157736,-0.235987,-0.313501,-0.390035
2022-12-29,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,0.161663,0.743866,0.006993,0.949766,3,12,4,-0.054699,-0.109316,-0.163770,-0.217978,-0.271859
2022-12-30,0.401712,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,0.221695,0.743858,0.008078,0.948683,4,12,4,-0.030391,-0.060768,-0.091116,-0.121423,-0.151673


In [10]:
# 6. Time-based Features
# Elapsed days since the earliest timestamp in the index. Using min() rather
# than position 0 keeps the feature correct even if the rows are not sorted.
data['days_since_start'] = (data.index - data.index.min()).days

# Days remaining in each row's own calendar year (0 on 31 December).
# Computed per row as (days in that year) - (day of year), so the value stays
# correct when the series ends before 31 December or spans several years;
# measuring against the last timestamp in the data would only be right for a
# single-year series that happens to end on 31 December.
day_of_year = np.asarray(data.index.dayofyear)
days_in_year = np.where(data.index.is_leap_year, 366, 365)
data['days_until_end_of_year'] = days_in_year - day_of_year
data

Unnamed: 0_level_0,Value,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,rolling_mean,rolling_std,...,day_of_week,month,quarter,fourier_1,fourier_2,fourier_3,fourier_4,fourier_5,days_since_start,days_until_end_of_year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-01,0.496714,,,,,,,,,,...,5,1,1,0.024325,0.048642,0.072945,0.097227,0.121479,0,364
2022-01-02,-0.138264,0.496714,,,,,,,,,...,6,1,1,0.048636,0.097214,0.145677,0.193968,0.242029,1,363
2022-01-03,0.647689,-0.138264,0.496714,,,,,,,,...,0,1,1,0.072933,0.145671,0.218022,0.289792,0.360790,2,362
2022-01-04,1.523030,0.647689,-0.138264,0.496714,,,,,,,...,1,1,1,0.097208,0.193956,0.289785,0.384244,0.476884,3,361
2022-01-05,-0.234153,1.523030,0.647689,-0.138264,0.496714,,,,,,...,2,1,1,0.121454,0.242011,0.360778,0.476878,0.589452,4,360
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-27,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,-0.159939,-0.187698,0.562829,...,1,12,4,-0.103260,-0.205968,-0.307576,-0.407540,-0.505327,360,4
2022-12-28,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,-0.019016,0.033982,0.864881,...,2,12,4,-0.078991,-0.157736,-0.235987,-0.313501,-0.390035,361,3
2022-12-29,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,-1.002529,0.161663,0.743866,...,3,12,4,-0.054699,-0.109316,-0.163770,-0.217978,-0.271859,362,2
2022-12-30,0.401712,-0.108760,1.532739,0.519347,-0.827231,0.322719,-0.288659,-0.018513,0.221695,0.743858,...,4,12,4,-0.030391,-0.060768,-0.091116,-0.121423,-0.151673,363,1


In [11]:
# Preview the engineered features: a header line followed by the first five
# rows of the frame, printed as plain text.
print("Transformed DataFrame:", data.head(), sep="\n")

# Persist the full feature table (the Date index is written as a column).
data.to_csv('transformed_data.csv')

Transformed DataFrame:
               Value     lag_1     lag_2     lag_3     lag_4  lag_5  lag_6  \
Date                                                                         
2022-01-01  0.496714       NaN       NaN       NaN       NaN    NaN    NaN   
2022-01-02 -0.138264  0.496714       NaN       NaN       NaN    NaN    NaN   
2022-01-03  0.647689 -0.138264  0.496714       NaN       NaN    NaN    NaN   
2022-01-04  1.523030  0.647689 -0.138264  0.496714       NaN    NaN    NaN   
2022-01-05 -0.234153  1.523030  0.647689 -0.138264  0.496714    NaN    NaN   

            lag_7  rolling_mean  rolling_std  ...  day_of_week  month  \
Date                                          ...                       
2022-01-01    NaN           NaN          NaN  ...            5      1   
2022-01-02    NaN           NaN          NaN  ...            6      1   
2022-01-03    NaN           NaN          NaN  ...            0      1   
2022-01-04    NaN           NaN          NaN  ...            1   