In [1]:
import pandas as pd
import numpy as np

# --- Configuration ---
# Everything a reader might want to tune lives here, so the feature code
# below contains no magic numbers.
SEED = 42            # fixes the random draw so the printed tables are reproducible
N_PERIODS = 10       # number of daily observations to generate
LAGS = (1, 2, 3)     # one 'lag_<k>' column is created per entry
ROLLING_WINDOW = 2   # number of rows averaged by the rolling-mean feature

# --- Generate a sample time-series dataset ---
np.random.seed(SEED)
date_range = pd.date_range(start='2020-01-01', periods=N_PERIODS, freq='D')

# Build the frame and promote 'date' to the index in a single expression.
# set_index returns a new frame, so no in-place mutation is needed and
# re-running this code always starts from the same state.
data = pd.DataFrame({
    'date': date_range,
    'value': np.random.randn(N_PERIODS) * 10 + 50  # Some random data
}).set_index('date')

# Show the original dataset
print("Original Time-Series Data:")
print(data)

# --- Creating Lag Features ---

# shift(k) moves 'value' down by k rows, so row t holds the value observed
# at row t-k; the first k rows have no predecessor and become NaN.
for lag in LAGS:
    data[f'lag_{lag}'] = data['value'].shift(lag)

# Show the dataset with lag features
print("\nTime-Series Data with Lag Features:")
print(data)

# --- Additional Rolling Statistics as Features ---

# Mean over the current row and the preceding ROLLING_WINDOW - 1 rows; rows
# without a full window are NaN.
# NOTE(review): unlike the lag columns, this window includes the current
# row's own 'value'. If the column is meant to be a predictor of 'value',
# it probably needs a .shift(1) before .rolling(...) -- confirm intent.
data[f'rolling_mean_{ROLLING_WINDOW}'] = (
    data['value'].rolling(window=ROLLING_WINDOW).mean()
)

# Show the dataset with rolling statistics
print("\nTime-Series Data with Rolling Mean Feature:")
print(data)


Original Time-Series Data:
                value
date                 
2020-01-01  54.967142
2020-01-02  48.617357
2020-01-03  56.476885
2020-01-04  65.230299
2020-01-05  47.658466
2020-01-06  47.658630
2020-01-07  65.792128
2020-01-08  57.674347
2020-01-09  45.305256
2020-01-10  55.425600

Time-Series Data with Lag Features:
                value      lag_1      lag_2      lag_3
date                                                  
2020-01-01  54.967142        NaN        NaN        NaN
2020-01-02  48.617357  54.967142        NaN        NaN
2020-01-03  56.476885  48.617357  54.967142        NaN
2020-01-04  65.230299  56.476885  48.617357  54.967142
2020-01-05  47.658466  65.230299  56.476885  48.617357
2020-01-06  47.658630  47.658466  65.230299  56.476885
2020-01-07  65.792128  47.658630  47.658466  65.230299
2020-01-08  57.674347  65.792128  47.658630  47.658466
2020-01-09  45.305256  57.674347  65.792128  47.658630
2020-01-10  55.425600  45.305256  57.674347  65.792128

Time-Series