# Unveiling Temporal Kolmogorov–Arnold Networks (TKAN): A Mathematical Journey into Advanced Time Series Forecasting

### Exploring the Intersection of Nonlinear Dynamics and Machine Learning for Temporal Data Mastery

-------

Author: Bruno Muñoz Marcos



*In an increasingly data-driven world, the ability to predict future events with precision is a cornerstone of scientific advancement and technological innovation. From anticipating financial market movements to forecasting climate patterns, accurate predictions empower decision-makers to strategize effectively, mitigate risks, and seize opportunities.
Time series forecasting, the science of predicting future values based on previously observed values, is pivotal across various domains. Despite significant progress, forecasting remains a formidable challenge due to the inherent complexities of temporal data - nonlinearities, long-term dependencies, and stochastic behaviors.*

In [None]:
# 1. Libraries and data cleansing:
#-------------------------------------------------------------------------------------------------------------------

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from tkan import TKAN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
import random 
import matplotlib.dates as mdates

# Seed the three random number generators used in this notebook (NumPy,
# TensorFlow and Python's `random`) with the same value.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)
random.seed(seed)


# Read the price column as text. If pandas were allowed to guess a numeric dtype,
# a thousands-separated value such as "4.500" would silently become 4.5.
data = pd.read_csv('sp500.csv', dtype={'Último': str})

# NOTE(review): assumes 'Último' is written in Spanish number format, i.e. '.' as
# thousands separator and ',' as decimal mark ("4.567,89"). This is inferred from the
# previous conversion (strip ',' then multiply by 1000), which only produced the right
# value when there was exactly one thousands group: "987,65" became 98 765 000 and
# "1.234.567,89" raised. Confirm against the CSV.
data['Último'] = (
    data['Último']
    .str.replace('.', '', regex=False)   # drop thousands separators
    .str.replace(',', '.', regex=False)  # decimal comma -> decimal point
    .astype(float)
)

# NOTE(review): if 'Fecha' is stored day-first (e.g. "31.12.2023"), pass an explicit
# `format=` or `dayfirst=True` here so ambiguous dates are not read month-first.
data['Fecha'] = pd.to_datetime(data['Fecha'])

# The windows built later rely on chronological order.
data = data.sort_values(by='Fecha', ascending=True).reset_index(drop=True)

# Single-feature column vector, the 2-D layout MinMaxScaler expects.
data_series = data['Último'].to_numpy().reshape(-1, 1)


# 2. Data modelling:
#-------------------------------------------------------------------------------------------------------------------

def _to_price_scale(fitted_scaler, scaled_values):
    """Undo the min-max scaling on an array of any shape.

    The scaler is fitted on a single feature column, so the values are laid out
    as one column for ``inverse_transform`` and then given their original shape
    back. This avoids relying on broadcasting when the array has one column per
    forecast step.
    """
    scaled_values = np.asarray(scaled_values)
    as_column = fitted_scaler.inverse_transform(scaled_values.reshape(-1, 1))
    return as_column.reshape(scaled_values.shape)


window_size = 20
future_steps_list = [1, 2, 3, 5, 6, 7, 10, 12]
results = []

for future_steps in future_steps_list:

    # Each sample is `window_size` inputs followed by `future_steps` targets.
    n_samples = len(data_series) - window_size - future_steps + 1
    split_index = int(n_samples * 0.8)

    # Fit the scaler only on the observations the training samples can reach
    # (inputs and targets of samples 0 .. split_index - 1). Fitting on the whole
    # series would leak the test period's min/max into training.
    train_end = split_index + window_size + future_steps - 1
    scaler = MinMaxScaler()
    scaler.fit(data_series[:train_end])
    data_series_scaled = scaler.transform(data_series)

    # X: (n_samples, window_size, 1), y: (n_samples, future_steps)
    X = np.array([data_series_scaled[i:i + window_size] for i in range(n_samples)])
    y = np.array([
        data_series_scaled[i + window_size:i + window_size + future_steps].flatten()
        for i in range(n_samples)
    ])

    # Chronological split: first 80 % of the samples train, the rest test.
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    # Date of the first forecast step of every sample (not used below; kept so the
    # names stay available to other notebook cells).
    fechas_train = data['Fecha'].iloc[window_size:split_index + window_size].values
    fechas_test = data['Fecha'].iloc[split_index + window_size:split_index + window_size + len(y_test)].values

    # 3. Construction and training of the TKAN model:
    #---------------------------------------------------------------------------------------------------------------

    model_tkan = Sequential([
        TKAN(200, sub_kan_configs=[{'spline_order': 4, 'grid_size': 12}, {'spline_order': 3, 'grid_size': 10}, {'spline_order': 5, 'grid_size': 8}],
            return_sequences=False, use_bias=True),
        Dense(units=future_steps, activation='linear')
    ])

    model_tkan.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    history_tkan = model_tkan.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

    y_pred_tkan = model_tkan.predict(X_test)

    # Back to price units so the reported MSE is in the units of the series.
    y_test_inverse = _to_price_scale(scaler, y_test)
    y_pred_tkan_inverse = _to_price_scale(scaler, y_pred_tkan)

    # 4. Construction and training of the LSTM model:
    #---------------------------------------------------------------------------------------------------------------

    model_lstm = Sequential([
        LSTM(200, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False),
        Dense(units=future_steps, activation='linear')
    ])
    model_lstm.compile(optimizer='adam', loss='mse')

    history_lstm = model_lstm.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

    y_pred_lstm = model_lstm.predict(X_test)

    y_pred_lstm_inverse = _to_price_scale(scaler, y_pred_lstm)

    # 5. Evaluation of models and storage of results:
    #---------------------------------------------------------------------------------------------------------------

    mse_tkan = mean_squared_error(y_test_inverse, y_pred_tkan_inverse)
    r2_tkan = r2_score(y_test_inverse, y_pred_tkan_inverse)

    mse_lstm = mean_squared_error(y_test_inverse, y_pred_lstm_inverse)
    r2_lstm = r2_score(y_test_inverse, y_pred_lstm_inverse)

    print(f'Modelo TKAN - MSE: {mse_tkan}, R^2: {r2_tkan}')
    print(f'Modelo LSTM - MSE: {mse_lstm}, R^2: {r2_lstm}')

    results.append({
        'future_steps': future_steps,
        'R^2 TKAN': r2_tkan,
        'R^2 LSTM': r2_lstm
    })

# Build the summary once, after every horizon has run. It must be the last
# top-level expression of the cell for the notebook to render it; inside the
# loop body the bare `results_df` expression displayed nothing.
results_df = pd.DataFrame(results)
results_df

In [None]:
# 6. Visualisation of the graphics:
#-------------------------------------------------------------------------------------------------------------------

def _plot_test_forecast(dates, actual, predicted, label, color, title, filename):
    """Plot one model's test-set predictions against the real values.

    Draws the real series as a solid blue line and the predictions as a dashed
    line in `color`, uses one x tick per month, saves the figure to `filename`
    and then shows it.
    """
    plt.figure(figsize=(12, 6))

    plt.plot(dates, actual.flatten(), label='Real Values', color='blue', linewidth=2)
    plt.plot(dates, predicted.flatten(), label=label, color=color, linestyle='--', linewidth=2)

    plt.legend(loc='upper left', fontsize=12)
    plt.title(title, fontsize=16, fontweight='bold')
    plt.ylabel('SP500', fontsize=14)

    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))

    plt.grid(True, which='both', linestyle='--', linewidth=0.5, color='gray', alpha=0.7)

    plt.xlim([dates[0], dates[-1]])

    plt.tight_layout()
    plt.savefig(filename)
    plt.show()


window_size = 20
# This cell is written for the one-step-ahead horizon only: the target below is a
# single value, both models end in Dense(1), and the output file names are fixed.
future_steps_list = [1]
results = []

for future_steps in future_steps_list:
    n_samples = len(data_series) - window_size - future_steps + 1
    split_index = int(n_samples * 0.8)

    # Fit the scaler only on the observations the training samples can reach; the
    # target of the last training sample sits at index split_index + window_size - 1.
    # Fitting on the whole series would leak the test period's min/max into training.
    scaler = MinMaxScaler()
    scaler.fit(data_series[:split_index + window_size])
    data_series_scaled = scaler.transform(data_series)

    # X: (n_samples, window_size, 1), y: (n_samples, 1) - the value right after the window.
    X = np.array([data_series_scaled[i:i + window_size] for i in range(n_samples)])
    y = np.array([data_series_scaled[i + window_size] for i in range(n_samples)]).reshape(-1, 1)

    # Chronological split: first 80 % of the samples train, the rest test.
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    # Dates of the target values; by construction fechas_test has exactly
    # len(y_test) entries, so no trimming is needed before plotting.
    fechas_train = data['Fecha'].iloc[window_size:split_index+window_size].values
    fechas_test = data['Fecha'].iloc[split_index+window_size:split_index+window_size+len(y_test)].values

    model_tkan = Sequential([
        TKAN(200, sub_kan_configs=[{'spline_order': 4, 'grid_size': 12}, {'spline_order': 3, 'grid_size': 10}, {'spline_order': 5, 'grid_size': 8}],
            return_sequences=False, use_bias=True),
        Dense(1)
    ])

    model_tkan.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    history_tkan = model_tkan.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

    y_pred_tkan = model_tkan.predict(X_test)

    # Back to price units for plotting and for the reported metrics.
    y_pred_tkan_inverse = scaler.inverse_transform(y_pred_tkan)
    y_test_inverse = scaler.inverse_transform(y_test)

    # Display TKAN results with dates
    _plot_test_forecast(
        fechas_test, y_test_inverse, y_pred_tkan_inverse,
        label='TKAN Predictions', color='green',
        title='TKAN Predictions vs Real Values', filename='TKAN.svg',
    )

    model_lstm = Sequential([
        LSTM(200, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False),
        Dense(1)
    ])

    model_lstm.compile(optimizer='adam', loss='mse')

    history_lstm = model_lstm.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

    y_pred_lstm = model_lstm.predict(X_test)

    y_pred_lstm_inverse = scaler.inverse_transform(y_pred_lstm)

    # Display LSTM results with dates
    _plot_test_forecast(
        fechas_test, y_test_inverse, y_pred_lstm_inverse,
        label='LSTM Predictions', color='red',
        title='LSTM Predictions vs Real Values', filename='LSTM.svg',
    )

    # Show the whole time series with the predictions at the end
    plt.figure(figsize=(12, 6))

    plt.plot(data['Fecha'], data['Último'], label='Real Value', color='blue', linewidth=2)

    # The predictions belong to the target dates of the test samples. Reuse them
    # instead of re-deriving "the last len(fechas_test) dates", which only
    # coincides with the target dates for a one-step horizon.
    fechas_pred_tkan = fechas_test
    plt.plot(fechas_pred_tkan, y_pred_tkan_inverse.flatten(), label='TKAN Predictions', color='green', linestyle='--', linewidth=2)

    plt.plot(fechas_pred_tkan, y_pred_lstm_inverse.flatten(), label='LSTM Predictions', color='red', linestyle='--', linewidth=2)

    plt.legend(loc='upper left', fontsize=12)

    plt.title('Temporal serie with TKAN and LSTM predictions', fontsize=16, fontweight='bold')
    plt.ylabel('SP500', fontsize=14)

    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    # Quarterly ticks: the full history is too long for one tick per month.
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=3))

    plt.grid(True, which='both', linestyle='--', linewidth=0.5, color='gray', alpha=0.7)

    plt.xlim([data['Fecha'].min(), data['Fecha'].max()])

    plt.tight_layout()
    plt.savefig('full_image.svg')
    plt.show()

    mse_tkan = mean_squared_error(y_test_inverse, y_pred_tkan_inverse)
    r2_tkan = r2_score(y_test_inverse, y_pred_tkan_inverse)

    mse_lstm = mean_squared_error(y_test_inverse, y_pred_lstm_inverse)
    r2_lstm = r2_score(y_test_inverse, y_pred_lstm_inverse)

    print(f'Modelo TKAN - MSE: {mse_tkan}, R^2: {r2_tkan}')
    print(f'Modelo LSTM - MSE: {mse_lstm}, R^2: {r2_lstm}')

    results.append({
        'future_steps': future_steps,
        'R^2 TKAN': r2_tkan,
        'R^2 LSTM': r2_lstm
    })

# Last top-level expression of the cell, so the notebook renders the table.
results_df = pd.DataFrame(results)
results_df

