# ARIMA Experiments

___
___

- autoregressive models: AR(p)

- mixed autoregressive moving average models: ARMA(p, q)

- integration models: ARIMA(p, d, q)

<br/>

___

## #1 Read Data

In [32]:
import numpy as np

# Read the data
wind_speed = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)
print(wind_speed.shape)

initial_data = wind_speed

(50529,)


___
## Error Metric Components:

In [33]:
import numpy as np

def sum_of_squares_error(original, prediction):
    """
    Calculates the sum of squares error between two vectors.

    Parameters:
    original (numpy.ndarray): The original vector.
    prediction (numpy.ndarray): The prediction vector.

    Returns:
    float: The sum of squares error.
    """
    error = np.sum((original - prediction) ** 2)
    return error

# Example:
original = np.array([1, 2, 3])
prediction = np.array([3, 4, 5])
error = sum_of_squares_error(original, prediction)
print(f"Sum of Squares Error: {error}")

Sum of Squares Error: 12


___

# ARIMA Components

## 1. Autoregressive Component

- **p**

- This parameter represents the number of lag observations included in the model.
- It essentially captures the relationship between an observation and a number of lagged observations (previous time steps).

### #1 Create the lagged Matrix

In [34]:
import numpy as np

def create_lagged_matrix(data, lags):
    """
    Create a matrix with lagged data.

    Parameters:
    data (numpy array): The input data array.
    lags (int): The number of lags.

    Returns:
    numpy array: A matrix with the lagged data.
    """

    # Create an empty matrix with the appropriate dimensions
    rows = len(data)
    lagged_matrix = np.zeros((rows, lags + 1))

    # Loop over number of lags and roll data 
    for i in range(lags + 1):
        lagged_matrix[:, i] = np.roll(data, -i)

    # Remove rows with incomplete data
    lagged_matrix = lagged_matrix[:rows - lags]

    return lagged_matrix

# Example
wind_speed = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)
lags = 2
lagged_matrix = create_lagged_matrix(wind_speed, lags)
print(lagged_matrix.shape)
print(lagged_matrix)

(50527, 3)
[[ 5.67216682  5.2160368   5.65967417]
 [ 5.2160368   5.65967417  5.57794094]
 [ 5.65967417  5.57794094  5.60405207]
 ...
 [11.40402985  7.3326478   8.43535805]
 [ 7.3326478   8.43535805  9.42136574]
 [ 8.43535805  9.42136574  9.97933197]]


### #2 Matrix Vector Multiplication

In [35]:
import numpy as np

def matrix_vector_multiplication(lagged_matrix, vector):
    """
    Perform matrix-vector multiplication.

    Parameters:
    matrix (numpy array): The input matrix.
    vector (numpy array): The vector to multiply with the matrix.

    Returns:
    numpy array: The result of the matrix-vector multiplication.
    """
    result = np.matmul(lagged_matrix, vector)
    return result

# Example 
arima_vector = np.array([0.9208, -0.0111, 0.0766])
result_matrix = matrix_vector_multiplication(lagged_matrix, arima_vector)
print("Matrix Vector Multiplication Result:")
print(result_matrix.shape)
print(result_matrix)

Matrix Vector Multiplication Result:
(50527,)
[ 5.59856424  5.16737458  5.57878322 ... 11.06558672  7.37994624
  8.42711736]


### #3 Complete Autoregressive component

In [36]:
import numpy as np

def autoregressive_component(data, lags, vector):
    
    """
    Create a matrix with lagged data.
    Perform matrix-vector multiplication.

    Parameters:
    data (numpy array): The input data array.
    lags (int): The number of lags.
    vector (numpy array): The vector to multiply with the created lagged matrix.

    Returns:
    numpy array: The result of the matrix-vector multiplication.
    """

    # Create an empty matrix with the appropriate dimensions
    rows = len(data)
    lagged_matrix = np.zeros((rows, lags + 1))

    # Loop over number of lags and roll data 
    for i in range(lags + 1):
        lagged_matrix[:, i] = np.roll(data, -i)

    # Remove rows with incomplete data
    lagged_matrix = lagged_matrix[:rows - lags]

    result = np.matmul(lagged_matrix, vector)
    return result

# Example 
wind_speed = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)
lags = 2
arima_vector = np.array([0.9208, -0.0111, 0.0766])

result_matrix = autoregressive_component(wind_speed, lags, arima_vector)
print("Autoregressive component:")
print(result_matrix.shape)
print(result_matrix)

Autoregressive component:
(50527,)
[ 5.59856424  5.16737458  5.57878322 ... 11.06558672  7.37994624
  8.42711736]


___
___
## 2. Integrated Component

- **d**

- This parameter is the number of times that the raw observations are differenced to make the time series stationary.
- Distribution of data is only dependent on the difference in time, not the location in time.
- Mean and variance are constant regardless of the period you pick if data is stationary.
- Differencing is a technique used to remove trends and seasonality from the data.


In [37]:
# Integrated (1)

import numpy as np

# Create a NumPy array
array = np.array([1, 2, 4, 7, 11])

# Perform differencing
difference = np.diff(array)

print(difference)

[1 2 3 4]


In [38]:
# Integrated (2)

import numpy as np

# Create a NumPy array
array = np.array([1, 22, 4, 7, 11])

# Perform differencing twice
difference = np.diff(array)
difference_twice = np.diff(difference)

print(difference_twice)

[-39  21   1]


In [61]:
import numpy as np

def integrated_component(data, order):
    """
    Perform differencing on the data.

    Parameters:
    data (numpy array): The input data array.
    order (int): The order of differencing.

    Returns:
    numpy array: The differenced data.
    """
    differenced_data = np.diff(data, n=order)
    return differenced_data

# Example
data = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)
order = 1
differenced_data = integrated_component(data, order)
print("Differenced Data:", differenced_data)

Differenced Data: [-0.45613003  0.44363737 -0.08173323 ...  1.10271025  0.98600769
  0.55796623]


___
## 3. Moving Average Component

- **q**

- This parameter represents the size of the moving average window.
- It captures the relationship between an observation and a residual error from a moving average model applied to lagged observations.

In [40]:
# MA (1) component logic

import numpy as np

# Data from AR component
actual = initial_data[2:]
forecast = result_matrix

print(actual.shape)
print(forecast.shape)

# Calculate forecast errors
errors = actual - forecast

# Create lagged errors and set the first lagged error to zero
errors_lag1 = np.roll(errors, 1)
errors_lag1[0] = 0

# Parameters for MA(1) model
theta = 0.3

# Update forecasts
updated_forecast = forecast + theta * errors_lag1 

# Print the results
print(updated_forecast)
print(updated_forecast.shape)

(50527,)
(50527,)
[5.59856424 5.18570755 5.70195313 ... 9.66550717 6.59087763 9.03954321]
(50527,)


In [41]:
# Basic example with sample data

import numpy as np

# Sample data
actual = np.array([100, 105, 102, 108, 110])
forecast = np.array([98, 103, 101, 107, 109])

# Calculate forecast errors
errors = actual - forecast

# Create lagged errors
errors_lag1 = np.roll(errors, 1)
errors_lag2 = np.roll(errors, 2)

# Set the first two lagged errors to zero
errors_lag1[:1] = 0
errors_lag2[:2] = 0
print(errors_lag1)
print(errors_lag2)

# MA(2) model 
theta_1 = 0.3
theta_2 = 0.3

# Update forecasts
updated_forecast = forecast + theta_1 * errors_lag1 + theta_2 * errors_lag2

# Print the results
print("Actual:", actual)
print("Forecast:", forecast)
print("Updated Forecast:", updated_forecast)

[0 2 2 1 1]
[0 0 2 2 1]
Actual: [100 105 102 108 110]
Forecast: [ 98 103 101 107 109]
Updated Forecast: [ 98.  103.6 102.2 107.9 109.6]


In [42]:
import numpy as np

def moving_average_component_basic(actual, forecast, order, theta):
    """
    Calculate the Moving Average (MA) component of an ARIMA model and update the forecast data.

    Parameters:
    actual (array-like): Array of actual data.
    forecast (array-like): Array of forecast data.
    order (int): Order of the MA model.
    theta (array-like): Array of MA coefficients.

    Returns:
    np.ndarray: Array of updated forecast values.
    """
    actual = np.array(actual)
    forecast = np.array(forecast)
    theta = np.array(theta)
    
    # Ensure the lengths of actual and forecast are the same
    min_length = min(len(actual), len(forecast))
    actual = actual[:min_length]
    forecast = forecast[:min_length]

    # Calculate the errors
    errors = actual - forecast
    
    # Initialize the updated forecast array as float64
    updated_forecast = np.copy(forecast).astype(np.float64)
    
    # Create lagged errors and update the forecast
    for i in range(1, order + 1):
        lagged_errors = np.roll(errors, i)
        lagged_errors[:i] = 0  # Set the first 'i' elements to zero
        updated_forecast += theta[i - 1] * lagged_errors
    
    return updated_forecast

# Example usage 1

# Sample data
actual = np.array([100, 105, 102, 108, 110])
forecast = np.array([98, 103, 101, 107, 109])

# MA(2) model parameters
order = 2
theta = [0.3, 0.3]

# Update forecasts
updated_forecast = moving_average_component_basic(actual, forecast, order, theta)

# Print the results
print("Actual:", actual)
print("Forecast:", forecast)
print("Updated Forecast:", updated_forecast)


# Example usage 2

theta = [0.3, 0.3]
order = 2
initial_data = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)

original_data = initial_data[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component_basic(original_data, forecast_data, order, theta)
print("Original Data:", original_data)
print("Forecast Data:", forecast_data)
print("Updated Forecast:", updated_forecast)


Actual: [100 105 102 108 110]
Forecast: [ 98 103 101 107 109]
Updated Forecast: [ 98.  103.6 102.2 107.9 109.6]


Original Data: [5.65967417 5.57794094 5.60405207 ... 8.43535805 9.42136574 9.97933197]
Forecast Data: [ 5.59856424  5.16737458  5.57878322 ... 11.06558672  7.37994624
  8.42711736]
Updated Forecast: [5.59856424 5.18570755 5.7202861  ... 9.41554264 5.19079809 8.25047461]


In [43]:
import numpy as np

def moving_average_component(original_data, forecast_data, order, theta):
    """
    Calculate the Moving Average (MA) component of an ARIMA model and update the forecast data.

    Parameters:
    original_data (array-like): Array of original data.
    forecast_data (array-like): Array of forecast data.
    order (int): Order of the MA model.
    theta (array-like): Array of MA coefficients.

    Returns:
    np.ndarray: Array of updated forecast values.
    """
    original_data = np.array(original_data)
    forecast_data = np.array(forecast_data)
    theta = np.array(theta)
    
    # Ensure the lengths of original_data and forecast_data are the same
    min_length = min(len(original_data), len(forecast_data))
    original_data = original_data[:min_length]
    forecast_data = forecast_data[:min_length]

    # Calculate the errors
    errors = original_data - forecast_data
    
    # Create a matrix of lagged errors
    lagged_errors = np.zeros((order, len(errors)))
    # loop over MA order 
    for i in range(1, order + 1):
        lagged_errors[i - 1, i:] = errors[:-i]
    
    # Calculate the weighted sum of lagged errors
    weighted_lagged_errors = np.matmul(theta, lagged_errors)
    
    # Update the forecast
    updated_forecast = forecast_data + weighted_lagged_errors
    
    return updated_forecast

# Example usage
theta = [0.3, 0.3]
order = 2
initial_data = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)

original_data = initial_data[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component(original_data, forecast_data, order, theta)
print("Original Data:", original_data)
print("Forecast Data:", forecast_data)
print("Updated Forecast:", updated_forecast)


Original Data: [5.65967417 5.57794094 5.60405207 ... 8.43535805 9.42136574 9.97933197]
Forecast Data: [ 5.59856424  5.16737458  5.57878322 ... 11.06558672  7.37994624
  8.42711736]
Updated Forecast: [5.59856424 5.18570755 5.7202861  ... 9.41554264 5.19079809 8.25047461]


In [44]:
import numpy as np

def moving_average_component_v2(actual, forecast, order, theta):
    """
    Calculate the Moving Average (MA) component of an ARIMA model and update the forecast data.

    Parameters:
    actual (array-like): Array of actual data.
    forecast (array-like): Array of forecast data.
    order (int): Order of the MA model.
    theta (array-like): Array of MA coefficients.

    Returns:
    np.ndarray: Array of updated forecast values.
    """
    actual = np.array(actual)
    forecast = np.array(forecast)
    theta = np.array(theta)
    
    # Ensure the lengths of actual and forecast are the same
    min_length = min(len(actual), len(forecast))
    actual = actual[:min_length]
    forecast = forecast[:min_length]

    # Calculate the errors
    errors = actual - forecast
    
    # Initialize the updated forecast array as float64
    updated_forecast = np.copy(forecast).astype(np.float64)
    
    # Create lagged errors and update the forecast
    for i in range(1, order + 1):
        lagged_errors = np.roll(errors, i)
        lagged_errors[:i] = 0  # Set the first 'i' elements to zero
        updated_forecast += theta[i - 1] * lagged_errors
    
    return updated_forecast

theta = [0.3, 0.3]
order = 2
initial_data = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)

original_data = initial_data[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component_v2(original_data, forecast_data, order, theta)
print("Original Data:", original_data)
print("Forecast Data:", forecast_data)
print("Updated Forecast:", updated_forecast)


Original Data: [5.65967417 5.57794094 5.60405207 ... 8.43535805 9.42136574 9.97933197]
Forecast Data: [ 5.59856424  5.16737458  5.57878322 ... 11.06558672  7.37994624
  8.42711736]
Updated Forecast: [5.59856424 5.18570755 5.7202861  ... 9.41554264 5.19079809 8.25047461]


___
___
## ARIMA Experiments

- autoregressive models: AR(p)

- mixed autoregressive moving average models: ARMA(p, q)

- integration models: ARIMA(p, d, q)

<br/>

___

### #A - Dataset: wind_speed

In [62]:
# AR
# ARIMA (3,0,0)

# read data
wind_speed = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)
lags = 2
arima_vector = np.array([0.9208, -0.0111, 0.0766]) # Trained arima vector
result_matrix = autoregressive_component(wind_speed, lags, arima_vector)

error_metric = sum_of_squares_error(wind_speed[lags:], result_matrix)


# Print the results
print("### Dataset wind_speed")
print("### ARIMA (3,0,0) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset wind_speed
### ARIMA (3,0,0) 

Error Metric: Sum of squares
45,950.50


In [64]:
# AR + MA
# ARIMA (3,0,3)

import numpy as np

# read data
wind_speed = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)

# AR component
lags = 2
arima_vector = np.array([-0.0723, -0.4126, -0.0111]) # Trained arima vector
result_matrix = autoregressive_component(wind_speed, lags, arima_vector)

# MA component
theta = [0.6566, 0.9961, -0.6605]
order = 3
original_data = wind_speed[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component(original_data, forecast_data, order, theta)

# Print results
error_metric = sum_of_squares_error(wind_speed[lags+1:], updated_forecast)

# Print the results
print("### Dataset wind_speed")
print("### ARIMA (3,0,3) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset wind_speed
### ARIMA (3,0,3) 

Error Metric: Sum of squares
36,951.15


In [65]:
# AR + I 
# ARIMA (3,1,0)

# read data
wind_speed = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)

# Differencing component
diff_order = 1
differenced_data = integrated_component(wind_speed, diff_order)

# AR component
lags = 2
arima_vector = np.array([-0.0766, -0.0864, -0.0465]) # Trained arima vector
result_matrix = autoregressive_component(differenced_data, lags, arima_vector)

error_metric = sum_of_squares_error(differenced_data[lags:], result_matrix)

# Print the results
print("### Dataset wind_speed")
print("### ARIMA (3,1,0) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset wind_speed
### ARIMA (3,1,0) 

Error Metric: Sum of squares
30,883.65


In [66]:
# AR + I + MA
# ARIMA (3,1,3)

import numpy as np

# read data
wind_speed = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)

# Differencing component
diff_order = 1
differenced_data = integrated_component(wind_speed, diff_order)

# AR component
lags = 2
arima_vector = np.array([0.6566, 0.9961, -0.6605]) # Trained arima vector
result_matrix = autoregressive_component(differenced_data, lags, arima_vector)

# MA component
theta = [-0.7543, -0.9993, 0.7550]
order = 3
original_data = differenced_data[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component(original_data, forecast_data, order, theta)

# Print results
error_metric = sum_of_squares_error(differenced_data[lags+1:], updated_forecast)

# Print the results
print("### Dataset wind_speed")
print("### ARIMA (3,1,3) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset wind_speed
### ARIMA (3,1,3) 

Error Metric: Sum of squares
288,852.29


___
### #B- Dataset: energy_generation_solar

In [49]:
# AR
# ARIMA (3,0,0)

# read data
energy_generation_solar = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/energy_generation_solar_small.csv', delimiter=',', skip_header=1)
lags = 1
lagged_matrix = create_lagged_matrix(energy_generation_solar, lags)

arima_vector = np.array([1.9920, -1.3411]) # Trained arima vector
result_matrix = matrix_vector_multiplication(lagged_matrix, arima_vector)

error_metric = sum_of_squares_error(energy_generation_solar[lags:], result_matrix)

# Print the results
print("### Dataset energy_generation_solar")
print("### ARIMA (3,0,0) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset energy_generation_solar
### ARIMA (3,0,0) 

Error Metric: Sum of squares
65,336,245,585.35


In [50]:
# AR + I 
# ARIMA (3,1,0)

# read data
energy_generation_solar = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/energy_generation_solar_small.csv', delimiter=',', skip_header=1)

# Differencing component
diff_order = 2
differenced_data = integrated_component(energy_generation_solar, diff_order)

# AR component
lags = 2
lagged_matrix = create_lagged_matrix(differenced_data, lags)

arima_vector = np.array([1.9920, -1.3411, 0.2856]) # Trained arima vector
result_matrix = matrix_vector_multiplication(lagged_matrix, arima_vector)

error_metric = sum_of_squares_error(differenced_data[lags:], result_matrix)

# Print the results
print("### Dataset energy_generation_solar")
print("### ARIMA (3,1,0) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset energy_generation_solar
### ARIMA (3,1,0) 

Error Metric: Sum of squares
22,305,125,234.48


In [51]:
# AR + MA
# ARIMA (3,0,1)

import numpy as np

# read data
energy_generation_solar = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/energy_generation_solar_small.csv', delimiter=',', skip_header=1)

# AR component
lags = 2
lagged_matrix = create_lagged_matrix(energy_generation_solar, lags)

arima_vector = np.array([1.9920, -1.3411, 0.2856]) # Trained arima vector
result_matrix = matrix_vector_multiplication(lagged_matrix, arima_vector)

# MA component
theta = [0.7, 0.7]
order = 2
original_data = energy_generation_solar[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component(original_data, forecast_data, order, theta)

# Print results
error_metric = sum_of_squares_error(energy_generation_solar[lags:], updated_forecast)

# Print the results
print("### Dataset energy_generation_solar")
print("### ARIMA (3,0,1) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset energy_generation_solar
### ARIMA (3,0,1) 

Error Metric: Sum of squares
58,211,265,066.66


In [52]:
# AR + I + MA
# ARIMA (3,1,1)

import numpy as np

# read data
energy_generation_solar = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/energy_generation_solar_small.csv', delimiter=',', skip_header=1)

# Differencing component
diff_order = 1
differenced_data = integrated_component(energy_generation_solar, diff_order)

# AR component
lags = 2
lagged_matrix = create_lagged_matrix(differenced_data, lags)

arima_vector = np.array([1.9920, -1.3411, 0.2856]) # Trained arima vector
result_matrix = matrix_vector_multiplication(lagged_matrix, arima_vector)

# MA component
theta = [0.7, 0.7]
order = 2
original_data = differenced_data[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component(original_data, forecast_data, order, theta)

# Print results
error_metric = sum_of_squares_error(differenced_data[lags:], updated_forecast)

# Print the results
print("### Dataset energy_generation_solar")
print("### ARIMA (3,1,1) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset energy_generation_solar
### ARIMA (3,1,1) 

Error Metric: Sum of squares
40,886,542,809.99


Results:


___
### #C - Dataset: heart_rate

In [53]:
# AR
# ARIMA (3,0,0)

# read data
heart_rate = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/heart_rate_small.csv', delimiter=',', skip_header=1)
lags = 1
lagged_matrix = create_lagged_matrix(heart_rate, lags)

arima_vector = np.array([1.9920, -1.3411]) # Trained arima vector
result_matrix = matrix_vector_multiplication(lagged_matrix, arima_vector)

error_metric = sum_of_squares_error(heart_rate[lags:], result_matrix)

# Print the results
print("### Dataset heart_rate")
print("### ARIMA (3,0,0) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset heart_rate
### ARIMA (3,0,0) 

Error Metric: Sum of squares
966,415,324.07


In [54]:
# AR + I 
# ARIMA (3,1,0)

# read data
heart_rate = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/heart_rate_small.csv', delimiter=',', skip_header=1)

# Differencing component
diff_order = 2
differenced_data = integrated_component(heart_rate, diff_order)

# AR component
lags = 2
lagged_matrix = create_lagged_matrix(differenced_data, lags)

arima_vector = np.array([1.9920, -1.3411, 0.2856]) # Trained arima vector
result_matrix = matrix_vector_multiplication(lagged_matrix, arima_vector)

error_metric = sum_of_squares_error(differenced_data[lags:], result_matrix)

# Print the results
print("### Dataset heart_rate")
print("### ARIMA (3,1,0) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset heart_rate
### ARIMA (3,1,0) 

Error Metric: Sum of squares
71,768,375.44


In [55]:
# AR + MA
# ARIMA (3,0,1)

import numpy as np

# read data
heart_rate = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/heart_rate_small.csv', delimiter=',', skip_header=1)

# AR component
lags = 2
lagged_matrix = create_lagged_matrix(heart_rate, lags)

arima_vector = np.array([1.9920, -1.3411, 0.2856]) # Trained arima vector
result_matrix = matrix_vector_multiplication(lagged_matrix, arima_vector)

# MA component
theta = [0.7, 0.7]
order = 2
original_data = heart_rate[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component(original_data, forecast_data, order, theta)

# Print results
error_metric = sum_of_squares_error(heart_rate[lags:], updated_forecast)

# Print the results
print("### Dataset heart_rate")
print("### ARIMA (3,0,1) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset heart_rate
### ARIMA (3,0,1) 

Error Metric: Sum of squares
43,788,195.14


In [56]:
# AR + I + MA
# ARIMA (3,1,1)

import numpy as np

# read data
heart_rate = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/heart_rate_small.csv', delimiter=',', skip_header=1)

# Differencing component
diff_order = 1
differenced_data = integrated_component(heart_rate, diff_order)

# AR component
lags = 2
lagged_matrix = create_lagged_matrix(differenced_data, lags)

arima_vector = np.array([1.9920, -1.3411, 0.2856]) # Trained arima vector
result_matrix = matrix_vector_multiplication(lagged_matrix, arima_vector)

# MA component
theta = [0.3, 0.7]
order = 2
original_data = differenced_data[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component(original_data, forecast_data, order, theta)

# Print results
error_metric = sum_of_squares_error(differenced_data[lags:], updated_forecast)

# Print the results
print("### Dataset heart_rate")
print("### ARIMA (3,1,1) \n")
print("Error Metric: Sum of squares")
print(f"{error_metric:,.2f}")

### Dataset heart_rate
### ARIMA (3,1,1) 

Error Metric: Sum of squares
53,663,022.31


___
### Get ARIMA Parameter Weights 

In [67]:
# Dataset wind_speed
 
import numpy as np
from statsmodels.tsa.arima.model import ARIMA


# Get the data in an numpy array 
wind_speed = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)

print(f"Dataframe wind_speed has {wind_speed.size} rows.")

# Fit ARIMA model with only AR component (p=3, d=0, q=0)
model = ARIMA(wind_speed, order=(3, 0, 0))
arima_model = model.fit()

print(arima_model.summary())

Dataframe wind_speed has 50529 rows.
                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                50529
Model:                 ARIMA(3, 0, 0)   Log Likelihood              -56809.358
Date:                Mon, 26 Aug 2024   AIC                         113628.717
Time:                        11:34:36   BIC                         113672.868
Sample:                             0   HQIC                        113642.540
                              - 50529                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.5580      0.253     29.868      0.000       7.062       8.054
ar.L1          0.9208      0.002    574.498      0.000       0.918       0.924
ar.L2         -

In [None]:
Dataframe wind_speed has 35045 rows.

                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                35045
Model:                 ARIMA(3, 0, 0)   Log Likelihood             -245602.035
Date:                Fri, 23 Aug 2024   AIC                         491214.070
Time:                        00:51:56   BIC                         491256.392
Sample:                             0   HQIC                        491227.550
                              - 35045                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       1432.7054     25.785     55.564      0.000    1382.168    1483.243
ar.L1          1.9920      0.002    878.019      0.000       1.988       1.996
ar.L2         -1.3411      0.003   -394.514      0.000      -1.348      -1.334
ar.L3          0.2856      0.002    127.535      0.000       0.281       0.290
sigma2      7.157e+04    202.462    353.494      0.000    7.12e+04     7.2e+04
===================================================================================
Ljung-Box (L1) (Q):                 134.53   Jarque-Bera (JB):           3117759.51
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               1.14   Skew:                             1.55
Prob(H) (two-sided):                  0.00   Kurtosis:                        49.10
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

In [68]:
# Dataset energy_generation_solar
 
import numpy as np
from statsmodels.tsa.arima.model import ARIMA


# Get the data in an numpy array 
energy_generation_solar = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/energy_generation_solar_small.csv', delimiter=',', skip_header=1)

print(f"Dataframe energy_generation_solar has {energy_generation_solar.size} rows.")

# Fit ARIMA model with only AR component (p=3, d=0, q=0)
model = ARIMA(energy_generation_solar, order=(3, 0, 0))
arima_model = model.fit()

print(arima_model.summary())

Dataframe energy_generation_solar has 35045 rows.
                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                35045
Model:                 ARIMA(3, 0, 0)   Log Likelihood             -245602.035
Date:                Mon, 26 Aug 2024   AIC                         491214.070
Time:                        11:35:04   BIC                         491256.392
Sample:                             0   HQIC                        491227.550
                              - 35045                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       1432.7054     25.785     55.564      0.000    1382.168    1483.243
ar.L1          1.9920      0.002    878.019      0.000       1.988       1.996
ar

In [None]:
Dataframe energy_generation_solar has 35045 rows.

                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                35045
Model:                 ARIMA(3, 0, 0)   Log Likelihood             -245602.035
Date:                Fri, 23 Aug 2024   AIC                         491214.070
Time:                        00:52:46   BIC                         491256.392
Sample:                             0   HQIC                        491227.550
                              - 35045                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       1432.7054     25.785     55.564      0.000    1382.168    1483.243
ar.L1          1.9920      0.002    878.019      0.000       1.988       1.996
ar.L2         -1.3411      0.003   -394.514      0.000      -1.348      -1.334
ar.L3          0.2856      0.002    127.535      0.000       0.281       0.290
sigma2      7.157e+04    202.462    353.494      0.000    7.12e+04     7.2e+04
===================================================================================
Ljung-Box (L1) (Q):                 134.53   Jarque-Bera (JB):           3117759.51
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               1.14   Skew:                             1.55
Prob(H) (two-sided):                  0.00   Kurtosis:                        49.10
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

In [69]:
# Dataset heart_rate
 
import numpy as np
from statsmodels.tsa.arima.model import ARIMA

# Get the data in an numpy array 
heart_rate = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/heart_rate_small.csv', delimiter=',', skip_header=1)

print(f"Dataframe heart_rate has {heart_rate.size} rows.")

# Fit ARIMA model with only AR component (p=3, d=0, q=0)
model = ARIMA(heart_rate, order=(3, 0, 0))
arima_model = model.fit()

print(arima_model.summary())

Dataframe heart_rate has 1154680 rows.
                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:              1154680
Model:                 ARIMA(3, 0, 0)   Log Likelihood            -2444762.852
Date:                Mon, 26 Aug 2024   AIC                        4889535.704
Time:                        11:35:48   BIC                        4889595.501
Sample:                             0   HQIC                       4889552.066
                            - 1154680                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         79.7569      0.312    255.567      0.000      79.145      80.369
ar.L1          1.0247      0.000   3374.100      0.000       1.024       1.025
ar.L2        

___
___
## Notes

___
## Archive

In [60]:
import numpy as np

def moving_average_component_v1(original_data, forecast_data, order, theta):
    """
    Calculate the Moving Average (MA) component of an ARIMA model and update the forecast data.

    Parameters:
    original_data (array-like): Array of original data.
    forecast_data (array-like): Array of forecast data.
    order (int): Order of the MA model.
    theta (array-like): Array of MA coefficients.

    Returns:
    np.ndarray: Array of updated forecast values.
    """
    original_data = np.array(original_data)
    forecast_data = np.array(forecast_data)
    theta = np.array(theta)
    
    # Ensure the lengths of original_data and forecast_data are the same
    min_length = min(len(original_data), len(forecast_data))
    original_data = original_data[:min_length]
    forecast_data = forecast_data[:min_length]

    # Calculate the errors
    errors = original_data - forecast_data   
    # Create a matrix where each row is a shifted version of the errors array
    shifted_errors = np.array([np.roll(errors, i) for i in range(order)]).T
    # Set the first 'order' rows to zero to handle the initial condition
    shifted_errors[:order, :] = 0
    
    # Calculate the updated forecast
    updated_forecast = np.copy(forecast_data)
    updated_forecast[order:] += np.dot(shifted_errors[order:], theta)

    return updated_forecast

# Example usage

theta = [0.3]
order = 1
initial_data = np.genfromtxt('/Users/niklas/Documents/GitHub/Uni/10_Masterarbeit/data_files/prepocessed_datafiles/wind_speed_small.csv', delimiter=',', skip_header=1)

original_data = initial_data[order:]
forecast_data = result_matrix

updated_forecast = moving_average_component_v1(original_data, forecast_data, order, theta)
print("Original Data:", original_data)
print("Forecast Data:", forecast_data)
print("Updated Forecast:", updated_forecast)

# wrong output!

Original Data: [5.2160368  5.65967417 5.57794094 ... 8.43535805 9.42136574 9.97933197]
Forecast Data: [ 7.849   1.5874  3.4997 ...  1.0079 -2.7702 -8.2143]
Updated Forecast: [ 7.849       2.80908225  4.12317228 ...  4.23573741 -3.60666028
  3.27701959]
