In [43]:
import numpy as np
import time


# Load the datasets
# Variant 1: files from `preprocessed_data_files` (all loaders currently commented out).
# data_a = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/preprocessed_data_files/wind_speed_small.csv', delimiter=',')
# data_b = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/preprocessed_data_files/energy_generation_solar_small.csv', delimiter=',', skiprows=1)
# data_c = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/preprocessed_data_files/heart_rate_small.csv', delimiter=',', skiprows=1)
# data_d = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/preprocessed_data_files/temperature_delhi_small.csv', delimiter=',', skiprows=1)
# data_e = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/preprocessed_data_files/stock_open_microsoft.csv', delimiter=',', skiprows=1)
# data_f = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/preprocessed_data_files/nyctaxitraffic_small.csv', delimiter=',', skiprows=1)

# Variant 2: files from `replicated_preprocessed_data_files`.
# Only data_f is actually loaded. The cells for datasets A-E reference
# data_a ... data_e, so the matching line below has to be uncommented before
# running them; on a fresh kernel they raise NameError otherwise.
# NOTE(review): the paths are absolute and machine-specific.
# data_a = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/wind_speed.csv', delimiter=',', skiprows=0)
# data_b = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/energy_generation_solar.csv', delimiter=',', skiprows=0)
# data_c = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/heart_rate.csv', delimiter=',', skiprows=0)
# data_d = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/temperature_delhi.csv', delimiter=',', skiprows=0)
# data_e = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/stock_open_microsoft.csv', delimiter=',', skiprows=0)
data_f = np.loadtxt('/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/nyctaxitraffic.csv', delimiter=',', skiprows=0)


# Define the parameters for testing
# Number of shifted copies of the series used by the autoregressive functions
# (each window spans lags + 1 values). The test cells also pass this value as
# the `order` argument of the moving-average functions.
lags = 2
# Weights of the autoregressive window: entry k multiplies the k-th value of
# each window, so lags + 1 entries are needed.
vector = np.array([0.9208, -0.0111, 0.0766])
# Differencing order handed to the integrated_component_* functions.
order = 1
# Moving-average weights: theta[i - 1] multiplies the error lagged by i steps.
theta = np.array([0.3, 0.3])

# Expected outputs
# The test cells in this notebook compare only the expected_output_complete_*
# values against the last element of the pipeline result.
# NOTE(review): values such as 1.23456789, 33.56789012, 2.34567890 and
# 99.12345678 look like placeholders - confirm before relying on them.
# NOTE(review): the recorded runs in this notebook all report FAILED against the
# expected_output_complete_* values - confirm they match the current datasets
# and parameters.
expected_output_ar_a = 8.42711736
expected_output_integrated_a = 0.55796623
expected_output_ma_a = 8.25047461
expected_output_complete_a = 3.059677

expected_output_ar_b = 33.09027077
expected_output_integrated_b = 1.23456789
expected_output_ma_b = 33.56789012
expected_output_complete_b = 61.78846

expected_output_ar_c = 98.77752695
expected_output_integrated_c = 2.34567890
expected_output_ma_c = 99.12345678
expected_output_complete_c = -6.16423

expected_output_complete_d = -1.066503

expected_output_complete_e = 6.920413

expected_output_complete_f = 1143.753

In [39]:
import numpy as np

#####
# Autoregressive Component

def autoregressive_component_basic(data, lags, vector):
    """Loop-based autoregressive component.

    Builds a matrix whose row ``r`` holds ``data[r], data[r + 1], ...,
    data[r + lags]`` and returns, for each retained row, the sum of its
    entries weighted by ``vector``.

    Parameters
    ----------
    data : sequence of numbers
        The input series.
    lags : int
        Number of shifted columns added next to the unshifted series.
    vector : sequence of numbers
        Weights; entry ``k`` multiplies column ``k`` of the matrix.

    Returns
    -------
    numpy.ndarray
        One weighted sum per row of the matrix after it has been cut to its
        first ``len(data) - lags`` rows.
    """
    n_obs = len(data)
    n_cols = lags + 1
    windows = np.zeros((n_obs, n_cols))

    # Column `shift` is the series moved up by `shift` positions; cells that
    # would read past the end of the series keep their initial zero.
    for shift in range(n_cols):
        for row in range(n_obs - shift):
            windows[row, shift] = data[row + shift]

    # Keep only the leading rows (the trailing ones contain zero padding).
    windows = windows[:n_obs - lags]

    weighted = np.zeros(len(windows))
    for row, values in enumerate(windows):
        total = 0.0
        # Accumulate left to right, one weight at a time.
        for col in range(len(vector)):
            total += values[col] * vector[col]
        weighted[row] = total

    return weighted


def autoregressive_component_basic_v2(data, lags, vector):
    """Loop-based autoregressive component without an intermediate matrix.

    For every start index ``s`` in ``0 .. len(data) - lags - 1`` the result
    holds ``sum(data[s + k] * vector[k] for k in 0 .. lags)``.

    Parameters
    ----------
    data : sequence of numbers
        The input series.
    lags : int
        Each window spans ``lags + 1`` consecutive values.
    vector : sequence of numbers
        Window weights; indices ``0 .. lags`` are read.

    Returns
    -------
    numpy.ndarray
        Array of length ``len(data) - lags`` with one weighted sum per window.
    """
    n_windows = len(data) - lags
    window_size = lags + 1
    sums = np.zeros(n_windows)

    for start in range(n_windows):
        total = 0.0
        # Walk through the window and add each weighted value in order.
        for offset in range(window_size):
            total += data[start + offset] * vector[offset]
        sums[start] = total

    return sums

def autoregressive_component_vectorized(data, lags, vector):
    """Vectorized autoregressive component.

    Column ``k`` of an intermediate matrix is the series rotated left by ``k``
    positions. The matrix is cut to its first ``len(data) - lags`` rows (the
    remaining rows contain values wrapped around by ``np.roll``) and
    multiplied by ``vector``.

    Parameters
    ----------
    data : array-like of numbers
        The input series.
    lags : int
        Number of rotated columns added next to the unrotated series.
    vector : array-like of numbers
        Weights, one per column of the matrix.

    Returns
    -------
    numpy.ndarray
        Matrix-vector product of the trimmed matrix with ``vector``.
    """
    n_obs = len(data)
    n_cols = lags + 1

    # Float matrix with one column per shift of the series.
    shifted = np.zeros((n_obs, n_cols))
    for shift in range(n_cols):
        shifted[:, shift] = np.roll(data, -shift)

    # Discard the rows whose windows run past the end of the series.
    complete_rows = shifted[:n_obs - lags]

    return complete_rows @ vector


#####
# Integrated Component

def integrated_component_basic(data, order):
    """Loop-based differencing of a series.

    Replaces the series ``order`` times by the differences between each
    element and its predecessor, so every pass shortens it by one element.

    Parameters
    ----------
    data : list or numpy.ndarray
        The input series; it is copied via ``data.copy()`` and not modified.
    order : int
        Number of differencing passes.

    Returns
    -------
    numpy.ndarray
        The differenced series.
    """
    series = data.copy()
    for _ in range(order):
        # Pair every element with its successor and subtract.
        series = [nxt - prev for prev, nxt in zip(series, series[1:])]
    return np.array(series)


def integrated_component_vectorized(data, order):

    differenced_data = np.diff(data, n=order)
    return differenced_data


#####
# Moving Average Component

def moving_average_component_basic(original_data, forecast_data, order, theta):
    """Loop-based moving-average correction of a forecast.

    Both series are cut to their common leading length, the errors
    ``original - forecast`` are computed, and every forecast value at index
    ``t >= order`` is increased by
    ``sum(theta[i - 1] * errors[t - i] for i in 1 .. order)``.
    Entries with index below ``order`` are returned unchanged.

    NOTE: moving_average_component_vectorized applies partial sums to the
    entries at indices ``1 .. order - 1`` instead of leaving them unchanged,
    so the two functions agree (up to rounding) only from index ``order`` on.

    Parameters
    ----------
    original_data : array-like of numbers
        Observed values.
    forecast_data : array-like of numbers
        Forecast values, paired with ``original_data`` by index.
    order : int
        Number of lagged errors used per correction.
    theta : array-like of numbers
        Weights; ``theta[i - 1]`` multiplies the error lagged by ``i`` steps.

    Returns
    -------
    numpy.ndarray
        Corrected forecast as a new float array; the inputs are not modified.
    """
    original_data = np.array(original_data)
    forecast_data = np.array(forecast_data)
    theta = np.array(theta)

    # Ensure the lengths of original_data and forecast_data are the same
    min_length = min(len(original_data), len(forecast_data))
    original_data = original_data[:min_length]
    forecast_data = forecast_data[:min_length]

    errors = original_data - forecast_data

    # Work on a float copy. With a plain .copy() an integer forecast keeps its
    # integer dtype and the in-place update below would silently truncate the
    # fractional part of every correction.
    updated_forecast = forecast_data.astype(float)

    for t in range(order, len(errors)):
        weighted_sum = 0.0
        for i in range(1, order + 1):
            weighted_sum += theta[i - 1] * errors[t - i]
        updated_forecast[t] += weighted_sum

    return updated_forecast


def moving_average_component_vectorized(original_data, forecast_data, order, theta):
    """Vectorized moving-average correction of a forecast.

    Both series are cut to their common leading length and the errors
    ``original - forecast`` are computed. Row ``i - 1`` of an intermediate
    matrix holds the errors delayed by ``i`` steps (zero-filled at the
    start); the forecast is increased by ``theta`` times that matrix.

    Parameters
    ----------
    original_data : array-like of numbers
        Observed values.
    forecast_data : array-like of numbers
        Forecast values, paired with ``original_data`` by index.
    order : int
        Number of delayed error rows.
    theta : array-like of numbers
        One weight per delayed error row.

    Returns
    -------
    numpy.ndarray
        The corrected forecast.
    """
    observed = np.array(original_data)
    predicted = np.array(forecast_data)
    weights = np.array(theta)

    # Trim both series to the length they have in common.
    common = min(len(observed), len(predicted))
    observed, predicted = observed[:common], predicted[:common]

    residuals = observed - predicted

    # Row `row` carries the residuals delayed by `row + 1` steps.
    delayed = np.zeros((order, len(residuals)))
    for row in range(order):
        step = row + 1
        delayed[row, step:] = residuals[:-step]

    # Weighted sum over the delayed residuals, added onto the forecast.
    return predicted + np.matmul(weights, delayed)


def moving_average_component_vectorized_v2(actual, forecast, order, theta):
    """Two-pass vectorized moving-average correction of a forecast.

    Pass 1 adds the ``theta``-weighted delayed errors ``actual - forecast``
    to the forecast. Pass 2 recomputes the errors against that corrected
    forecast and adds their ``theta``-weighted delayed values on top.

    Parameters
    ----------
    actual : array-like of numbers
        Observed values.
    forecast : array-like of numbers
        Forecast values, paired with ``actual`` by index.
    order : int
        Number of delayed error rows used in each pass.
    theta : array-like of numbers
        One weight per delayed error row.

    Returns
    -------
    numpy.ndarray
        The forecast after both corrections.
    """
    actual = np.array(actual)
    forecast = np.array(forecast)
    theta = np.array(theta)

    # Trim both series to the length they have in common.
    common = min(len(actual), len(forecast))
    actual, forecast = actual[:common], forecast[:common]

    def _weighted_delayed(errors):
        # Row i - 1 holds `errors` delayed by i steps, zero-filled at the start.
        delayed = np.zeros((order, len(errors)))
        for i in range(1, order + 1):
            delayed[i - 1, i:] = errors[:-i]
        return np.dot(theta, delayed)

    # Pass 1: correct with the errors of the incoming forecast.
    updated_forecast = forecast + _weighted_delayed(actual - forecast)

    # Pass 2: correct again with the errors of the pass-1 result.
    # TODO: or should this use forecast - updated_forecast?
    second_correction = _weighted_delayed(actual - updated_forecast)
    updated_forecast += second_correction

    return updated_forecast


In [27]:

# test_arima_complete_basic_dataset_a
# Loop-based ("basic") pipeline on dataset A: difference, AR forecast, MA correction.
# Needs data_a, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data = integrated_component_basic(data_a, order)
forecast_data = autoregressive_component_basic(integrated_data, lags, vector)
# forecast_data[i] is built from integrated_data[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result = moving_average_component_basic(integrated_data[(lags + 1):], forecast_data, lags, theta)
last_result_a = result[-1]
end_time = time.perf_counter()
execution_time = end_time - start_time
# Only the final corrected value is compared with the reference.
if abs(last_result_a - expected_output_complete_a) < 0.00005:
    print("test_arima_complete_basic_dataset_a: PASSED:", last_result_a)
    print("execution time:", execution_time)
else:
    print("test_arima_complete_basic_dataset_a: FAILED")
    print(last_result_a)
    print("execution time:", execution_time)


test_arima_complete_basic_dataset_a: FAILED
-2.707527627339375
execution time: 234.01925683021545


In [26]:
# test_arima_complete_vectorized_dataset_a
# Vectorized pipeline on dataset A: difference, AR forecast, MA correction.
# Needs data_a, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data = integrated_component_vectorized(data_a, order)
forecast_data = autoregressive_component_vectorized(integrated_data, lags, vector)
# forecast_data[i] is built from integrated_data[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result = moving_average_component_vectorized(integrated_data[(lags + 1):], forecast_data, lags, theta)
last_result_a = result[-1]
end_time = time.perf_counter()
execution_time = end_time - start_time
# Both branches report the value, so no separate debug print is needed.
if abs(last_result_a - expected_output_complete_a) < 0.00005:
    print("test_arima_complete_vectorized_dataset_a: PASSED:", last_result_a)
    print("execution time:", execution_time)
else:
    print("test_arima_complete_vectorized_dataset_a: FAILED")
    print(last_result_a)
    print("execution time:", execution_time)


-2.707527627339375
test_arima_complete_vectorized_dataset_a: FAILED
-2.707527627339375
execution time: 8.695611953735352


In [30]:
# Dataset B
# Loop-based ("basic") pipeline: difference, AR forecast, MA correction.
# Needs data_b, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_b = integrated_component_basic(data_b, order)
forecast_data_b = autoregressive_component_basic(integrated_data_b, lags, vector)
# forecast_data_b[i] is built from integrated_data_b[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_b = moving_average_component_basic(integrated_data_b[(lags + 1):], forecast_data_b, lags, theta)
last_result_b = result_b[-1]
end_time = time.perf_counter()
execution_time_b = end_time - start_time
# Only the final corrected value is compared with the reference.
if abs(last_result_b - expected_output_complete_b) < 0.00005:
    print("test_arima_complete_basic_dataset_b: PASSED:", last_result_b)
    print("execution time:", execution_time_b)
else:
    print("test_arima_complete_basic_dataset_b: FAILED")
    print(last_result_b)
    print("execution time:", execution_time_b)

test_arima_complete_basic_dataset_b: FAILED
293.16202999999996
execution time: 168.16781091690063


In [29]:
# Dataset B, vectorized pipeline: difference, AR forecast, MA correction.
# Needs data_b, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_b = integrated_component_vectorized(data_b, order)
forecast_data_b = autoregressive_component_vectorized(integrated_data_b, lags, vector)
# forecast_data_b[i] is built from integrated_data_b[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_b = moving_average_component_vectorized(integrated_data_b[(lags + 1):], forecast_data_b, lags, theta)
last_result_b = result_b[-1]
end_time = time.perf_counter()
execution_time_b = end_time - start_time
# Both branches report the value, so no separate debug print is needed.
if abs(last_result_b - expected_output_complete_b) < 0.00005:
    print("test_arima_complete_vectorized_dataset_b: PASSED:", last_result_b)
    print("execution time:", execution_time_b)
else:
    print("test_arima_complete_vectorized_dataset_b: FAILED")
    print(last_result_b)
    print("execution time:", execution_time_b)

293.16202999999996
test_arima_complete_vectorized_dataset_b: FAILED
293.16202999999996
execution time: 5.534387826919556


In [34]:
# Dataset C
# Loop-based ("basic") pipeline: difference, AR forecast, MA correction.
# Needs data_c, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_c = integrated_component_basic(data_c, order)
forecast_data_c = autoregressive_component_basic(integrated_data_c, lags, vector)
# forecast_data_c[i] is built from integrated_data_c[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_c = moving_average_component_basic(integrated_data_c[(lags + 1):], forecast_data_c, lags, theta)
last_result_c = result_c[-1]
end_time = time.perf_counter()
execution_time_c = end_time - start_time
# Only the final corrected value is compared with the reference.
if abs(last_result_c - expected_output_complete_c) < 0.00005:
    print("test_arima_complete_basic_dataset_c: PASSED:", last_result_c)
    print("execution time:", execution_time_c)
else:
    print("test_arima_complete_basic_dataset_c: FAILED")
    print(last_result_c)
    print("execution time:", execution_time_c)

test_arima_complete_basic_dataset_c: FAILED
-6.40901
execution time: 329.0239520072937


In [33]:
# Dataset C, vectorized pipeline: difference, AR forecast, MA correction.
# Needs data_c, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_c = integrated_component_vectorized(data_c, order)
forecast_data_c = autoregressive_component_vectorized(integrated_data_c, lags, vector)
# forecast_data_c[i] is built from integrated_data_c[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_c = moving_average_component_vectorized(integrated_data_c[(lags + 1):], forecast_data_c, lags, theta)
last_result_c = result_c[-1]
end_time = time.perf_counter()
execution_time_c = end_time - start_time
# Both branches report the value, so no separate debug print is needed.
if abs(last_result_c - expected_output_complete_c) < 0.00005:
    print("test_arima_complete_vectorized_dataset_c: PASSED:", last_result_c)
    print("execution time:", execution_time_c)
else:
    print("test_arima_complete_vectorized_dataset_c: FAILED")
    print(last_result_c)
    print("execution time:", execution_time_c)

-6.40901
test_arima_complete_vectorized_dataset_c: FAILED
-6.40901
execution time: 12.904079675674438


In [37]:
# Dataset D
# Loop-based ("basic") pipeline: difference, AR forecast, MA correction.
# Needs data_d, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_d = integrated_component_basic(data_d, order)
forecast_data_d = autoregressive_component_basic(integrated_data_d, lags, vector)
# forecast_data_d[i] is built from integrated_data_d[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_d = moving_average_component_basic(integrated_data_d[(lags + 1):], forecast_data_d, lags, theta)
last_result_d = result_d[-1]
end_time = time.perf_counter()
execution_time_d = end_time - start_time
# Only the final corrected value is compared with the reference.
if abs(last_result_d - expected_output_complete_d) < 0.00005:
    print("test_arima_complete_basic_dataset_d: PASSED:", last_result_d)
    print("execution time:", execution_time_d)
else:
    print("test_arima_complete_basic_dataset_d: FAILED")
    print(last_result_d)
    print("execution time:", execution_time_d)

test_arima_complete_basic_dataset_d: FAILED
-1.7463627392938856
execution time: 107.31236505508423


In [36]:

# Dataset D, vectorized pipeline: difference, AR forecast, MA correction.
# Needs data_d, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_d = integrated_component_vectorized(data_d, order)
forecast_data_d = autoregressive_component_vectorized(integrated_data_d, lags, vector)
# forecast_data_d[i] is built from integrated_data_d[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_d = moving_average_component_vectorized(integrated_data_d[(lags + 1):], forecast_data_d, lags, theta)
last_result_d = result_d[-1]
end_time = time.perf_counter()
execution_time_d = end_time - start_time
# Both branches report the value, so no separate debug print is needed.
if abs(last_result_d - expected_output_complete_d) < 0.00005:
    print("test_arima_complete_vectorized_dataset_d: PASSED:", last_result_d)
    print("execution time:", execution_time_d)
else:
    print("test_arima_complete_vectorized_dataset_d: FAILED")
    print(last_result_d)
    print("execution time:", execution_time_d)

-1.7463627392938856
test_arima_complete_vectorized_dataset_d: FAILED
-1.7463627392938856
execution time: 2.063900947570801


In [41]:
# Dataset E
# Loop-based ("basic") pipeline: difference, AR forecast, MA correction.
# Needs data_e, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_e = integrated_component_basic(data_e, order)
forecast_data_e = autoregressive_component_basic(integrated_data_e, lags, vector)
# forecast_data_e[i] is built from integrated_data_e[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_e = moving_average_component_basic(integrated_data_e[(lags + 1):], forecast_data_e, lags, theta)
last_result_e = result_e[-1]
end_time = time.perf_counter()
execution_time_e = end_time - start_time
# Only the final corrected value is compared with the reference.
if abs(last_result_e - expected_output_complete_e) < 0.00005:
    print("test_arima_complete_basic_dataset_e: PASSED:", last_result_e)
    print("execution time:", execution_time_e)
else:
    print("test_arima_complete_basic_dataset_e: FAILED")
    print(last_result_e)
    print("execution time:", execution_time_e)

test_arima_complete_basic_dataset_e: FAILED
-2.5949976000000037
execution time: 61.96626114845276


In [42]:
# Dataset E v2
# Same loop-based pipeline as the plain Dataset E cell, but the AR step uses
# autoregressive_component_basic_v2 (no intermediate lag matrix).
# Needs data_e, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_e = integrated_component_basic(data_e, order)
forecast_data_e = autoregressive_component_basic_v2(integrated_data_e, lags, vector)
# forecast_data_e[i] is built from integrated_data_e[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_e = moving_average_component_basic(integrated_data_e[(lags + 1):], forecast_data_e, lags, theta)
last_result_e = result_e[-1]
end_time = time.perf_counter()
execution_time_e = end_time - start_time
# The label names the v2 variant so its output is not confused with the plain basic run.
if abs(last_result_e - expected_output_complete_e) < 0.00005:
    print("test_arima_complete_basic_v2_dataset_e: PASSED:", last_result_e)
    print("execution time:", execution_time_e)
else:
    print("test_arima_complete_basic_v2_dataset_e: FAILED")
    print(last_result_e)
    print("execution time:", execution_time_e)

test_arima_complete_basic_dataset_e: FAILED
-2.5949976000000037
execution time: 45.764225006103516


In [40]:
# Dataset E, vectorized pipeline: difference, AR forecast, MA correction.
# Needs data_e, whose np.loadtxt line is commented out in the loading cell.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_e = integrated_component_vectorized(data_e, order)
forecast_data_e = autoregressive_component_vectorized(integrated_data_e, lags, vector)
# forecast_data_e[i] is built from integrated_data_e[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_e = moving_average_component_vectorized(integrated_data_e[(lags + 1):], forecast_data_e, lags, theta)
last_result_e = result_e[-1]
end_time = time.perf_counter()
execution_time_e = end_time - start_time
# Both branches report the value, so no separate debug print is needed.
if abs(last_result_e - expected_output_complete_e) < 0.00005:
    print("test_arima_complete_vectorized_dataset_e: PASSED:", last_result_e)
    print("execution time:", execution_time_e)
else:
    print("test_arima_complete_vectorized_dataset_e: FAILED")
    print(last_result_e)
    print("execution time:", execution_time_e)

-2.5949976000000037
test_arima_complete_vectorized_dataset_e: FAILED
-2.5949976000000037
execution time: 1.5847039222717285


In [45]:
# Dataset F
# Loop-based ("basic") pipeline: difference, AR forecast, MA correction.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_f = integrated_component_basic(data_f, order)
forecast_data_f = autoregressive_component_basic(integrated_data_f, lags, vector)
# forecast_data_f[i] is built from integrated_data_f[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_f = moving_average_component_basic(integrated_data_f[(lags + 1):], forecast_data_f, lags, theta)
last_result_f = result_f[-1]
end_time = time.perf_counter()
execution_time_f = end_time - start_time
# Only the final corrected value is compared with the reference; this dataset
# uses a wider tolerance (0.005) than the cells for datasets A-E (0.00005).
if abs(last_result_f - expected_output_complete_f) < 0.005:
    print("test_arima_complete_basic_dataset_f: PASSED:", last_result_f)
    print("execution time:", execution_time_f)
else:
    print("test_arima_complete_basic_dataset_f: FAILED")
    print(last_result_f)
    print("execution time:", execution_time_f)

test_arima_complete_basic_dataset_f: FAILED
721.2226800000001
execution time: 284.3350999355316


In [44]:
# Dataset F, vectorized pipeline: difference, AR forecast, MA correction.
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
integrated_data_f = integrated_component_vectorized(data_f, order)
forecast_data_f = autoregressive_component_vectorized(integrated_data_f, lags, vector)
# forecast_data_f[i] is built from integrated_data_f[i : i + lags + 1], so it is paired
# with the observation right after that window. `lags` is also passed as the MA order.
result_f = moving_average_component_vectorized(integrated_data_f[(lags + 1):], forecast_data_f, lags, theta)
last_result_f = result_f[-1]
end_time = time.perf_counter()
execution_time_f = end_time - start_time
# Both branches report the value, so no separate debug print is needed. This
# dataset uses a wider tolerance (0.005) than the cells for datasets A-E (0.00005).
if abs(last_result_f - expected_output_complete_f) < 0.005:
    print("test_arima_complete_vectorized_dataset_f: PASSED:", last_result_f)
    print("execution time:", execution_time_f)
else:
    print("test_arima_complete_vectorized_dataset_f: FAILED")
    print(last_result_f)
    print("execution time:", execution_time_f)

721.2226800000001
test_arima_complete_vectorized_dataset_f: FAILED
721.2226800000001
execution time: 9.943871259689331
