In [2]:
using CSV
using DataFrames
using Dates

In [6]:
# autoregressive_component

function autoregressive_component_basic(data::Vector{Float64}, lags::Int, vector::Vector{Float64})
    rows = length(data)
    lagged_matrix = zeros(Float64, rows, lags + 1)
    
    for i in 0:lags
        for j in 1:rows
            if j + i <= rows
                lagged_matrix[j, i + 1] = data[j + i]
            end
        end
    end
    
    lagged_matrix = lagged_matrix[1:(rows - lags), :]
    
    result = zeros(Float64, size(lagged_matrix, 1))
    for i in 1:size(lagged_matrix, 1)
        for j in 1:length(vector)
            result[i] += lagged_matrix[i, j] * vector[j]
        end
    end
    
    return result
end


function autoregressive_component_vectorized(data::Vector{Float64}, lags::Int, vector::Vector{Float64})
    rows = length(data)
    lagged_matrix = zeros(Float64, rows, lags + 1)
    
    for i in 0:lags
        lagged_matrix[:, i + 1] = circshift(data, -i)
    end
    
    # Remove rows with incomplete data
    lagged_matrix = lagged_matrix[1:(rows - lags), :]
    
    result = lagged_matrix * vector
    return result
end

# integrated_component

function integrated_component_basic(data::Vector{Float64}, order::Int)
    differenced_data = copy(data)
    for _ in 1:order
        new_data = Float64[]
        for i in 2:length(differenced_data)
            push!(new_data, differenced_data[i] - differenced_data[i - 1])
        end
        differenced_data = new_data
    end    
    return differenced_data
end


function integrated_component_vectorized(data::Vector{Float64}, order::Int)
    differenced_data = data
    for _ in 1:order
        differenced_data = diff(differenced_data)
    end    
    return differenced_data
end

# moving_average_component

function moving_average_component_basic(original_data::Vector{Float64}, forecast_data::Vector{Float64}, order::Int, theta::Vector{Float64})
    min_length = min(length(original_data), length(forecast_data))
    original_data = original_data[1:min_length]
    forecast_data = forecast_data[1:min_length]
    
    errors = original_data - forecast_data
    updated_forecast = copy(forecast_data)
    
    for t in (order + 1):length(errors)
        weighted_sum = 0.0
        for i in 1:order
            weighted_sum += theta[i] * errors[t - i]
        end
        updated_forecast[t] += weighted_sum
    end
    
    return updated_forecast
end


function moving_average_component_vectorized(original_data::Vector{Float64}, forecast_data::Vector{Float64}, order::Int, theta::Vector{Float64})
    min_length = min(length(original_data), length(forecast_data))
    original_data = original_data[1:min_length]
    forecast_data = forecast_data[1:min_length]
    
    errors = original_data - forecast_data
    lagged_errors = zeros(Float64, order, length(errors))
    
    for i in 1:order
        lagged_errors[i, (i + 1):end] = errors[1:(end - i)]
    end
    
    weighted_lagged_errors = theta' * lagged_errors
    
    updated_forecast = forecast_data .+ weighted_lagged_errors
    return updated_forecast
end


# function moving_average_component_vectorized(original_data::Vector{Float64}, forecast_data::Vector{Float64}, order::Int, theta::Vector{Float64})
#     min_length = min(length(original_data), length(forecast_data))
#     original_data = original_data[1:min_length]
#     forecast_data = forecast_data[1:min_length]

#     # Adjust for errors from AR component
#     errors = original_data - forecast_data
#     lagged_errors = zeros(Float64, order, length(errors))
#     for i in 1:order
#         lagged_errors[i, (i + 1):end] = errors[1:(end - i)]
#     end
#     weighted_lagged_errors = theta' * lagged_errors
#     updated_forecast = forecast_data .+ weighted_lagged_errors

#     # Adjust for errors from own MA component
#     updated_errors = forecast_data - updated_forecast
#     lagged_updated_errors = zeros(Float64, order, length(updated_errors))
#     for i in 1:order
#         lagged_updated_errors[i, (i + 1):end] = updated_errors[1:(end - i)]
#     end
#     weighted_updated_errors = theta' * lagged_updated_errors
#     updated_forecast .+= weighted_updated_errors

#     return updated_forecast
# end


# function moving_average_component_vectorized(original_data::Vector{Float64}, forecast_data::Vector{Float64}, order::Int, theta::Vector{Float64})
#     min_length = min(length(original_data), length(forecast_data))
#     original_data = original_data[1:min_length]
#     forecast_data = forecast_data[1:min_length]

#     # Adjust for errors from AR component
#     errors = original_data - forecast_data
#     lagged_errors = zeros(Float64, order, min_length)
#     for i in 1:order
#         lagged_errors[i, (i + 1):end] .= errors[1:(end - i)]
#     end
#     weighted_lagged_errors = theta' * lagged_errors
#     updated_forecast = forecast_data .+ weighted_lagged_errors

#     # Adjust for errors from own MA component
#     updated_errors = forecast_data .- updated_forecast
#     lagged_updated_errors = zeros(Float64, order, min_length)
#     for i in 1:order
#         lagged_updated_errors[i, (i + 1):end] .= updated_errors[1:(end - i)]
#     end
#     weighted_updated_errors = theta' * lagged_updated_errors
#     updated_forecast .+= weighted_updated_errors

#     return updated_forecast
# end


moving_average_component_vectorized (generic function with 1 method)

In [10]:

# data_a = CSV.read("/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/wind_speed.csv", DataFrame)[:, 1]
# data_b = CSV.read("/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/energy_generation_solar.csv", DataFrame)[:, 1]
# data_c = CSV.read("/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/heart_rate.csv", DataFrame)[:, 1]
# data_d = CSV.read("/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/temperature_delhi.csv", DataFrame)[:, 1]
# data_e = CSV.read("/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/stock_open_microsoft.csv", DataFrame)[:, 1]
# data_f = CSV.read("/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/nyctaxitraffic.csv", DataFrame)[:, 1]

# Define the parameters for testing
lags = 1
vector = [0.9208, -0.0111]
order = 0
theta = [0.3]

# Expected outputs
expected_output_ar_a = 8.42711736
expected_output_integrated_a = 0.55796623
expected_output_ma_a = 8.25047461
expected_output_complete_a = 3.059677

expected_output_ar_b = 33.09027077
expected_output_integrated_b = 1.23456789
expected_output_ma_b = 33.56789012
expected_output_complete_b = 61.78846

expected_output_ar_c = 98.77752695
expected_output_integrated_c = 2.34567890
expected_output_ma_c = 99.12345678
expected_output_complete_c = -6.16423

expected_output_complete_d = -1.066503

expected_output_complete_e = 6.920413

expected_output_complete_f = 1143.753

1143.753

In [6]:
#function test_arima_complete_basic_dataset_a()
    start_time = now()
    integrated_data = integrated_component_basic(data_a, order)
    forecast_data = autoregressive_component_basic(integrated_data, lags, vector)
    result = moving_average_component_basic(integrated_data[lags + 1:end], forecast_data, lags, theta)
    last_result_a = result[end]
    end_time = now()
    execution_time = Dates.value(end_time - start_time) / 1000
    if abs(last_result_a - expected_output_complete_a) < 0.00005
      println("test_arima_complete_basic_dataset_a: PASSED: ", last_result_a)
      println("execution time:", execution_time)
      return true
    else
      println("test_arima_complete_basic_dataset_a: FAILED")
      println(last_result_a)
      return false
    end
#end

test_arima_complete_basic_dataset_a: PASSED: 3.0596765181255368
execution time:5.808


true

In [7]:
#function test_arima_complete_vectorized_dataset_a()
    start_time = now()
    integrated_data = integrated_component_vectorized(data_a, order)
    forecast_data = autoregressive_component_vectorized(integrated_data, lags, vector)
    result = moving_average_component_vectorized(integrated_data[lags + 1:end], forecast_data, lags, theta)
    last_result_a = result[end, end]
    end_time = now()
    execution_time = Dates.value(end_time - start_time) / 1000
    println(last_result_a)
    if abs(last_result_a - expected_output_complete_a) < 0.00005
        println("test_arima_complete_vectorized_dataset_d: PASSED: ", last_result_a)
        println("execution time:", execution_time)
        return true
    else
        println("test_arima_complete_vectorized_dataset_a: FAILED")
        println(last_result_a)
        return false
    end
#end

OutOfMemoryError: OutOfMemoryError()

In [12]:
data_f = CSV.read("/Users/niklas/Documents/GitHub/MasterThesis/0_Data_files/replicated_preprocessed_data_files/nyctaxitraffic.csv", DataFrame)[:, 1]


#function test_arima_complete_basic_dataset_f()
    start_time = now()
    integrated_data = integrated_component_basic(data_f, order)
    forecast_data = autoregressive_component_basic(integrated_data, lags, vector)
    result = moving_average_component_basic(integrated_data[lags + 1:end], forecast_data, lags, theta)
    last_result_f = result[end]
    end_time = now()
    execution_time = Dates.value(end_time - start_time) / 1000
    if abs(last_result_f - expected_output_complete_f) < 0.005
      println("test_arima_complete_basic_dataset_f: PASSED: ", last_result_f)
      println("execution time:", execution_time)
      return true
    else
      println("test_arima_complete_basic_dataset_f: FAILED")
      println(last_result_f)
      println("execution time:", execution_time)
      return false
    end
#  end


test_arima_complete_basic_dataset_f: FAILED
24715.20587
execution time:5.432


false

In [14]:
  
  #function test_arima_complete_vectorized_dataset_f()
    start_time = now()
    integrated_data = integrated_component_vectorized(data_f, order)
    forecast_data = autoregressive_component_vectorized(integrated_data, lags, vector)
    result = moving_average_component_vectorized(integrated_data[lags + 1:end], forecast_data, lags, theta)
    last_result_f = result[end, end]
    end_time = now()
    execution_time = Dates.value(end_time - start_time) / 1000
    println(last_result_f)
    if abs(last_result_f - expected_output_complete_f) < 0.005
      println("test_arima_complete_vectorized_dataset_f: PASSED: ", last_result_f)
      println("execution time:", execution_time)
      return true
    else
      println("test_arima_complete_vectorized_dataset_f: FAILED")
      println(last_result_f)
      return false
    end
  #end

OutOfMemoryError: OutOfMemoryError()