# **Regression**

1.  Basic Part: Implement a regression model to predict the number of dengue cases

> *   Step 1: Split Data
> *   Step 2: Preprocess Data
> *   Step 3: Implement Regression
> *   Step 4: Make Prediction
> *   Step 5: Train Model and Generate Result

2.  Advanced Part: Implement a regression model to predict the number of dengue cases in a different way than the basic part

# 1. Basic Part

Implement a regression model to predict the number of dengue cases

## Import Packages

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import math
import random

## Global attributes

Define the global attributes

In [2]:
# Locations of the CSV that is read and the CSV that is written
input_dataroot, output_dataroot = 'basic_input.csv', 'basic.csv'

# Placeholder for the raw CSV contents (rebound to a NumPy array when the file is loaded)
input_datalist = list()
# Prediction rows collected by MakePrediction; expected to end up as 10 rows of
# ['epiweek', 'CityA', 'CityB', 'CityC']
output_datalist = list()

In [3]:
# Shared state of the basic part; the list placeholders are rebound by
# SplitData / PreprocessData, and w_basic is appended to by Regression.
x, y = [], []                        # feature columns / target columns
x_train, y_train = [], []            # training rows
x_validation, y_validation = [], []  # validation rows
x_test, y_test = [], []              # rows to forecast
w_basic = []                         # one weight column vector per city
degree = 2                           # highest power of x in the model
y_terms = 3                          # number of lagged y columns in the fit

## Load the Input File

First, load the basic input file **basic_input.csv**

Input data would be stored in *input_datalist*

In [4]:
# Load every CSV row (header included) into one NumPy array of strings
with open(input_dataroot, newline='') as csvfile:
  csv_rows = csv.reader(csvfile)
  input_datalist = np.array(list(csv_rows))


## Implement the Regression Model


### Step 1: Split Data

Split the data in *input_datalist* into a training dataset and a testing dataset (the validation dataset is carved out of the training data at the end of Step 2)

In [5]:
def SplitData():
  """Slice input_datalist into the feature/target arrays of the basic part.

  Row 0 (the header) is skipped and rows 1-104 are used. Columns 1-3 become
  the features and columns 4-6 the targets. The first 94 rows are assigned to
  the training globals and the remaining rows to the testing globals.
  """
  global x, y, x_train, x_test, y_train, y_test

  data_rows = input_datalist[1:105]

  # Features vs. targets
  x = data_rows[:, 1:4]
  y = data_rows[:, 4:7]

  # Training rows vs. rows to forecast
  x_train, x_test = x[:94], x[94:]
  y_train, y_test = y[:94], y[94:]

  return

### Step 2: Preprocess Data

Handle the unreasonable data

Outliers and missing data can be handled either by removing the affected rows or by replacing the values with statistics such as the mean

In [6]:
def _clean_columns(rows):
  """Replace missing and outlying entries of *rows* in place, column by column.

  For every column the quartiles are taken over the non-empty entries, the
  1.5 * IQR fences are derived from them, and the mean of the entries inside
  the fences becomes the replacement value for empty strings and for entries
  outside the fences.
  """
  for col in range(rows.shape[1]):
    # Empty strings mark missing values and are left out of the statistics
    observed = [float(value) for value in rows[:, col] if value]

    q25 = np.percentile(observed, 25)
    q75 = np.percentile(observed, 75)
    iqr = q75 - q25
    lower = q25 - 1.5 * iqr
    upper = q75 + 1.5 * iqr

    # Mean of the values that are neither missing nor outliers
    inliers = [value for value in observed if not (value < lower or value > upper)]
    fill = np.mean(inliers)

    # NOTE(review): rows holds CSV text, so the mean is converted to a string
    # on assignment and may be cut to the array's string width - confirm OK.
    for row in rows:
      if row[col] == '' or float(row[col]) < lower or float(row[col]) > upper:
        row[col] = fill


def PreprocessData():
  """Clean the training data, then split off the validation rows.

  Missing values and IQR outliers in x_train and y_train are replaced by the
  column mean of the remaining values. Afterwards the first 84 rows stay in
  x_train / y_train and the rest become x_validation / y_validation.
  """
  global x_train, y_train, x_validation, y_validation

  _clean_columns(x_train)
  _clean_columns(y_train)

  # Named train_rows rather than `len` so the builtin is not shadowed
  train_rows = 84
  x_validation = x_train[train_rows:]
  y_validation = y_train[train_rows:]
  x_train = x_train[:train_rows]
  y_train = y_train[:train_rows]

  return

### Step 3: Implement Regression

In [7]:
def MSE(y_predicted, y_true, idx):
  """Return the mean squared error of column *idx*.

  y_predicted is indexed as a 2-D numeric array; the matching column of
  y_true is converted to float before the comparison.
  """
  actual = y_true[:, idx]
  residual = y_predicted[:, idx] - actual.astype(float)

  return np.sum(residual ** 2) / len(actual)

def MAPE(y_predicted, y_true, idx):
  """Return the mean absolute percentage error of column *idx* as text.

  The result is formatted with two decimals and a percent sign, e.g.
  "10.00%". Rows whose true value is 0 are left out, because the percentage
  error is undefined there and would otherwise turn the result into
  "inf%" or "nan%". If every true value is 0 the function returns "nan%".
  """
  actual = y_true[:, idx].astype(float)
  forecast = y_predicted[:, idx]

  usable = actual != 0
  if not np.any(usable):
    return "{:.2%}".format(float("nan"))

  mape = np.mean(np.abs((actual[usable] - forecast[usable]) / actual[usable]))

  return "{:.2%}".format(mape)

def _fit_city_weights(x_column, y_column, poly_degree, n_lags):
  """Return the least-squares weight column vector for one city.

  The design matrix columns are, in order: a constant 1, x**1 .. x**poly_degree,
  and y shifted down by 1 .. n_lags rows. The rows at the top of a shifted
  column, which have no earlier observation, are filled with the mean of y.
  """
  x_vals = x_column.astype(float)
  y_vals = y_column.astype(float)
  n_rows = len(y_vals)
  lag_fill = np.mean(y_vals)

  columns = [np.ones(n_rows)]
  for p in range(1, poly_degree + 1):
    columns.append(x_vals ** p)
  for lag in range(1, n_lags + 1):
    columns.append(np.concatenate((np.full(lag, lag_fill), y_vals[:n_rows - lag])))
  design = np.column_stack(columns)
  target = y_vals.reshape((n_rows, 1))

  # Normal equation w = (X^T X)^-1 X^T y.
  # NOTE(review): the explicit inverse is kept so the printed coefficients do
  # not change; np.linalg.lstsq would cope better with collinear columns.
  gram_inverse = np.linalg.inv(np.dot(design.transpose(), design))
  return np.dot(np.dot(gram_inverse, design.transpose()), target)


def _basic_predictions(x_rows, y_rows, past_rows, fill):
  """Return one-step-ahead predictions for every row of *x_rows* (3 cities).

  A lagged y is read from *y_rows* when the row index reaches far enough
  back; otherwise it comes from the tail of *past_rows*, or from *fill*
  (one value per city) when *past_rows* is None.
  """
  predictions = []
  for row_idx, features in enumerate(x_rows):
    row = []
    for city in range(3):
      weights = w_basic[city][:, 0]
      n_lags = len(weights) - (degree + 1)
      total = 0
      for p in range(degree + 1):
        total += weights[p] * float(features[city]) ** p
      for lag in range(1, n_lags + 1):
        if row_idx >= lag:
          previous = y_rows[row_idx - lag, city]
        elif past_rows is not None:
          previous = past_rows[len(past_rows) + (row_idx - lag), city]
        else:
          previous = fill[city]
        total += weights[degree + lag] * float(previous)
      row.append(float(total))
    predictions.append(row)
  return np.array(predictions)


def Regression(report=False):
  """Fit one regression model per city and store the weights in w_basic.

  Each model is w0 + w1*x + ... + w_degree*x^degree + one weight per lagged
  y term (y_terms of them). w_basic is emptied first, so calling the function
  again replaces the previous weights instead of appending after them.

  Args:
    report: when true, also predict the training and validation rows and
      print their MSE and MAPE per city. Defaults to False.
  """
  global x_train, y_train, x_validation, y_validation, w_basic, degree, y_terms

  # Drop weights from an earlier run; consumers read w_basic[0..2]
  w_basic.clear()

  for city in range(3):
    w_basic.append(_fit_city_weights(x_train[:, city], y_train[:, city], degree, y_terms))

  if report:
    mean_y = [np.mean(y_train[:, city].astype(float)) for city in range(3)]
    datasets = (
      ("Training", _basic_predictions(x_train, y_train, None, mean_y), y_train),
      ("Validation", _basic_predictions(x_validation, y_validation, y_train, mean_y), y_validation),
    )
    for metric_name, metric in (("MSE", MSE), ("MAPE", MAPE)):
      for label, predicted, actual in datasets:
        for city, city_name in enumerate("ABC"):
          print("{} of {} Data {}:".format(metric_name, label, city_name), metric(predicted, actual, city))
        print()

  return

### Step 4: Make Prediction

Make prediction of testing dataset and store the value in *output_datalist*

In [8]:
def MakePrediction():
  """Forecast every row of x_test and collect the rows in output_datalist.

  Each output row is [epiweek, CityA, CityB, CityC], with epiweeks counting
  up from 202143. A forecast uses the polynomial weights of w_basic plus the
  lagged y terms; the lags come from earlier forecasts once enough exist and
  from the tail of y_validation before that. Every forecast is written back
  into y_test so later rows can use it as a lag.

  output_datalist is rebuilt from scratch on every call, so re-running the
  cell does not append a second copy of the rows.
  """
  global output_datalist, x_test, y_test, y_validation, w_basic, degree

  output_datalist = []

  first_week = 202143
  n_validation = len(y_validation)
  for step, features in enumerate(x_test):
    row = [first_week + step]
    for city in range(3):
      # Flatten the (n, 1) weight column so the sum stays a scalar;
      # float() on a 1-element array is deprecated in recent NumPy.
      weights = w_basic[city][:, 0]
      # Whatever follows the polynomial weights is one weight per lag
      n_lags = len(weights) - (degree + 1)

      total = 0
      for p in range(degree + 1):
        total += weights[p] * float(features[city]) ** p
      for lag in range(1, n_lags + 1):
        if step >= lag:
          previous = y_test[step - lag, city]
        else:
          previous = y_validation[n_validation + (step - lag), city]
        total += weights[degree + lag] * float(previous)

      prediction = float(total)
      row.append(prediction)
      # NOTE(review): y_test is a slice of the CSV text array, so the value is
      # stored as a string and may be cut to the array's string width.
      y_test[step, city] = prediction
    output_datalist.append(row)

  return

### Step 5: Train Model and Generate Result

* If the regression model is *3x^2 + 2x^1 + 1*, your output would be: 
```
3 2 1
```

In [9]:
# Run the four pipeline stages in order
for stage in (SplitData, PreprocessData, Regression, MakePrediction):
  stage()

# Show the fitted coefficients, one line per city
print("Format: w0 + w1*x + w2*x^2 + w3*y[i-1] + w4*y[i-2] + w5*y[i-3]")
for city_weights in w_basic:
  print("".join("{} ".format(coef[0]) for coef in city_weights))

Format: w0 + w1*x + w2*x^2 + w3*y[i-1] + w4*y[i-2] + w5*y[i-3]
88.19744587259379 -6.123390631560916 0.11384392120334641 0.5532560607501292 0.14248971985171283 0.16509213933900213 
-286.69249426099907 25.388943141300537 -0.5517987983452397 0.45913991116650466 0.28395048599523554 0.10977712288018998 
5.041109722450386 -0.1923212523452138 0.0025648023383381124 0.6810187202913461 0.17893169141710188 0.08744350879374774 


## Write the Output File

Write the prediction to output csv
> Format: 'epiweek', 'CityA', 'CityB', 'CityC'

In [10]:
# Write all prediction rows to the output CSV in one call
with open(output_dataroot, 'w', newline='', encoding="utf-8") as csvfile:
  csv.writer(csvfile).writerows(output_datalist)

# 2. Advanced Part

Implement the regression in a different way than the basic part to help your predictions for the number of dengue cases


In [11]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import math
import random


# CSV files read by the advanced part and the CSV it writes
input_dataroot1 = 'basic_input.csv'
input_dataroot2 = 'advanced_input1.csv'
input_dataroot3 = 'advanced_input2.csv'
output_dataroot2 = 'advanced.csv'

# Raw contents of the three inputs and the prediction rows
input_datalist2_1 =  []
input_datalist2_2 =  []
input_datalist2_3 =  []  # loaded below; not referenced by the advanced functions in this section
output_datalist2 =  []

# Global variables
x2 = []
y2 = []
x_train2 = []
y_train2 = []  # named y_train2 so the basic part's y_train is left untouched
x_validation2 = []
y_validation2 = []
x_test2 = []
y_test2 = []
w_advanced = []
degree2 = 2      # highest power used for each feature
y_terms2 = 3     # lagged y terms for the first two cities
y_terms2c = 2    # lagged y terms for the third city


# Read each input CSV (header included) into a NumPy array of strings
with open(input_dataroot1, newline='') as csvfile:
  input_datalist2_1 = np.array(list(csv.reader(csvfile)))

with open(input_dataroot2, newline='') as csvfile:
  input_datalist2_2 = np.array(list(csv.reader(csvfile)))

with open(input_dataroot3, newline='') as csvfile:
  input_datalist2_3 = np.array(list(csv.reader(csvfile)))


def SplitData2():
  """Slice the advanced inputs into feature/target arrays.

  Rows 1-104 are used. The features are columns 1-3 of input_datalist2_1
  followed by columns 1-3 of input_datalist2_2 (six columns in total); the
  targets are columns 4-6 of input_datalist2_1. The first 94 rows go to the
  training globals and the remaining rows to the testing globals.
  """
  global x2, y2, x_train2, x_test2, y_train2, y_test2

  first_rows = input_datalist2_1[1:105]
  second_rows = input_datalist2_2[1:105]

  # Features from both files side by side, targets from the first file
  x2 = np.concatenate((first_rows[:, 1:4], second_rows[:, 1:4]), axis = 1)
  y2 = first_rows[:, 4:7]

  # Training rows vs. rows to forecast
  x_train2, x_test2 = x2[:94], x2[94:]
  y_train2, y_test2 = y2[:94], y2[94:]

  return


def _clean_columns2(rows):
  """Replace missing and outlying entries of *rows* in place, column by column.

  For every column the quartiles are taken over the non-empty entries, the
  1.5 * IQR fences are derived from them, and the mean of the entries inside
  the fences becomes the replacement value for empty strings and for entries
  outside the fences.
  """
  for col in range(rows.shape[1]):
    # Empty strings mark missing values and are left out of the statistics
    observed = [float(value) for value in rows[:, col] if value]

    q25 = np.percentile(observed, 25)
    q75 = np.percentile(observed, 75)
    iqr = q75 - q25
    lower = q25 - 1.5 * iqr
    upper = q75 + 1.5 * iqr

    # Mean of the values that are neither missing nor outliers
    inliers = [value for value in observed if not (value < lower or value > upper)]
    fill = np.mean(inliers)

    # NOTE(review): rows holds CSV text, so the mean is converted to a string
    # on assignment and may be cut to the array's string width - confirm OK.
    for row in rows:
      if row[col] == '' or float(row[col]) < lower or float(row[col]) > upper:
        row[col] = fill


def PreprocessData2():
  """Clean the advanced training data, then split off the validation rows.

  Missing values and IQR outliers in x_train2 and y_train2 are replaced by
  the column mean of the remaining values. Afterwards the first 84 rows stay
  in x_train2 / y_train2 and the rest become x_validation2 / y_validation2.
  """
  global x_train2, y_train2, x_validation2, y_validation2

  _clean_columns2(x_train2)
  _clean_columns2(y_train2)

  # Named train_rows rather than `len` so the builtin is not shadowed
  train_rows = 84
  x_validation2 = x_train2[train_rows:]
  y_validation2 = y_train2[train_rows:]
  x_train2 = x_train2[:train_rows]
  y_train2 = y_train2[:train_rows]

  return


def MSE2(y_predicted, y_true, idx):
  """Return the mean squared error between column *idx* of both arrays.

  The y_true column is converted to float before it is subtracted.
  """
  truth = y_true[:, idx]
  errors = y_predicted[:, idx] - truth.astype(float)
  total = np.sum(errors ** 2)

  return total / len(truth)

def MAPE2(y_predicted, y_true, idx):
  """Return the mean absolute percentage error of column *idx* as text.

  The result is formatted with two decimals and a percent sign, e.g.
  "10.00%". Rows whose true value is 0 are left out, because the percentage
  error is undefined there and would otherwise turn the result into
  "inf%" or "nan%". If every true value is 0 the function returns "nan%".
  """
  actual = y_true[:, idx].astype(float)
  forecast = y_predicted[:, idx]

  usable = actual != 0
  if not np.any(usable):
    return "{:.2%}".format(float("nan"))

  mape = np.mean(np.abs((actual[usable] - forecast[usable]) / actual[usable]))

  return "{:.2%}".format(mape)

def _fit_city_weights2(x_first, x_second, y_column, poly_degree, n_lags):
  """Return the least-squares weight column vector for one city.

  The design matrix columns are, in order: a constant 1, the powers
  1 .. poly_degree of *x_first*, the same powers of *x_second*, and y shifted
  down by 1 .. n_lags rows. The rows at the top of a shifted column, which
  have no earlier observation, are filled with the mean of y.
  """
  first = x_first.astype(float)
  second = x_second.astype(float)
  y_vals = y_column.astype(float)
  n_rows = len(y_vals)
  lag_fill = np.mean(y_vals)

  columns = [np.ones(n_rows)]
  for p in range(1, poly_degree + 1):
    columns.append(first ** p)
  for p in range(1, poly_degree + 1):
    columns.append(second ** p)
  for lag in range(1, n_lags + 1):
    columns.append(np.concatenate((np.full(lag, lag_fill), y_vals[:n_rows - lag])))
  design = np.column_stack(columns)
  target = y_vals.reshape((n_rows, 1))

  # Normal equation w = (X^T X)^-1 X^T y.
  # NOTE(review): the explicit inverse is kept so the printed coefficients do
  # not change; np.linalg.lstsq would cope better with collinear columns.
  gram_inverse = np.linalg.inv(np.dot(design.transpose(), design))
  return np.dot(np.dot(gram_inverse, design.transpose()), target)


def _advanced_predictions(x_rows, y_rows, past_rows, fill):
  """Return one-step-ahead predictions for every row of *x_rows* (3 cities).

  City i uses feature columns i and i + 3. A lagged y is read from *y_rows*
  when the row index reaches far enough back; otherwise it comes from the
  tail of *past_rows*, or from *fill* (one value per city) when *past_rows*
  is None.
  """
  predictions = []
  for row_idx, features in enumerate(x_rows):
    row = []
    for city in range(3):
      weights = w_advanced[city][:, 0]
      n_lags = len(weights) - (2 * degree2 + 1)
      total = 0
      for p in range(degree2 + 1):
        total += weights[p] * float(features[city]) ** p
      for p in range(1, degree2 + 1):
        total += weights[degree2 + p] * float(features[city + 3]) ** p
      for lag in range(1, n_lags + 1):
        if row_idx >= lag:
          previous = y_rows[row_idx - lag, city]
        elif past_rows is not None:
          previous = past_rows[len(past_rows) + (row_idx - lag), city]
        else:
          previous = fill[city]
        total += weights[2 * degree2 + lag] * float(previous)
      row.append(float(total))
    predictions.append(row)
  return np.array(predictions)


def Regression2(report=False):
  """Fit one regression model per city and store the weights in w_advanced.

  Each model combines a constant, the powers 1..degree2 of two feature
  columns (column i and column i + 3 for city i) and lagged y terms:
  y_terms2 lags for the first two cities and y_terms2c for the third.
  w_advanced is emptied first, so calling the function again replaces the
  previous weights instead of appending after them.

  Args:
    report: when true, also predict the training and validation rows and
      print their MSE and MAPE per city. Defaults to False.
  """
  global x_train2, y_train2, x_validation2, y_validation2, w_advanced, degree2, y_terms2, y_terms2c

  # Drop weights from an earlier run; consumers read w_advanced[0..2]
  w_advanced.clear()

  lag_counts = (y_terms2, y_terms2, y_terms2c)
  for city in range(3):
    w_advanced.append(_fit_city_weights2(
      x_train2[:, city], x_train2[:, city + 3], y_train2[:, city], degree2, lag_counts[city]))

  if report:
    mean_y = [np.mean(y_train2[:, city].astype(float)) for city in range(3)]
    datasets = (
      ("Training", _advanced_predictions(x_train2, y_train2, None, mean_y), y_train2),
      ("Validation", _advanced_predictions(x_validation2, y_validation2, y_train2, mean_y), y_validation2),
    )
    for metric_name, metric in (("MSE", MSE2), ("MAPE", MAPE2)):
      for label, predicted, actual in datasets:
        for city, city_name in enumerate("ABC"):
          print("{} of {} Data {}:".format(metric_name, label, city_name), metric(predicted, actual, city))
        print()

  return


def MakePrediction2():
  """Forecast every row of x_test2 and collect the rows in output_datalist2.

  Each output row is [epiweek, CityA, CityB, CityC], with epiweeks counting
  up from 202143. City i uses feature columns i and i + 3 with the weights in
  w_advanced[i], plus as many lagged y terms as that weight vector carries.
  The lags come from earlier forecasts once enough exist and from the tail of
  y_validation2 before that. Every forecast is written back into y_test2 so
  later rows can use it as a lag.

  output_datalist2 is rebuilt from scratch on every call, so re-running the
  cell does not append a second copy of the rows.
  """
  global output_datalist2, x_test2, y_test2, y_validation2, w_advanced, degree2

  output_datalist2 = []

  first_week = 202143
  n_validation = len(y_validation2)
  for step, features in enumerate(x_test2):
    row = [first_week + step]
    for city in range(3):
      # Flatten the (n, 1) weight column so the sum stays a scalar;
      # float() on a 1-element array is deprecated in recent NumPy.
      weights = w_advanced[city][:, 0]
      # Whatever follows the two polynomial groups is one weight per lag
      n_lags = len(weights) - (2 * degree2 + 1)

      total = 0
      for p in range(degree2 + 1):
        total += weights[p] * float(features[city]) ** p
      for p in range(1, degree2 + 1):
        total += weights[degree2 + p] * float(features[city + 3]) ** p
      for lag in range(1, n_lags + 1):
        if step >= lag:
          previous = y_test2[step - lag, city]
        else:
          previous = y_validation2[n_validation + (step - lag), city]
        total += weights[2 * degree2 + lag] * float(previous)

      prediction = float(total)
      row.append(prediction)
      # NOTE(review): y_test2 is a slice of the CSV text array, so the value is
      # stored as a string and may be cut to the array's string width.
      y_test2[step, city] = prediction
    output_datalist2.append(row)

  return


# Run the four advanced pipeline stages in order
for stage in (SplitData2, PreprocessData2, Regression2, MakePrediction2):
  stage()

# Show the fitted coefficients, one line per city; the third city has one
# lag term fewer, so its own format line is printed just before it
print("Format: w0 + w1*x1 + w2*x1^2 + w3*x2 + w4*x2^2 + w5*y[i-1] + w6*y[i-2] + w7*y[i-3]")
for position, city_weights in enumerate(w_advanced):
  if position == 2:
    print("Format: w0 + w1*x1 + w2*x1^2 + w3*x2 + w4*x2^2 + w5*y[i-1] + w6*y[i-2]")
  print("".join("{} ".format(coef[0]) for coef in city_weights))


# Write all prediction rows to the output CSV in one call
with open(output_dataroot2, 'w', newline='', encoding="utf-8") as csvfile:
  csv.writer(csvfile).writerows(output_datalist2)

Format: w0 + w1*x1 + w2*x1^2 + w3*x2 + w4*x2^2 + w5*y[i-1] + w6*y[i-2] + w7*y[i-3]
82.75884027372678 -5.728073098360248 0.10580930531350285 0.20983511711331437 -0.009150342848267198 0.5552548485326276 0.1365937751060622 0.1679260197946708 
-303.741287542661 26.871644969015243 -0.5828241984013879 0.23009084744854147 -0.04270006824116442 0.44622737088028147 0.2813561028026016 0.1206999373560042 
Format: w0 + w1*x1 + w2*x1^2 + w3*x2 + w4*x2^2 + w5*y[i-1] + w6*y[i-2]
-21.889704078344295 2.0123402971008773 -0.03955183400834408 -0.3371501236324219 0.00897571588491172 0.717896225997679 0.23653063625403709 
