In [None]:
import sys
import pandas as pd
import numpy as np

In [None]:
# Read the Big5-encoded training CSV and keep only the columns from
# index 3 onward (the first three columns are dropped).
data = pd.read_csv('./train.csv', encoding='big5').iloc[:, 3:]

# Every cell that holds the literal string 'NR' is overwritten with 0.
nr_cells = (data == 'NR')
data[nr_cells] = 0

# NumPy array form of the cleaned table, consumed by the reshaping cells.
raw_data = data.to_numpy()

In [None]:
# raw_data stacks one 18-row block per recorded day (12 months x 20 days).
# Lay those blocks side by side so each of the 18 rows becomes one
# continuous hourly series: column index = 24 * (20 * month + day) + hour.
rows_per_day = 18
hours_per_day = 24
total_days = 20 * 12

dayFlatten_data = np.empty([rows_per_day, hours_per_day * total_days])
for day_index in range(total_days):
    src_row = rows_per_day * day_index
    dst_col = hours_per_day * day_index
    day_block = raw_data[src_row:src_row + rows_per_day, 0:hours_per_day]
    dayFlatten_data[:, dst_col:dst_col + hours_per_day] = day_block

In [None]:
# Build the supervised training set from the flattened hourly series.
#
# Each sample is a window of 9 consecutive hours across all 18 rows
# (flattened to 18 * 9 values); its target is row 9 of the hour that
# immediately follows the window.
# NOTE(review): row 9 is presumably the PM2.5 measurement -- confirm against
# the row order in train.csv.
#
# dayFlatten_data holds 20 days * 24 h = 480 columns per month, laid out
# month after month.  Months are not contiguous in time, so a window must
# never straddle two months: each month therefore yields 480 - 9 = 471
# samples.
#
# Fix: the column index into dayFlatten_data now includes the month offset
# (month * 480).  Previously only `day * 24 + hour` was used, so the samples
# of all 12 months were copies of the first month's data.
hours_per_month = 20 * 24
window = 9
samples_per_month = hours_per_month - window  # 471

x = np.empty([12 * samples_per_month, 18 * window], dtype=float)
y = np.empty([12 * samples_per_month, 1], dtype=float)
for month in range(12):
    month_start = month * hours_per_month
    for offset in range(samples_per_month):
        # offset == day * 24 + hour, covering every start hour whose
        # window and target both stay inside the current month.
        start = month_start + offset
        row = month * samples_per_month + offset
        x[row, :] = dayFlatten_data[:, start:start + window].flatten()
        y[row, 0] = dayFlatten_data[9, start + window]
print(x)
print(y)

In [None]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Optimiser settings: step size, number of iterations, decay rates of the
# running averages of the gradient (gamma1) and of the squared gradient
# (gamma2), and a small constant that keeps the update denominator non-zero.
hyper_parameter = {
    "learning_rate" : 0.001,
    "n_iterations" : 1000,
    "gamma1" : 0.9,
    "gamma2" : 0.999,
    "eps" : 1e-8,
}

# Linear model with a bias term: prepend a column of ones to the features.
dim = 18 * 9 + 1
w = np.zeros([dim, 1])
x_b = np.concatenate((np.ones((x.shape[0], 1)), x), axis=1)
print(x_b)
m = x_b.shape[0]
grad_momentum = np.zeros_like(w)
grad_RMSProp = np.zeros_like(w)
losses = []

# Weights from the most recent iteration whose loss was finite, and the
# number of updates that produced the weights finally kept.
w_last_finite = w
updates_applied = 0

for i in range(hyper_parameter["n_iterations"]):
    # Residual is reused for both the RMSE and the gradient.
    residual = x_b @ w - y
    loss = np.sqrt(np.mean(residual ** 2))

    if np.isnan(loss) or np.isinf(loss):
        print(f"Stopping early due to numerical instability at iteration {i}")
        # The latest update produced unusable weights: roll back so that the
        # saved weights are the last ones with a finite loss.
        w = w_last_finite
        updates_applied = max(i - 1, 0)
        break
    losses.append(loss)
    w_last_finite = w

    if i % 100 == 0:
        print(str(i) + ':' + str(loss))

    # Gradient of the mean squared error with respect to w.
    gradient = 2 / m * (x_b.T @ residual)
    # Exponential moving average of the gradient.
    grad_momentum = hyper_parameter["gamma1"] * grad_momentum + (1 - hyper_parameter["gamma1"]) * gradient
    # Root of the exponential moving average of the squared gradient
    # (stored as the root, hence squared again on the next step).
    grad_RMSProp = np.sqrt(hyper_parameter["gamma2"] * (grad_RMSProp ** 2) + (1 - hyper_parameter["gamma2"]) * (gradient ** 2))
    w = w - hyper_parameter["learning_rate"] * grad_momentum / (grad_RMSProp + hyper_parameter["eps"])
    updates_applied = i + 1

# Report the loss of the weights actually kept.  The loss variable inside the
# loop is measured before each update, so it is one step stale (and undefined
# when n_iterations is 0); recompute it here instead.
final_loss = np.sqrt(np.mean((x_b @ w - y) ** 2))
print(str(updates_applied) + ':' + str(final_loss))

np.save('weight.npy', w)

plt.plot(losses)
plt.xlabel("Iteration")
plt.ylabel("RMSE Loss")
plt.title("Loss Curve over Iterations")
plt.grid()
plt.show()

In [None]:
# Load the header-less, Big5-encoded test CSV.
testdata = pd.read_csv('./test.csv', header = None, encoding = 'big5')

# Keep the columns from index 2 onward.  The explicit .copy() makes this an
# independent frame, so the masked assignment below neither triggers
# SettingWithCopyWarning nor risks writing through to `testdata`.
test_data = testdata.iloc[:, 2:].copy()
test_data[test_data == 'NR'] = 0
test_data = test_data.to_numpy()

# Every consecutive group of 18 rows (9 hourly values each) forms one test
# sample; flatten each group row-major into a single 18 * 9 feature vector.
n_test = 240
test_x = test_data[:18 * n_test, :].astype(float).reshape(n_test, 18 * 9)

# No feature normalisation is applied here, matching the training cell,
# which fits the weights on the raw (unnormalised) features.

# Prepend the bias column of ones expected by the trained weight vector.
test_x = np.concatenate((np.ones([n_test, 1]), test_x), axis = 1).astype(float)
test_x

In [None]:
# Reload the weight vector stored in weight.npy and apply it to the test
# feature matrix; the resulting predictions are shown as the cell output.
w = np.load('weight.npy')
ans_y = test_x.dot(w)
ans_y

In [None]:
import csv

# Write the predictions to submit.csv as (id, value) rows, echoing the header
# and every row to the cell output as it is written.
header = ['id', 'value']
with open('submit.csv', mode='w', newline='') as submit_file:
    csv_writer = csv.writer(submit_file)
    print(header)
    csv_writer.writerow(header)
    for sample_idx in range(240):
        sample_id = 'id_' + str(sample_idx)
        # ans_y has one row per sample; its single column is the prediction.
        row = [sample_id, ans_y[sample_idx][0]]
        csv_writer.writerow(row)
        print(row)