# **2022 ML FALL HW1: PM2.5 Prediction (Regression)**

# **Import Some Packages**

In [282]:
import numpy as np
import csv
import math
import pandas as pd

# **Fix random seed**

In [285]:
seed = 9487
np.random.seed()

In [287]:
def valid(x, y):
  return True


# Create your dataset
def parse2train(data, feats):

  x = []
  y = []

  # Use data #0~#7 to predict #8 => Total data length should be decresased by 8.
  total_length = data.shape[1] - 8

  for i in range(total_length):
    x_tmp = data[feats, i:i+8] # Use data #0~#7 to predict #8, data #1~#8 to predict #9, etc.
    y_tmp = data[-1, i+8] # last column of (i+8)th row: PM2.5

    # Filter out extreme values to train.
    if valid(x_tmp, y_tmp):
      x.append(x_tmp.reshape(-1,))
      y.append(y_tmp)
  
  # x.shape: (n, 15, 8)
  # y.shape: (n, 1) 
  x = np.array(x)
  y = np.array(y)

  return x,y


# **Adam**
* Gradient descent algorithm. Adam was implemented.
* Ref: https://arxiv.org/pdf/1412.6980.pdf

![](https://i.imgur.com/jRaebdf.png)



In [333]:
def minibatch(x, y, config):
    # Randomize the data in minibatch
    index_tr = np.arange(x.shape[0])[:80]
    index_va = np.arange(x.shape[0])[-80:]
    np.random.shuffle(index_tr)
    x_tr = x[index_tr]
    y_tr = y[index_tr]
    x_va = x[index_va]
    y_va = y[index_va]
    # Initialization
    batch_size = config.batch_size
    lr = config.lr
    lam = config.lam
    epoch = config.epoch

    beta_1 = np.full(x[0].shape, 0.9).reshape(-1, 1)
    beta_2 = np.full(x[0].shape, 0.99).reshape(-1, 1)
    # Linear regression: only contains two parameters (w, b).
    w1 = np.full(x[0].shape, 0.1).reshape(-1, 1)
    w2 = np.full(x[0].shape, 0.1).reshape(-1, 1)
    bias = 0.1
    m_t = np.full(x[0].shape, 0).reshape(-1, 1)
    v_t = np.full(x[0].shape, 0).reshape(-1, 1)
    m_t_b = 0.0
    v_t_b = 0.0
    t = 0
    epsilon = 1e-8
    # min
    loss_va_min, w1b ,w2b, biasb = 3.6, 0, 0, 0
    
    # Training loop
    for num in range(epoch):
        loss_va = 0
        for b in range(int(x.shape[0]/batch_size)):
            t+=1
            x_batch = x_tr[b*batch_size:(b+1)*batch_size]
            y_batch = y_tr[b*batch_size:(b+1)*batch_size].reshape(-1,1)

            # Prediction of linear regression 
            pred = np.dot(np.square(x_batch), w2) + np.dot(x_batch,w1) + bias
            # loss
            loss = y_batch - pred
            
            # Compute gradient
            g_t_w1 = np.dot(x_batch.transpose(),loss) * (-2)
            g_t_w2 = np.dot(x_batch.transpose(),loss) * (-2)
            g_t_b = loss.sum(axis=0) * (-2)
            m_t_w1 = beta_1*m_t + (1-beta_1)*g_t_w1
            m_t_w2 = beta_1*m_t + (1-beta_1)*g_t_w2
            v_t_w1 = beta_2*v_t + (1-beta_2)*np.multiply(g_t_w1, g_t_w1)
            v_t_w2 = beta_2*v_t + (1-beta_2)*np.multiply(g_t_w2, g_t_w2)
            m_cap_w1 = m_t_w1/(1-(beta_1**t))
            m_cap_w2 = m_t_w2/(1-(beta_1**t))
            v_cap_w1 = v_t_w1/(1-(beta_2**t))
            v_cap_w2 = v_t_w2/(1-(beta_2**t))
            m_t_b = 0.9*m_t_b + (1-0.9)*g_t_b
            v_t_b = 0.99*v_t_b + (1-0.99)*(g_t_b*g_t_b) 
            m_cap_b = m_t_b/(1-(0.9**t))
            v_cap_b = v_t_b/(1-(0.99**t))
            w1_0 = np.copy(w1)
            w2_0 = np.copy(w2)
            # Update weight & bias
            w1 -= ((lr*m_cap_w1)/(np.sqrt(v_cap_w1)+epsilon)).reshape(-1, 1)
            w2 -= ((lr*m_cap_w2)/(np.sqrt(v_cap_w2)+epsilon)).reshape(-1, 1)
            bias -= (lr*m_cap_b)/(math.sqrt(v_cap_b)+epsilon)
        
        pred_va = np.dot(np.square(x_va), w2) + np.dot(x_va,w1) + bias
        
        loss_va = np.mean(y_va.reshape(-1,1) - pred_va)
        print(f"epoch{num}, valid loss:{loss_va}") if num%300 == 0 else None
        if abs(loss_va_min) > abs(loss_va) and loss_va > 0 and num > 1000:
          w1b = w1
          w2b = w2
          biasb = bias
          loss_va_min = loss_va
          print(f"save best at {num}, loss = {loss_va_min}")

    return w1, w2, bias, w1b, w2b, biasb

In [349]:
from argparse import Namespace

# TODO: Tune the config to boost your performance. 
train_config = Namespace(
    batch_size = 256,
    lr = 1e-4,
    lam = 0.0001,
    epoch = 15000,
)


# **Training regression model**

In [350]:
data = pd.read_csv(f"{os.getcwd()}/train.csv")
data.corr().loc["PM2.5",:]

AMB_TEMP     -0.176147
CO            0.659148
NO            0.227219
NO2           0.554274
NOx           0.513650
O3            0.233924
PM10          0.818868
WS_HR        -0.102047
RAINFALL     -0.060801
RH           -0.081576
SO2           0.361333
WD_HR         0.171932
WIND_DIREC    0.137658
WIND_SPEED   -0.101197
PM2.5         1.000000
Name: PM2.5, dtype: float64

In [1]:
feats = [1, 3, 6, 14]

In [352]:
# Training data preprocessing.

data = data.values
train_data = np.transpose(np.array(np.float64(data)))
train_x, train_y = parse2train(train_data, feats)

In [353]:
# Train regression model

w1, w2, bias, w1b, w2b, biasb = minibatch(train_x, train_y, train_config)

epoch0, valid loss:-641.6025262422852
epoch300, valid loss:-446.6767541651931
epoch600, valid loss:-250.25116182245856
epoch900, valid loss:-53.82726135351679
save best at 1002, loss = 0.5600952567850807
save best at 1004, loss = 0.5241357436440943
save best at 1006, loss = 0.4980161736383926
save best at 1008, loss = 0.46067455028549187
save best at 1010, loss = 0.43450357003262924
save best at 1012, loss = 0.39712530712480576
save best at 1014, loss = 0.3710006154477143
save best at 1016, loss = 0.37025880369055064
save best at 1018, loss = 0.3695119300680127
save best at 1020, loss = 0.3687608882618502
save best at 1022, loss = 0.3680074037217198
save best at 1024, loss = 0.3672526320069175
save best at 1026, loss = 0.36649733210746394
save best at 1028, loss = 0.36574199942515534
save best at 1030, loss = 0.36498695626214306
save best at 1032, loss = 0.36423241209563606
save best at 1034, loss = 0.36347850321908926
save best at 1036, loss = 0.3627253186033167
save best at 1038, los

# **Testing:**


In [354]:
def parse2test(data, feats):
  x = []
  for i in range(90):
    x_tmp = data[feats,8*i: 8*i+8]
    x.append(x_tmp.reshape(-1,))
  # x.shape: (n, 15, 8)
  x = np.array(x)
  return x


In [355]:
data = pd.read_csv('test.csv')
data = data.values

test_data = np.transpose(np.array(np.float64(data)))
test_x = parse2test(test_data, feats)

# **Write result as .csv**

---



In [356]:
 with open('re.csv', 'w', newline='') as csvf:
    writer = csv.writer(csvf)
    writer.writerow(['Id','Predicted'])
    print(test_x.shape) 
    for i in range(int(test_x.shape[0])): 
      prediction = ( np.dot(np.reshape(w2b,-1),np.square(test_x[i])) + np.dot(np.reshape(w1b,-1),test_x[i]) + biasb)[0]
      writer.writerow([i, prediction])

(90, 32)


In [357]:
print("w2\n", w2b, "\nw1\n", w1b, "\nbias\n", biasb)

w2
 [[ 0.00342926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00222925]
 [ 0.00162926]
 [ 0.00282926]
 [ 0.00482926]
 [ 0.03242926]
 [ 0.00402926]
 [ 0.00282926]
 [ 0.00082926]
 [-0.00157076]
 [-0.00337075]
 [ 0.00082926]
 [ 0.00262926]
 [ 0.00282926]
 [ 0.00062926]
 [ 0.00082926]
 [ 0.00042926]
 [-0.00117074]
 [ 0.00162926]
 [ 0.00262926]
 [ 0.00282926]
 [ 0.00202926]
 [ 0.00302926]
 [ 0.00302926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00582926]] 
w1
 [[ 0.00342926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00222925]
 [ 0.00162926]
 [ 0.00282926]
 [ 0.00482926]
 [ 0.03242926]
 [ 0.00402926]
 [ 0.00282926]
 [ 0.00082926]
 [-0.00157076]
 [-0.00337075]
 [ 0.00082926]
 [ 0.00262926]
 [ 0.00282926]
 [ 0.00062926]
 [ 0.00082926]
 [ 0.00042926]
 [-0.00117074]
 [ 0.00162926]
 [ 0.00262926]
 [ 0.00282926]
 [ 0.00202926]
 [ 0.00302926]
 [ 0.00302926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00282926]
 [ 0.00582926]] 
bias
 [1.44646702]
