# Machine Learning HW2

## Read Training Set

In [1]:
import numpy as np
import pandas as pd

In [2]:
train_file = 'data/X_train'
label_file = 'data/Y_train'
test_file = 'data/X_test'

# All three files are comma-separated text whose first line is a header.
# For the feature files the first field of every row is dropped and the rest
# is parsed as floats; for the label file only the second field is kept.
with open(train_file, 'r') as f:
    next(f)  # skip the header line
    rows = (line.strip('\n').split(',') for line in f)
    X_train = np.array([fields[1:] for fields in rows], dtype=float)

with open(label_file, 'r') as f:
    next(f)  # skip the header line
    rows = (line.strip('\n').split(',') for line in f)
    Y_train = np.array([fields[1] for fields in rows], dtype=float)

with open(test_file, 'r') as f:
    next(f)  # skip the header line
    rows = (line.strip('\n').split(',') for line in f)
    X_test = np.array([fields[1:] for fields in rows], dtype=float)

### Normalization

In [3]:
def normalize(X, train=True, normal_column=None, X_mean=None, X_std=None):
    """Standardize selected columns of ``X`` to zero mean and unit variance.

    The selected columns of ``X`` are overwritten in place; pass a copy if the
    original values are still needed.

    Arguments:
        X: 2-D float array to be processed.
        train: True when processing training data (the statistics are
            computed from ``X`` and any ``X_mean`` / ``X_std`` passed in are
            ignored); False to reuse previously computed statistics.
        normal_column: indexes of the columns to normalize (list or array).
            ``None`` means every column.
        X_mean: per-column mean with shape (1, n_selected), as returned by an
            earlier ``train=True`` call. Required when ``train`` is False.
        X_std: per-column standard deviation with shape (1, n_selected), as
            returned by an earlier ``train=True`` call. Required when
            ``train`` is False.

    Returns:
        X: the normalized data (same object as the input).
        X_mean: mean used for the normalization.
        X_std: standard deviation used for the normalization (unmodified,
            so constant columns still report 0).

    Raises:
        ValueError: if ``train`` is False and ``X_mean`` or ``X_std`` is
            missing.
    """
    # Compare against None explicitly: the truth value of an index array is
    # ambiguous, and a bare index 0 must not be mistaken for "all columns".
    if normal_column is None:
        normal_column = np.arange(X.shape[1])

    if train:
        X_mean = np.mean(X[:, normal_column], axis=0).reshape(1, -1)
        X_std = np.std(X[:, normal_column], axis=0).reshape(1, -1)
    elif X_mean is None or X_std is None:
        raise ValueError(
            "X_mean and X_std must be provided when train is False")

    # A constant column has zero standard deviation; divide it by 1 instead
    # so it maps to 0 rather than NaN/inf.
    safe_std = np.where(X_std == 0, 1.0, X_std)
    X[:, normal_column] = (X[:, normal_column] - X_mean) / safe_std
    return X, X_mean, X_std

### Shuffle

In [4]:
def shuffle(X, Y):
    """Reorder two equal-length arrays with one shared random permutation.

    Row i of the returned X still corresponds to element i of the returned Y.
    The inputs are indexed with an integer array, so they must support NumPy
    style fancy indexing.
    """
    order = np.arange(len(X))
    np.random.shuffle(order)  # permutes the index array in place
    shuffled_X = X[order]
    shuffled_Y = Y[order]
    return shuffled_X, shuffled_Y

### Divide train and val

In [5]:
def train_dev_divide(X, Y, div_ratio=0.2):
    """Split X and Y into a leading training part and a trailing validation part.

    Arguments:
        X: sliceable sequence of samples.
        Y: sliceable sequence of labels, aligned with X.
        div_ratio: fraction of the samples placed in the validation part.
    Outputs:
        (X_train, Y_train, X_val, Y_val), where the training part is the
        first int(len(X) * (1 - div_ratio)) items and the validation part
        is everything after it. No shuffling is done here.
    """
    n_total = len(X)
    split_at = int(n_total * (1 - div_ratio))
    X_head, X_tail = X[:split_at], X[split_at:]
    Y_head, Y_tail = Y[:split_at], Y[split_at:]
    return X_head, Y_head, X_tail, Y_tail

### Data preprocessing

In [6]:
# Shuffle features and labels with the same permutation so rows stay aligned.
X, Y = shuffle(X_train, Y_train)
# Standardize every column; the returned mean/std are discarded here.
# NOTE(review): the statistics are computed over all rows before the split
# below, so the validation rows also contribute to the mean/std.
X, _, _ = normalize(X)
# Hold out the trailing 20% (default div_ratio) of the shuffled rows for validation.
train_X, train_y, val_X, val_y = train_dev_divide(X, Y)

In [7]:
# Row counts of the training and validation splits.
train_size = train_X.shape[0]
val_size = val_X.shape[0]
# Number of columns in the training matrix.
params_size = train_X.shape[1]
print(train_X.shape)

(43404, 510)


## Training
**Logistic Regression & Adam**

### Sigmoid function

In [8]:
def sigmoid_func(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), clamped away from 0 and 1.

    Arguments:
        z: scalar or NumPy array of logits.
    Outputs:
        Value(s) in [1e-8, 1 - 1e-8], same shape as ``z``.

    The output bounds keep the result usable as a probability that never
    reaches exactly 0 or 1.
    """
    # exp(-z) overflows float64 for z below roughly -709; beyond |z| = 500
    # the sigmoid is already saturated, so clamping the input loses nothing.
    z = np.clip(z, -500, 500)
    prob = 1 / (1 + np.exp(-z))
    return np.clip(prob, 1e-8, 1 - 1e-8)

### Adam

In [None]:
# One weight per column of train_X plus one extra for the constant column
# prepended below.
params = int(params_size + 1)
w = np.zeros([params,1])
# Prepend a column of ones so that w[0] multiplies a constant (bias term).
X = np.concatenate((np.ones([train_size,1]), train_X), axis = 1)

# Initialize the parameters of the adam
print("Initialize successfully ")
iteration = 10
learning_rate = 0.001
# Accumulators start at zero. np.empty leaves the buffer uninitialized, so
# any read before the first assignment would pick up arbitrary memory.
# NOTE(review): presumably s and r are Adam's first/second moment estimates
# and p_1 / p_2 their decay rates; confirm against the update step.
s = np.zeros([params,1])
r = np.zeros([params,1])
p_1 = 0.9
p_2 = 0.999
eps = 1e-8
print("Train processing begin ... ")
for i in range(iteration):
    # np.dot(X, w) is a column vector of shape (n, 1). If train_y is 1-D,
    # subtracting it directly broadcasts to an (n, n) matrix, so reshape the
    # labels to a column first to get one residual per sample.
    residual = sigmoid_func(np.dot(X, w)) - train_y.reshape(-1, 1)
    # Squared error per sample, shape (n,).
    # NOTE(review): if a single scalar loss per iteration is wanted, drop
    # axis = 1 to sum over all samples.
    loss = np.sum(np.power(residual, 2), axis = 1)
    print(loss)