# Linear Regression

## SKLearn을 이용한 Linear Regression

In [None]:
# Prepare the data
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression

# Generate a synthetic single-feature regression problem with 100 samples.
# random_state is fixed (presumably so every run produces the same data).
X, y = make_regression(n_samples=100, n_features=1,
                       bias=100, noise=40, random_state=1)


# Visualise the raw data as a scatter plot of feature vs. target
plt.scatter(X, y, label="data")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# Prepend a constant 1 as column 0 of X so that the bias is learned
# together with the other weights.
import numpy as np
new_X = np.insert(X, 0, 1, axis=1)

# Split into train and test sets (30% test, shuffled, fixed random_state)
train_x, test_x, train_y, test_y = train_test_split(new_X, y, test_size=0.3, shuffle=True, random_state=1)

In [None]:
# Train with Scikit-Learn
from sklearn.linear_model import LinearRegression

# train_x already carries a constant-1 column (column 0) that plays the role
# of the bias.  Disable scikit-learn's own intercept so the bias is not
# modelled twice; coef_ is then directly comparable with a weight vector
# obtained from the same design matrix by other solvers.
simple_linear = LinearRegression(fit_intercept=False)
simple_linear.fit(train_x, train_y)

In [None]:
# Visualise the fitted model against the training data.
# Column 1 of train_x is the original feature (column 0 is the inserted constant 1).
plt.scatter(train_x[:,1], train_y, label="train data")
plt.plot(train_x[:,1], simple_linear.predict(train_x), 'r-', label="model")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# Visualise the fitted model against the held-out test data.
# Column 1 of test_x is the original feature (column 0 is the inserted constant 1).
plt.scatter(test_x[:,1], test_y, label="test data")
plt.plot(test_x[:,1], simple_linear.predict(test_x), 'r',  label="model")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# 평가
def RMSE(pred, target):
    """Return the root-mean-squared error between predictions and targets.

    Args:
        pred: Predicted values (array-like or scalar).
        target: Ground-truth values (array-like or scalar).

    Returns:
        Square root of the mean of the squared differences.
    """
    # Convert up front so plain lists work and unsigned integer inputs
    # cannot wrap around in the subtraction.
    pred = np.asarray(pred, dtype=float)
    target = np.asarray(target, dtype=float)

    # A flat vector against a column vector, e.g. (n,) vs (n, 1), would
    # broadcast to an (n, n) matrix and silently yield a wrong score.
    # When the two only differ by length-1 axes, pair them element-wise.
    if pred.shape != target.shape:
        flat_pred, flat_target = np.squeeze(pred), np.squeeze(target)
        if flat_pred.shape == flat_target.shape:
            pred, target = flat_pred, flat_target

    error = pred - target
    return np.sqrt(np.mean(error * error))

# Score the scikit-learn model on the training split
pred = simple_linear.predict(train_x)
print("Train RMSE =", RMSE(pred, train_y))

# Score the same model on the test split (pred is overwritten)
pred = simple_linear.predict(test_x)
print("Test RMSE =", RMSE(pred, test_y))

## Linear Regression의 Numerical Solution

In [None]:
# Prepare the data
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression

# Generate a synthetic single-feature regression problem with 100 samples.
# random_state is fixed (presumably so every run produces the same data).
X, y = make_regression(n_samples=100, n_features=1,
                       bias=100, noise=40, random_state=1)


# Visualise the raw data as a scatter plot of feature vs. target
plt.scatter(X, y, label="data")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# Prepend a constant 1 as column 0 of X so that the bias is learned
# together with the other weights.
import numpy as np
new_X = np.insert(X, 0, 1, axis=1)

# Split into train and test sets (30% test, shuffled, fixed random_state)
train_x, test_x, train_y, test_y = train_test_split(new_X, y, test_size=0.3, shuffle=True, random_state=1)

In [None]:
class LinearRegression_gd:
    """Linear regression fitted with full-batch gradient descent.

    The model is ``prediction = X @ W``.  There is no separate intercept
    term, so a bias has to be supplied by the caller as a constant column
    of the design matrix.

    Attributes:
        W: Weight column vector of shape ``(num_features, 1)``.
    """

    def __init__(self, num_features, seed):
        """Draw the initial weights uniformly from [0, 1).

        Args:
            num_features: Number of columns of the design matrix.
            seed: Seed passed to ``np.random.seed``.
        """
        # NOTE: this reseeds NumPy's global random state; kept as-is so the
        # initial weights for a given seed stay the same for existing users.
        np.random.seed(seed)
        self.W = np.random.rand(num_features, 1)

    def train(self, train_x, train_y, num_epochs, learning_rate):
        """Run ``num_epochs`` gradient-descent steps over the whole data set.

        Args:
            train_x: Design matrix of shape ``(num_data, num_features)``.
            train_y: Targets, either flat ``(num_data,)`` or a column
                ``(num_data, 1)``.
            num_epochs: Number of weight updates to perform.
            learning_rate: Step size applied to the gradient.

        Returns:
            List with the loss (half the mean squared error) measured at
            the start of every epoch, i.e. before that epoch's update.

        Raises:
            ValueError: If ``train_x`` is not 2-D or the number of targets
                does not match the number of rows of ``train_x``.
        """
        train_x = np.asarray(train_x, dtype=float)
        if train_x.ndim != 2:
            raise ValueError("train_x must be 2-D: (num_data, num_features)")

        # reshape (rather than expand_dims) so that targets already given
        # as an (n, 1) column do not become (n, 1, 1).
        targets = np.asarray(train_y, dtype=float).reshape(-1, 1)
        if targets.shape[0] != train_x.shape[0]:
            raise ValueError(
                "train_x and train_y must contain the same number of samples")

        loss_memory = []

        for _ in range(num_epochs):
            # Forward pass
            prediction = np.matmul(train_x, self.W)

            # Error and loss: L = mean(error^2) / 2
            error = prediction - targets
            loss_memory.append(np.mean(error * error) / 2)

            # dL/dW = mean over samples of x_i * error_i, as a column vector
            gradient = np.mean(train_x * error, axis=0, keepdims=True).T

            # Gradient-descent step
            self.W -= learning_rate * gradient

        return loss_memory

    def predict(self, test_x):
        """Return predictions for ``test_x`` as a flat 1-D array.

        Args:
            test_x: Design matrix of shape ``(num_data, num_features)``.

        Returns:
            Array of shape ``(num_data,)``.
        """
        # reshape(-1) keeps the result 1-D even for a single row, where
        # squeeze() would collapse it to a 0-d array.
        return np.matmul(test_x, self.W).reshape(-1)

In [None]:
# Hyper-parameters for gradient descent
num_epochs = 1500
learning_rate = 0.01
seed = 15

# The number of columns of train_x determines the size of the weight vector
num_data, num_features = train_x.shape
model = LinearRegression_gd(num_features, seed)
# train() returns the per-epoch loss history
loss_memory = model.train(train_x, train_y, num_epochs, learning_rate)

In [None]:
# Plot the loss recorded at each epoch
x_axis = list(range(num_epochs))

plt.plot(x_axis, loss_memory)
plt.title('Loss')
plt.xlabel('Epochs')
# NOTE(review): LinearRegression_gd.train records mean(error^2) / 2, i.e.
# half the MSE, while the axis label below says 'MSE Loss'.
plt.ylabel('MSE Loss')

plt.show()

In [None]:
# Visualise the gradient-descent model against the training data.
# Column 1 of train_x is the original feature (column 0 is the inserted constant 1).
plt.scatter(train_x[:,1], train_y, label="train data")
plt.plot(train_x[:,1], model.predict(train_x), 'r-', label="model")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# Visualise the gradient-descent model against the held-out test data.
# Column 1 of test_x is the original feature (column 0 is the inserted constant 1).
plt.scatter(test_x[:,1], test_y, label="test data")
plt.plot(test_x[:,1], model.predict(test_x), 'r-', label="model")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# 평가
def RMSE(pred, target):
    """Return the root-mean-squared error between predictions and targets.

    Args:
        pred: Predicted values (array-like or scalar).
        target: Ground-truth values (array-like or scalar).

    Returns:
        Square root of the mean of the squared differences.
    """
    # Convert up front so plain lists work and unsigned integer inputs
    # cannot wrap around in the subtraction.
    pred = np.asarray(pred, dtype=float)
    target = np.asarray(target, dtype=float)

    # A flat vector against a column vector, e.g. (n,) vs (n, 1), would
    # broadcast to an (n, n) matrix and silently yield a wrong score.
    # When the two only differ by length-1 axes, pair them element-wise.
    if pred.shape != target.shape:
        flat_pred, flat_target = np.squeeze(pred), np.squeeze(target)
        if flat_pred.shape == flat_target.shape:
            pred, target = flat_pred, flat_target

    error = pred - target
    return np.sqrt(np.mean(error * error))

# Score the gradient-descent model on the training split
pred = model.predict(train_x)
print("Train RMSE =", RMSE(pred, train_y))

# Score the same model on the test split (pred is overwritten)
pred = model.predict(test_x)
print("Test RMSE =", RMSE(pred, test_y))

## Linear Regression의 Analytical Solution

In [None]:
# Prepare the data
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression

# Generate a synthetic single-feature regression problem with 100 samples.
# random_state is fixed (presumably so every run produces the same data).
X, y = make_regression(n_samples=100, n_features=1,
                       bias=100, noise=40, random_state=1)


# Visualise the raw data as a scatter plot of feature vs. target
plt.scatter(X, y, label="data")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# Prepend a constant 1 as column 0 of X so that the bias is learned
# together with the other weights.
import numpy as np
new_X = np.insert(X, 0, 1, axis=1)

# Split into train and test sets (30% test, shuffled, fixed random_state)
train_x, test_x, train_y, test_y = train_test_split(new_X, y, test_size=0.3, shuffle=True, random_state=1)

In [None]:
# Normal equation: the least-squares weights satisfy (X^T X) W = X^T y.
# Solve the least-squares problem directly instead of forming
# inv(X^T X): the explicit inverse loses precision on ill-conditioned data
# and raises LinAlgError when X^T X is singular (e.g. collinear columns),
# whereas lstsq then returns the minimum-norm solution.
W = np.linalg.lstsq(train_x, train_y, rcond=None)[0]

# Predictions of the closed-form model on both splits
pred_train = np.matmul(train_x, W)
pred_test = np.matmul(test_x, W)

In [None]:
# Visualise the closed-form model against the training data.
# Column 1 of train_x is the original feature (column 0 is the inserted constant 1).
plt.scatter(train_x[:,1], train_y, label="train data")
plt.plot(train_x[:,1], pred_train, 'r-', label="model")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# Visualise the closed-form model against the held-out test data.
# Column 1 of test_x is the original feature (column 0 is the inserted constant 1).
plt.scatter(test_x[:,1], test_y, label="test data")
plt.plot(test_x[:,1], pred_test, 'r',  label="model")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()

In [None]:
# 평가
def RMSE(pred, target):
    """Return the root-mean-squared error between predictions and targets.

    Args:
        pred: Predicted values (array-like or scalar).
        target: Ground-truth values (array-like or scalar).

    Returns:
        Square root of the mean of the squared differences.
    """
    # Convert up front so plain lists work and unsigned integer inputs
    # cannot wrap around in the subtraction.
    pred = np.asarray(pred, dtype=float)
    target = np.asarray(target, dtype=float)

    # A flat vector against a column vector, e.g. (n,) vs (n, 1), would
    # broadcast to an (n, n) matrix and silently yield a wrong score.
    # When the two only differ by length-1 axes, pair them element-wise.
    if pred.shape != target.shape:
        flat_pred, flat_target = np.squeeze(pred), np.squeeze(target)
        if flat_pred.shape == flat_target.shape:
            pred, target = flat_pred, flat_target

    error = pred - target
    return np.sqrt(np.mean(error * error))

# Score the closed-form predictions on the training split
print("Train RMSE =", RMSE(pred_train, train_y))

# Score the closed-form predictions on the test split
print("Test RMSE =", RMSE(pred_test, test_y))

## 당뇨병 데이터에서, SKLearn, Numerical Solution, Analytical Solution 비교


In [None]:
# Load the diabetes data set
from sklearn.datasets import load_diabetes
datasets = load_diabetes()

# Prepend a constant 1 as column 0 of the feature matrix so that the bias
# is learned together with the other weights.
import numpy as np
new_X = np.insert(datasets.data, 0, 1, axis=1)

# Split into train and test sets (30% test, shuffled, fixed random_state).
# NOTE: train_test_split is not imported in this cell; it relies on the
# import executed in an earlier cell.
train_x, test_x, train_y, test_y = train_test_split(new_X, datasets.target, test_size=0.3, shuffle=True, random_state=1)

In [None]:
# Train with Scikit-Learn
from sklearn.linear_model import LinearRegression

# train_x already carries a constant-1 column (column 0) that plays the role
# of the bias.  Disable scikit-learn's own intercept so the bias is not
# modelled twice; coef_ is then directly comparable with a weight vector
# obtained from the same design matrix by other solvers.
simple_linear = LinearRegression(fit_intercept=False)
simple_linear.fit(train_x, train_y)

In [None]:
# 평가
def RMSE(pred, target):
    """Return the root-mean-squared error between predictions and targets.

    Args:
        pred: Predicted values (array-like or scalar).
        target: Ground-truth values (array-like or scalar).

    Returns:
        Square root of the mean of the squared differences.
    """
    # Convert up front so plain lists work and unsigned integer inputs
    # cannot wrap around in the subtraction.
    pred = np.asarray(pred, dtype=float)
    target = np.asarray(target, dtype=float)

    # A flat vector against a column vector, e.g. (n,) vs (n, 1), would
    # broadcast to an (n, n) matrix and silently yield a wrong score.
    # When the two only differ by length-1 axes, pair them element-wise.
    if pred.shape != target.shape:
        flat_pred, flat_target = np.squeeze(pred), np.squeeze(target)
        if flat_pred.shape == flat_target.shape:
            pred, target = flat_pred, flat_target

    error = pred - target
    return np.sqrt(np.mean(error * error))

# Score the scikit-learn model on the training split
pred = simple_linear.predict(train_x)
print("Train RMSE =", RMSE(pred, train_y))

# Score the same model on the test split (pred is overwritten)
pred = simple_linear.predict(test_x)
print("Test RMSE =", RMSE(pred, test_y))

In [None]:
# Analytical Solution 코드 추가하기

In [None]:
# Numerical Solution 코드 추가하기

## 행복지수 데이터에서, SKLearn, Numerical Solution, Analytical Solution 비교


In [None]:
import numpy as np
import os                            # 데이터 파일 경로 설정
import csv                           # 데이터 파일 로드

def Load_Dataset(filename, target_col=2, feature_start=6):
    """Load a CSV file into a bias-augmented feature matrix and a target vector.

    The first row is returned as the header.  For every following non-empty
    row, the value in column ``target_col`` becomes the target, and the
    columns from ``feature_start`` to the end become the features, prefixed
    with a constant ``1.0`` (the bias term).

    Args:
        filename: Path of the CSV file to read.
        target_col: Index of the column holding the target value.
        feature_start: Index of the first feature column.

    Returns:
        Tuple ``(header, x_array, y_array)``: the header row as a list of
        strings, the feature rows as a NumPy array and the targets as a
        NumPy array.

    Raises:
        ValueError: If the file has no header row, or a feature/target cell
            cannot be converted to ``float``.
        IndexError: If a data row has no column ``target_col``.
    """
    # newline='' is what the csv module expects, so that it can handle line
    # endings (and newlines inside quoted fields) itself.
    with open(filename, 'r', newline='') as f:
        csv_reader = csv.reader(f)

        header = next(csv_reader, None)
        if header is None:
            raise ValueError(
                "{!r} is empty: no header row found".format(filename))

        x_data = []
        y_data = []
        for line in csv_reader:
            # csv.reader yields [] for blank lines (e.g. a trailing newline)
            if not line:
                continue
            x_data.append([1.0] + [float(cell) for cell in line[feature_start:]])
            y_data.append(float(line[target_col]))

    return header, np.array(x_data), np.array(y_data)

In [None]:
# Load the train and test splits; the CSV header is discarded.
# NOTE(review): the paths are hard-coded absolute paths under /content
# (looks like a Colab working directory — confirm before running elsewhere).
_, train_x, train_y = Load_Dataset('/content/happiness_train.csv')
_, test_x, test_y = Load_Dataset('/content/happiness_test.csv')

In [None]:
# Train with Scikit-Learn
from sklearn.linear_model import LinearRegression

# Load_Dataset already prepends a constant 1.0 to every feature row, which
# plays the role of the bias.  Disable scikit-learn's own intercept so the
# bias is not modelled twice; coef_ is then directly comparable with a
# weight vector obtained from the same design matrix by other solvers.
simple_linear = LinearRegression(fit_intercept=False)
simple_linear.fit(train_x, train_y)

In [None]:
# 평가
def RMSE(pred, target):
    """Return the root-mean-squared error between predictions and targets.

    Args:
        pred: Predicted values (array-like or scalar).
        target: Ground-truth values (array-like or scalar).

    Returns:
        Square root of the mean of the squared differences.
    """
    # Convert up front so plain lists work and unsigned integer inputs
    # cannot wrap around in the subtraction.
    pred = np.asarray(pred, dtype=float)
    target = np.asarray(target, dtype=float)

    # A flat vector against a column vector, e.g. (n,) vs (n, 1), would
    # broadcast to an (n, n) matrix and silently yield a wrong score.
    # When the two only differ by length-1 axes, pair them element-wise.
    if pred.shape != target.shape:
        flat_pred, flat_target = np.squeeze(pred), np.squeeze(target)
        if flat_pred.shape == flat_target.shape:
            pred, target = flat_pred, flat_target

    error = pred - target
    return np.sqrt(np.mean(error * error))

# Score the scikit-learn model on the training split
pred = simple_linear.predict(train_x)
print("Train RMSE =", RMSE(pred, train_y))

# Score the same model on the test split (pred is overwritten)
pred = simple_linear.predict(test_x)
print("Test RMSE =", RMSE(pred, test_y))

In [None]:
# Analytical Solution 코드 추가하기

In [None]:
# Numerical Solution 코드 추가하기