<a href="https://colab.research.google.com/github/JK-the-Ko/Thermo-Fluid-Dynamics-Experiment/blob/main/2022-2/%EC%97%B4%EC%9C%A0%EC%B2%B4%EA%B3%B5%ED%95%99%EC%8B%A4%ED%97%98_Week_13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regression

## Import Library

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Import Scikit-Learn

In [None]:
import sklearn

In [None]:
sklearn.__version__

## Get Regression Dataset

In [None]:
from sklearn import datasets

In [None]:
data = datasets.load_diabetes(as_frame = True)

In [None]:
x, y = data.data, data.target

In [None]:
x

In [None]:
y

In [None]:
inputFeatures = x.columns
print(inputFeatures)

## Dataset Preprocessing

In [None]:
from sklearn.preprocessing import MinMaxScaler

### Scale Input Dataset

In [None]:
x, y = np.array(x), np.array(y)

In [None]:
# Min-max scale the input features (MinMaxScaler is used with its default settings).
# NOTE(review): the scaler is fit on the full dataset before the train/test split performed
# further down, so test-set minima/maxima influence the scaling — confirm this is acceptable.
mmScalerX = MinMaxScaler()
x = mmScalerX.fit_transform(x)

### Scale Target Dataset

In [None]:
# MinMaxScaler works on 2-D input, so the 1-D target is reshaped to a single column for scaling
# and flattened back afterwards. mmScalerY is kept for the later inverse_transform calls.
mmScalerY = MinMaxScaler()
y = mmScalerY.fit_transform(y.reshape(-1, 1)).reshape(-1)

### Split Dataset

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size = 0.3, random_state = 42)

In [None]:
print(xTrain.shape, yTrain.shape)
print(xTest.shape, yTest.shape)

## Linear Regression

In [None]:
def getParameter(x: np.ndarray, y: np.ndarray) -> np.ndarray :
  """Estimate ordinary-least-squares coefficients for y ~ x @ beta.

  The coefficients are obtained with a least-squares solver rather than by
  explicitly inverting x^T x, so rank-deficient or ill-conditioned design
  matrices yield the minimum-norm solution instead of failing or amplifying
  round-off error.

  Args:
    x: Design matrix of shape (numSamples, numFeatures). No intercept column
      is added here; append a column of ones beforehand if a bias is wanted.
    y: Target vector of shape (numSamples,) or matrix (numSamples, numTargets).

  Returns:
    Coefficient array of shape (numFeatures,) or (numFeatures, numTargets).
  """
  x = np.asarray(x, dtype = float)
  y = np.asarray(y, dtype = float)

  # rcond=None selects NumPy's machine-precision based cutoff for small singular values.
  output, _, _, _ = np.linalg.lstsq(x, y, rcond = None)

  return output

In [None]:
betaHat = getParameter(xTrain, yTrain)

In [None]:
yTestHat = np.matmul(xTest, betaHat)

### Model Evaluation

In [None]:
def plotBarChart(yTest, yTestHat, numSamples = 50) :
  """Draw ground-truth and predicted values as side-by-side bars.

  Args:
    yTest: Ground-truth target values (array-like; flattened before plotting).
    yTestHat: Predicted target values (array-like; flattened before plotting).
    numSamples: Maximum number of leading samples to draw (default 50).

  Only as many bars as both inputs can supply are drawn, so inputs shorter
  than numSamples are plotted in full instead of raising a shape mismatch.
  """
  yTest = np.asarray(yTest).reshape(-1)
  yTestHat = np.asarray(yTestHat).reshape(-1)

  # Clamp the bar count to the data that is actually available.
  numBars = max(0, min(numSamples, len(yTest), len(yTestHat)))
  idx = np.arange(numBars)
  width = 0.2

  fig, ax = plt.subplots(figsize = (10,4))
  ax.bar(idx, yTest[:numBars], width = width)
  # Shift the prediction bars by one bar width so both series stay visible.
  ax.bar(idx+width, yTestHat[:numBars], width = width)
  ax.set_xticks(idx)
  ax.legend(["Ground Truth", "Prediction"])
  ax.set_xlabel("# samples")
  ax.set_ylabel("Value")

  fig.tight_layout()
  plt.show()

In [None]:
def RMSE(yHat: np.ndarray, y: np.ndarray) :
  """Compute the root-mean-square error between predictions and targets.

  Args:
    yHat: Predicted values (array-like).
    y: Reference values (array-like).

  Returns:
    The square root of the mean squared difference over all elements.

  If the two inputs hold the same number of elements but have different
  shapes (for example (n, 1) predictions against (n,) targets), both are
  flattened first; otherwise the subtraction would broadcast to an (n, n)
  matrix and silently produce a wrong value.
  """
  yHat = np.asarray(yHat, dtype = float)
  y = np.asarray(y, dtype = float)

  if yHat.shape != y.shape and yHat.size == y.size :
    yHat, y = yHat.reshape(-1), y.reshape(-1)

  output = np.sqrt(np.mean(np.square(y - yHat)))

  return output

In [None]:
# Map the scaled test targets and the linear-regression predictions back through the fitted
# target scaler (inverse_transform needs a 2-D column, hence the reshape round trip).
# NOTE: yTest is overwritten here and is reused by the DNN evaluation cells further down, so
# this cell is not idempotent — running it a second time applies the inverse transform again.
yTest = mmScalerY.inverse_transform(yTest.reshape(-1, 1)).reshape(-1)
yTestHat = mmScalerY.inverse_transform(yTestHat.reshape(-1, 1)).reshape(-1)

In [None]:
plotBarChart(yTest, yTestHat)

In [None]:
RMSE(yTestHat, yTest)

## Deep Neural Network (DNN)

### Import Keras Library

In [None]:
from tensorflow import keras

### Fix Seed

In [None]:
import tensorflow as tf
from keras import backend as K
import random

def fixSeed(numSeed = 42) :
  """Seed the Python, NumPy and TensorFlow random generators and register a TF session.

  Args:
    numSeed: Seed value handed to each of the three generators (default 42).
  """
  # The three seeding calls are independent of one another.
  tf.random.set_seed(numSeed)
  random.seed(numSeed)
  np.random.seed(numSeed)

  # Build a session limited to one intra-op and one inter-op thread and hand it to the Keras backend.
  # NOTE(review): presumably this only matters for graph/session-based execution — confirm that the
  # installed TensorFlow/Keras version still exposes K.set_session and actually uses this session.
  singleThreadConf = tf.compat.v1.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
  )
  K.set_session(
    tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=singleThreadConf)
  )

### Build Neural Network

In [None]:
K.clear_session()
fixSeed()

In [None]:
model = keras.Sequential()

In [None]:
model.add(keras.layers.Dense(16, "relu", input_dim=x.shape[1]))
model.add(keras.layers.Dense(64, "relu"))
model.add(keras.layers.Dense(1))

### Summarize Model

In [None]:
model.summary()

### Visualize Model

In [None]:
from keras.utils import plot_model
plot_model(model, to_file="model.png")

### Compile Model

In [None]:
lr, batchSize, epoch = 1e-2, 32, 500

In [None]:
model.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(learning_rate=lr), metrics=["mae"])

### Train Model

In [None]:
history = model.fit(xTrain, yTrain, batch_size=batchSize, epochs=epoch)

### Visualize Training Procedure

In [None]:
# The model is compiled with loss="mean_squared_error", so this curve is the MSE (not the RMSE).
lossHistory = history.history["loss"]

plt.subplots(figsize = (10,5))
# Derive the x-axis from the recorded history so it always matches the number of logged epochs.
plt.plot(np.arange(len(lossHistory)), lossHistory, label="Training MSE Loss")
plt.xlabel("# Epoch")
plt.ylabel("MSE Loss")
plt.title("Training Loss")
plt.legend(loc="best")
plt.show()

In [None]:
# Per-epoch mean absolute error recorded during training (the "mae" metric requested at compile time).
fig, ax = plt.subplots(figsize = (10,5))
ax.plot(np.arange(epoch), history.history["mae"], label="Training MAE Loss")
ax.set_xlabel("# Epoch")
ax.set_ylabel("MAE Loss")
ax.set_title("Training Loss")
ax.legend(loc="best")
plt.show()

### Inference Result

In [None]:
yPred = model.predict(xTest)

In [None]:
print(type(yPred))
print(yPred.shape)

### Model Evaluation

In [None]:
yPred = mmScalerY.inverse_transform(yPred).reshape(-1)

In [None]:
plotBarChart(yTest, yPred)

In [None]:
RMSE(yPred, yTest)

# Binary Class Classification

## Get Classification Dataset

In [None]:
data = datasets.load_breast_cancer(as_frame = True)

In [None]:
x, y = data.data, data.target

In [None]:
inputFeatures = x.columns
print(inputFeatures)

## Dataset Analysis

In [None]:
x.describe()

In [None]:
y.plot.hist()
plt.show()

## Dataset Preprocessing

### Scale Input Dataset

In [None]:
x, y = np.array(x), np.array(y)

In [None]:
# Fit a fresh min-max scaler on the classification features and rescale them in one step.
# NOTE(review): as above the scaler sees the full dataset before the train/test split below.
mmScalerX = MinMaxScaler()
x = mmScalerX.fit_transform(x)

### Split Dataset

In [None]:
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size = 0.3, random_state = 42)

In [None]:
print(xTrain.shape, yTrain.shape)
print(xTest.shape, yTest.shape)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
lgReg = LogisticRegression(max_iter = 10000, random_state = 42)
lgReg.fit(xTrain, yTrain)
yTestHat = lgReg.predict(xTest)

### Model Evaluation

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
accScore = accuracy_score(yTest, yTestHat)
print(accScore)

In [None]:
cm = confusion_matrix(yTest, yTestHat)
print(cm)

In [None]:
clsRp = classification_report(yTest, yTestHat)
print(clsRp)

## Deep Neural Network

### Build Neural Network

In [None]:
K.clear_session()
fixSeed()

In [None]:
model = keras.Sequential()

In [None]:
model.add(keras.layers.Dense(16, "relu", input_dim=x.shape[1]))
model.add(keras.layers.Dense(64, "relu"))
model.add(keras.layers.Dense(1, "sigmoid"))

### Summarize Model

In [None]:
model.summary()

### Visualize Model

In [None]:
from keras.utils import plot_model
plot_model(model, to_file="model.png")

### Compile Model

In [None]:
lr, batchSize, epoch = 1e-2, 32, 500

In [None]:
model.compile(loss="binary_crossentropy", optimizer=keras.optimizers.SGD(learning_rate=lr), metrics=["accuracy"])

### Train Model

In [None]:
history = model.fit(xTrain, yTrain, batch_size=batchSize, epochs=epoch)

### Visualize Training Procedure

In [None]:
# Training loss per epoch for the binary classifier.
fig, ax = plt.subplots(figsize = (10,5))
ax.plot(np.arange(epoch), history.history["loss"], label="Training Loss")
ax.set(xlabel="# Epoch", ylabel="Loss", title="Training Loss")
ax.legend(loc="best")
plt.show()

In [None]:
# Training accuracy per epoch for the binary classifier.
fig, ax = plt.subplots(figsize = (10,5))
ax.plot(np.arange(epoch), history.history["accuracy"], label="Training Accuracy")
ax.set(xlabel="# Epoch", ylabel="Accuracy", title="Training Accuracy")
ax.legend(loc="best")
plt.show()

### Inference Result

In [None]:
yPred = model.predict(xTest)

In [None]:
print(type(yPred))
print(yPred.shape)

### Model Evaluation

In [None]:
yPred

#### Threshold Result

In [None]:
yPred = np.where(yPred > 0.5, 1, 0)

In [None]:
yPred = yPred.reshape(-1)

In [None]:
accScore = accuracy_score(yTest, yPred)
print(accScore)

In [None]:
cm = confusion_matrix(yTest, yPred)
print(cm)

In [None]:
clsRp = classification_report(yTest, yPred)
print(clsRp)

# Multi Class Classification

## Dry Bean Dataset

### Load Dataset

In [None]:
df = pd.read_csv("/content/Dry_Bean_Dataset.csv")

In [None]:
df.shape

In [None]:
df.head(10)

### Data Preprocessing

In [None]:
# Separate the "Class" label column from the remaining feature columns.
# columns= already identifies the axis, so no explicit axis argument is needed.
y = df["Class"]
x = df.drop(columns = "Class")

In [None]:
inputFeatures = x.columns

#### Scale Input Dataset

In [None]:
# Min-max scale all feature columns; the result replaces x with the scaler's transformed output.
mmScaler = MinMaxScaler()
x = mmScaler.fit_transform(x)

#### Label Encode Target Dataset

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
y

In [None]:
# Encode the class labels as integers; lbEnc keeps the fitted label-to-integer mapping.
lbEnc = LabelEncoder()
y = lbEnc.fit_transform(y)

In [None]:
y

#### Split Dataset

In [None]:
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size = 0.1, random_state = 42)

In [None]:
print(xTrain.shape, yTrain.shape)
print(xTest.shape, yTest.shape)

## Random Forest Classification

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
rfCls = RandomForestClassifier(random_state = 42)
rfCls.fit(xTrain, yTrain)
yTestHat = rfCls.predict(xTest)

### Model Evaluation

In [None]:
accScore = accuracy_score(yTest, yTestHat)
print(accScore)

In [None]:
cm = confusion_matrix(yTest, yTestHat)
print(cm)

In [None]:
clsRp = classification_report(yTest, yTestHat)
print(clsRp)

## Deep Neural Network

### Data Preprocessing

In [None]:
yTrain

#### One-Hot Encoding

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# Pin the one-hot width to the number of classes the label encoder has seen, so it does not
# depend on which labels happen to land in the training split (by default the width is inferred
# from the largest label value present in yTrain).
yTrainOneHot = to_categorical(yTrain, num_classes = len(lbEnc.classes_))

In [None]:
print(yTrain[0])
print(yTrainOneHot[0])

### Build Neural Network

In [None]:
K.clear_session()
fixSeed()

In [None]:
model = keras.Sequential()

In [None]:
model.add(keras.layers.Dense(16, keras.layers.LeakyReLU(), input_dim=x.shape[1]))
model.add(keras.layers.Dense(32, keras.layers.LeakyReLU()))
model.add(keras.layers.Dense(32, keras.layers.LeakyReLU()))
model.add(keras.layers.Dense(32, keras.layers.LeakyReLU()))
model.add(keras.layers.Dense(7, "softmax"))

### Summarize Model

In [None]:
model.summary()

### Visualize Model

In [None]:
from keras.utils import plot_model
plot_model(model, to_file="model.png")

### Compile Model

In [None]:
lr, batchSize, epoch = 1e-2, 32, 200

In [None]:
model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=lr), metrics=["accuracy"])

### Train Model

In [None]:
history = model.fit(xTrain, yTrainOneHot, batch_size=batchSize, epochs=epoch)

### Visualize Training Procedure

In [None]:
# Training loss per epoch for the multi-class network.
fig, ax = plt.subplots(figsize = (10,5))
epochAxis = np.arange(epoch)
ax.plot(epochAxis, history.history["loss"], label="Training Loss")
ax.set_title("Training Loss")
ax.set_xlabel("# Epoch")
ax.set_ylabel("Loss")
ax.legend(loc="best")
plt.show()

In [None]:
# Training accuracy per epoch for the multi-class network.
fig, ax = plt.subplots(figsize = (10,5))
epochAxis = np.arange(epoch)
ax.plot(epochAxis, history.history["accuracy"], label="Training Accuracy")
ax.set_title("Training Accuracy")
ax.set_xlabel("# Epoch")
ax.set_ylabel("Accuracy")
ax.legend(loc="best")
plt.show()

### Inference Result

In [None]:
yPred = model.predict(xTest)

In [None]:
print(type(yPred))
print(yPred.shape)

### Model Evaluation

In [None]:
yPred

In [None]:
yPred[0]

#### Postprocess Result

In [None]:
yPred = np.argmax(yPred, axis = 1)

In [None]:
yPred[0]

In [None]:
yPred = yPred.reshape(-1)

In [None]:
accScore = accuracy_score(yTest, yPred)
print(accScore)

In [None]:
cm = confusion_matrix(yTest, yPred)
print(cm)

In [None]:
clsRp = classification_report(yTest, yPred)
print(clsRp)

# 실습

## Scikit-Learn의 Digits Dataset을 기반으로 Multi Class Classification을 진행하세요.
### **1) Dataset을 불러온 후 Min-Max Scaling을 진행하세요.**
### **2) 다음과 같은 인공신경망을 만드세요.**
### 2.1) 모델 구조 : 입력 → 32 노드 → 32 노드 → 32 노드 → 출력
### 2.2) 활성화 함수 : Leaky ReLU
### **3) 다음과 같은 하이퍼파라미터를 사용하여 모델을 훈련하세요.**
### 3.1) Optimizer : Adam
### 3.2) Learning Rate : 1e-2
### 3.3) Batch Size : 32
### 3.4) Epoch : 200
### **4) 훈련된 모델의 성능을 평가하세요.**

In [None]:
data = datasets.load_digits(as_frame = True)