## Make/Load dataset

In [None]:
# Generate a synthetic classification dataset: 500 samples with 10 features,
# 4 of which are informative, and 2 clusters per class.
from sklearn.datasets import make_classification
# random_state is fixed so the same X, y are produced on every run.
X, y = make_classification(n_samples=500, n_features=10, n_informative=4,
                             n_clusters_per_class=2, random_state=14)

In [None]:
# Load the Boston housing dataset and expose it as features X and target y.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on older versions; switch to another regression
# dataset when upgrading.
from sklearn.datasets import load_boston
boston = load_boston()
X = boston.data    # feature matrix
y = boston.target  # regression target

In [None]:
# Load the iris dataset and keep only a subset of its feature columns.
from sklearn.datasets import load_iris
iris = load_iris()
# Keep the columns from index 2 onward (petal length and petal width in the
# standard iris feature order).
X = iris.data[:, 2:]
y = iris.target  # class labels

## Standardize/ Normalize data

In [None]:
# Standardize every feature column of X to zero mean and unit variance.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# fit_transform learns the per-feature statistics from X and applies them in
# one step; X is overwritten with the scaled values.
# NOTE(review): if this runs before the train/test split, test-set statistics
# leak into training — consider fitting on X_train only.
X = scaler.fit_transform(X)

In [1]:
from sklearn.preprocessing import Normalizer
# Toy input: each inner list is one sample (row).
X = [[4, 1, 2, 2],
     [1, 3, 9, 3],
     [5, 7, 5, 1]]
# Normalizer rescales each row independently to unit norm (L2 by default), so
# there is nothing to learn from the data.
transformer = Normalizer().fit(X)  # fit does nothing.
transformer.transform(X)
# Expected output (kept as a comment: pasted as code, `array(...)` raises
# NameError because `array` is not defined in this notebook):
# array([[0.8, 0.2, 0.4, 0.4],
#        [0.1, 0.3, 0.9, 0.3],
#        [0.5, 0.7, 0.5, 0.1]])

## Split Data

#### sklearn

In [None]:
# Placeholders: replace with the real feature matrix and target vector before
# running this cell.
X, y = None, None

from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing. No random_state is passed, so the
# split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

#### from scratch

In [None]:
# Shuffle
# Append y as the last column so each row of X stays paired with its target
# while the rows are shuffled.
data = np.concatenate((X, y.reshape(-1,1)), axis=1)
# Unseeded generator: the row order differs on every run.
rng = np.random.default_rng()
rng.shuffle(data)  # shuffles the rows in place
# Split the shuffled array back into features and target.
# NOTE: y now carries the dtype of the concatenated array rather than its
# original dtype.
X = data[:,:-1]
y = data[:,-1]

In [None]:
# data split
# Fraction of the rows used for training; the remaining rows form the test
# set. X and y are assumed to be shuffled already, because the split is a
# plain head/tail slice.
train_fraction = 70/100
train_size = round(train_fraction * X.shape[0])
X_train, X_test = X[:train_size, :], X[train_size:, :]
# Lowercase names match the sklearn split cell and the evaluation cells,
# which read y_test.
y_train, y_test = y[:train_size], y[train_size:]
# Backward-compatible aliases for code that still uses the capitalised names.
Y_train, Y_test = y_train, y_test

## Insert intercept

In [None]:
# Build a column of ones with one entry per training row and prepend it to
# X_train, so the first column acts as the intercept (bias) term.
intercept = np.ones((len(X_train), 1))
X_train = np.hstack((intercept, X_train))

## Regression result

In [None]:
# MSE

def mse(yhat, y):
    """Return the mean squared error between predictions and targets.

    The squared differences are summed over every element and divided by
    the length of the first axis of ``yhat``.

    Parameters
    ----------
    yhat : array
        Predicted values; must expose ``.shape`` and support elementwise
        arithmetic (e.g. a numpy array).
    y : array
        Target values, compatible with ``yhat`` for subtraction.
    """
    squared_residuals = (yhat - y) ** 2
    n_samples = yhat.shape[0]
    return squared_residuals.sum() / n_samples

## Classification report

#### From scratch

In [None]:
class classification_report_fromSratch:
    """Binary classification metrics derived from confusion-matrix counts.

    Labels are compared against 1 (positive) and 0 (negative); elements with
    any other value are counted in none of the four cells.

    Parameters
    ----------
    y_actual : array-like
        Ground-truth labels.
    y_predict : array-like
        Predicted labels, with the same number of elements as ``y_actual``.

    Attributes
    ----------
    TP, FN, FP, TN : int
        True-positive, false-negative, false-positive and true-negative
        counts.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """

    def __init__(self, y_actual, y_predict):
        # Local import keeps the class usable in a cell that has not imported
        # numpy yet.
        import numpy as np

        self.y_actual = y_actual
        self.y_predict = y_predict

        # Flatten both inputs so lists work and an (n, 1) column vector is
        # compared elementwise with an (n,) vector instead of broadcasting.
        actual = np.asarray(y_actual).ravel()
        predicted = np.asarray(y_predict).ravel()
        if actual.shape != predicted.shape:
            raise ValueError(
                "y_actual and y_predict must contain the same number of elements"
            )

        self.TP = int(np.count_nonzero((actual == 1) & (predicted == 1)))
        self.FN = int(np.count_nonzero((actual == 1) & (predicted == 0)))
        self.FP = int(np.count_nonzero((actual == 0) & (predicted == 1)))
        self.TN = int(np.count_nonzero((actual == 0) & (predicted == 0)))

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator, or 0.0 when the denominator is zero."""
        if denominator == 0:
            return 0.0
        return numerator / denominator

    def accuracy(self):
        """Return the fraction of counted samples that were classified correctly."""
        total = self.TP + self.TN + self.FP + self.FN
        return self._ratio(self.TP + self.TN, total)

    def precision(self):
        """Return TP / (TP + FP); 0.0 when nothing was predicted positive."""
        return self._ratio(self.TP, self.TP + self.FP)

    def recall(self):
        """Return TP / (TP + FN); 0.0 when there are no actual positives."""
        return self._ratio(self.TP, self.TP + self.FN)

    def f1(self):
        """Return the harmonic mean of precision and recall; 0.0 when both are zero."""
        # Compute each metric once instead of twice.
        precision = self.precision()
        recall = self.recall()
        return self._ratio(2 * precision * recall, precision + recall)

#### sklearn

In [None]:
from sklearn.metrics import classification_report
# `yhat` must hold the model's predictions for X_test before this cell runs.
# It is passed positionally as the predictions argument: classification_report
# has no `yhat` keyword, so `yhat=None` raised TypeError.
print(classification_report(y_test, yhat))

#### Confusion matrix

In [None]:
# from Lab03-02-NBM
# Plot the confusion matrix of the predictions `yhat` against `y_test` as an
# annotated heatmap.

from sklearn.metrics import confusion_matrix
import seaborn as sns

mat = confusion_matrix(y_test, yhat)

# The matrix is transposed so that rows are predicted labels and columns are
# true labels, matching the axis labels set below.
# NOTE(review): `data` must be an object exposing `target_names` here (e.g. a
# loaded sklearn dataset), not the shuffled array built in the split section —
# confirm before running.
sns.heatmap(mat.T, annot=True, fmt="d",
           xticklabels=data.target_names, yticklabels=data.target_names)
# `plt` is assumed to be matplotlib.pyplot imported elsewhere in the notebook.
plt.xlabel('true')
plt.ylabel('predicted')

## Batch/ Mini-batch/ Stochastic

In [None]:
# Pick the training batch for every iteration according to self.method.
# This is a fragment of a model's fit loop: `self`, `X`, `y` and `batch_size`
# come from the enclosing method.
list_of_used_ix = []  # sample indices already drawn in the current "sto" pass
for i in range(self.max_iter):
    if self.method == "minibatch":
        # Random start row. NOTE: the slice holds fewer than batch_size rows
        # when the start falls within the last batch_size rows of X.
        ix = np.random.randint(0, X.shape[0])
        batch_X = X[ix:ix+batch_size]
        batch_y = y[ix:ix+batch_size]
    elif self.method == "sto":
        # Draw one sample without replacement: redraw until the index has not
        # been used in the current pass over the data.
        idx = np.random.randint(0, X.shape[0])
        while idx in list_of_used_ix:
            idx = np.random.randint(0, X.shape[0])
        # Index with the freshly drawn idx (the original used `ix`, which is
        # stale or undefined in this branch).
        batch_X = X[idx, :].reshape(1, -1)
        batch_y = y[idx]
        list_of_used_ix.append(idx)
        # Every sample has been used once: start a new pass.
        if len(list_of_used_ix) == X.shape[0]:
            list_of_used_ix = []
    elif self.method == "batch":
        # Full-batch: use the whole dataset in every iteration.
        batch_X = X
        batch_y = y
    else:
        print("Method is not match")

## Cross Validation

In [None]:
# Fit the model once for every K from 2 to K_max, record self.variation after
# each fit, and plot variation against K.
# NOTE(review): this is a fragment of a method (`self`, `X` come from the
# enclosing scope); presumably an elbow plot for choosing the number of
# clusters — confirm against the model class.
K_max = 10

variation_list = []
K_list = range(2,K_max+1)  # K_max is included
for K in K_list:
    self.fit(X, K)
    # self.variation is read right after fit, so it is expected to be set by fit.
    variation_list.append(self.variation)
plt.plot(K_list, variation_list)
plt.xlabel('K')
plt.ylabel('Variation')