### This NoteBook provides some Notes about Classification for Custom DataSet using Logistic Regression

In [6]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as f
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import os

In [7]:
## config (デバイスの設定)
device  = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Read and prepare the Data

In [8]:
## read the data 
file_path = os.path.join(os.getcwd(), 'x', 'Knight', 'xy.csv')

# ()はファイルのパス
df_train = pd.read_csv(r'./xy.csv')
df_train.head()

Unnamed: 0,x,Knight
0,0.387291,-1.353185
1,0.500177,-8.408062
2,1.479996,-1.02924
3,2.740224,-8.442742
4,3.313055,-0.818826


In [9]:
## split the data (特徴量とラベルにデータを分割し、さらにトレーニングデータと検証データに分ける)
X = df_train.iloc[:, 1:-1]
y = df_train['y']

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, 
                                                      shuffle=True, random_state=42)

# Convert Class String labels into Integers
# ラベルのエンコードとデータの標準化
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_valid_encoded = encoder.transform(y_valid)

## reshape them（）
y_train_encoded = y_train_encoded.reshape(-1, 1)
y_valid_encoded = y_valid_encoded.reshape(-1, 1)

# Normalize the input features of the dataset
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)

## convert to float32 then to tensors（テンソルへの変換）
X_train_scaled = torch.from_numpy(X_train_scaled.astype(np.float32))
X_valid_scaled = torch.from_numpy(X_valid_scaled.astype(np.float32))
y_train_encoded = torch.from_numpy(y_train_encoded.astype(np.float32))
y_valid_encoded = torch.from_numpy(y_valid_encoded.astype(np.float32))

##  for target  --> this is a MUST
y_train_encoded = y_train_encoded.type(torch.LongTensor)
y_valid_encoded = y_valid_encoded.type(torch.LongTensor)

KeyError: 'y'

In [None]:
## if you do not do that you will catch an error
y_train_encoded = torch.squeeze(y_train_encoded)
y_valid_encoded = torch.squeeze(y_valid_encoded)

#### Build The Model

In [None]:
## Create a class for Logistic Regression（ロジスティック回帰モデルの定義）
class LogisticRegression(nn.Module):
    def __init__(self, input_features):
        super(LogisticRegression, self).__init__()
        
        self.linear = nn.Linear(input_features, 7) ## --> 1 output class
    
    def forward(self, x):
        y_pred = self.linear(x)  ## no Softmax for multiclass --< CrossEtropy handles this
        return y_pred
    


## intance 
log_reg = LogisticRegression(input_features=X_train_scaled.shape[1])


## Training
print('Training started __________________________________ \n')

## criteria
learning_rate = 0.01
n_epochs = 10000

loss = nn.CrossEntropyLoss()   
optimizer = torch.optim.SGD(log_reg.parameters(), lr=learning_rate)


for epoch in range(n_epochs):
    ## predicting
    y_pred_train = log_reg(X_train_scaled)
    ## loss
    l = loss(y_pred_train, y_train_encoded)
    ## backpropagation
    l.backward()
    ## optimizaion step
    optimizer.step()
    ## empty the gradients
    optimizer.zero_grad()


print('Training finished ____________________________________ \n') 

_, y_pred_train_cls = torch.max(y_pred_train, dim=1)
train_acc = (y_pred_train_cls==y_train_encoded).sum()/y_train_encoded.shape[0]
print(f'Training Accuracy is --> {train_acc*100:.3f} %')

Training started __________________________________ 

Training finished ____________________________________ 

Training Accuracy is --> 91.496 %


#### Evaluation

In [None]:
## when you evaluate you must stop gradients to not kick the requires_grad=True
with torch.no_grad():
    y_pred_valid = log_reg(X_valid_scaled)
    
    _, y_pred_valid_cls = torch.max(y_pred_valid, dim=1)
    valid_acc = (y_pred_valid_cls==y_valid_encoded).sum()/y_valid_encoded.shape[0]
    
    print(f'Validation Accuracy is --> {valid_acc*100:.3f} %')

Validation Accuracy is --> 92.432 %


### Done !