In [1]:
import torch
import torch.nn as nn

import numpy as np
import pandas as pd

from sklearn import linear_model, model_selection
from sklearn.datasets import load_wine
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- Binomial task: CSV file plus the column names assigned to it (label column last).
binomial_classification_data_path = './data/diabetes.csv'
binomial_classification_feature_names = [
    'pregnant',
    'glucose',
    'bp',
    'skin',
    'insulin',
    'bmi',
    'pedigree',
    'age',
    'label',
]

# --- Multinomial task: the wine dataset bundled with scikit-learn.
multinomial_classification_data_info = load_wine()
multinomial_classification_feature_names = multinomial_classification_data_info.feature_names

# --- Train/test split settings shared by both tasks.
test_data_ratio = 0.3
random_state = 0

# --- Number of output classes per task.
num_binomial_class = 2
num_multinomial_class = 3

# --- Optimization settings for the torch models.
learning_rate = 5e-3
epochs = 200
criterion = nn.CrossEntropyLoss()

In [2]:
class BinomialClassificationDataLoader:
  """Load a labelled CSV, split it into train/test sets, and min-max scale the features.

  The last entry of ``feature_names`` is used as the label column; every
  other entry is used as a feature column.
  """

  def __init__(self, path, feature_names, test_data_ratio, random_state):
    """Read the CSV and build the normalized train/test arrays.

    Args:
      path: Path of the CSV file. Its first row is treated as a header and
        replaced by ``feature_names``.
      feature_names: Column names to assign, with the label column last.
      test_data_ratio: Forwarded to ``train_test_split`` as ``test_size``.
      random_state: Forwarded to ``train_test_split`` as ``random_state``.
    """
    self.data_path = path
    self.feature_names = feature_names
    self.test_data_ratio = test_data_ratio
    self.random_state = random_state

    self.csv_infos = self.get_csv_infos()
    self.csv_data, self.csv_label = self.get_csv_dataset()
    self.train_data, self.train_label, self.test_data, self.test_label = self.get_split_data()
    self.min_max_normalize_data()

  def __call__(self, flag):
    """Return ``(data, label)`` for the requested split.

    Args:
      flag: ``'train'`` or ``'test'``.

    Raises:
      ValueError: If ``flag`` is anything else. Previously this fell through
        and returned ``None``, which only failed later when the caller
        tried to unpack it.
    """
    if flag == 'train':
      return self.train_data, self.train_label
    if flag == 'test':
      return self.test_data, self.test_label

    raise ValueError(f"flag must be 'train' or 'test', got {flag!r}")

  def get_csv_infos(self):
    """Read the CSV into a DataFrame whose columns are ``feature_names``."""
    # Parse the first row as the header rather than reading it as data and
    # dropping it afterwards: a text row inside the data forces every column
    # to be parsed as text instead of numbers.
    df = pd.read_csv(self.data_path, header=0)
    df.columns = self.feature_names

    return df

  def get_csv_dataset(self):
    """Split the frame into a float32 feature matrix and a float32 label vector."""
    feature_columns, label_column = self.feature_names[:-1], self.feature_names[-1]
    data = self.csv_infos[feature_columns]
    # Go through int first so the labels are whole class ids before the float cast.
    label = self.csv_infos[label_column].astype(int)

    return data.to_numpy().astype(np.float32), label.to_numpy().astype(np.float32)

  def get_split_data(self):
    """Split features and labels; returns train data, train label, test data, test label."""
    train_data, test_data, train_label, test_label = model_selection.train_test_split(
        self.csv_data, self.csv_label, test_size=self.test_data_ratio, random_state=self.random_state
    )

    return train_data, train_label, test_data, test_label

  def min_max_normalize_data(self):
    """Scale both splits with the per-feature min and max of the training split."""
    min_values = np.min(self.train_data, axis=0)
    value_range = np.max(self.train_data, axis=0) - min_values
    # A feature that is constant in the training split has a zero range.
    # Dividing by it would fill the column with NaN/inf, so divide by 1
    # instead (the training column then becomes all zeros).
    value_range[value_range == 0] = 1

    self.train_data = (self.train_data - min_values) / value_range
    self.test_data = (self.test_data - min_values) / value_range


class MultinomialClassificationDataLoader:
  """Wrap an in-memory dataset, split it into train/test sets, and min-max scale the features.

  ``data_info`` must expose ``.data`` (feature matrix) and ``.target``
  (labels), which is how this class reads it.
  """

  def __init__(self, data_info, feature_names, test_data_ratio, random_state):
    """Build the normalized train/test arrays from ``data_info``.

    Args:
      data_info: Object with ``data`` and ``target`` attributes.
      feature_names: Column names for the feature matrix.
      test_data_ratio: Forwarded to ``train_test_split`` as ``test_size``.
      random_state: Forwarded to ``train_test_split`` as ``random_state``.
    """
    self.data_info = data_info
    self.feature_names = feature_names
    self.test_data_ratio = test_data_ratio
    self.random_state = random_state

    self.data_infos = self.get_data_infos()
    self.data, self.label = self.get_dataset()

    self.train_data, self.train_label, self.test_data, self.test_label = self.get_split_data()
    self.min_max_normalize_data()

  def __call__(self, flag):
    """Return ``(data, label)`` for the requested split.

    Args:
      flag: ``'train'`` or ``'test'``.

    Raises:
      ValueError: If ``flag`` is anything else. Previously this fell through
        and returned ``None``, which only failed later when the caller
        tried to unpack it.
    """
    if flag == 'train':
      return self.train_data, self.train_label
    if flag == 'test':
      return self.test_data, self.test_label

    raise ValueError(f"flag must be 'train' or 'test', got {flag!r}")

  def get_data_infos(self):
    """Assemble features and targets into one DataFrame with a ``'label'`` column."""
    df = pd.DataFrame(self.data_info.data, columns=self.feature_names)
    df['label'] = self.data_info.target

    return df

  def get_dataset(self):
    """Split the frame into a float32 feature matrix and a float32 label vector."""
    data, label = self.data_infos.drop(columns=['label']), self.data_infos['label']

    return data.to_numpy().astype(np.float32), label.to_numpy().astype(np.float32)

  def get_split_data(self):
    """Split features and labels; returns train data, train label, test data, test label."""
    train_data, test_data, train_label, test_label = model_selection.train_test_split(
        self.data, self.label, test_size=self.test_data_ratio, random_state=self.random_state
    )
    # self.data / self.label are already float32 (see get_dataset), so the
    # splits need no further cast.
    return train_data, train_label, test_data, test_label

  def min_max_normalize_data(self):
    """Scale both splits with the per-feature min and max of the training split."""
    min_values = np.min(self.train_data, axis=0)
    value_range = np.max(self.train_data, axis=0) - min_values
    # A feature that is constant in the training split has a zero range.
    # Dividing by it would fill the column with NaN/inf, so divide by 1
    # instead (the training column then becomes all zeros).
    value_range[value_range == 0] = 1

    self.train_data = (self.train_data - min_values) / value_range
    self.test_data = (self.test_data - min_values) / value_range


# Build the binomial loader and pull out its two splits.
binomial_classification_dataloader = BinomialClassificationDataLoader(
    binomial_classification_data_path,
    binomial_classification_feature_names,
    test_data_ratio,
    random_state,
)
(
    binomial_classification_train_data,
    binomial_classification_train_label,
) = binomial_classification_dataloader('train')
(
    binomial_classification_test_data,
    binomial_classification_test_label,
) = binomial_classification_dataloader('test')

# Same for the multinomial loader.
multinomial_classifcation_dataloader = MultinomialClassificationDataLoader(
    multinomial_classification_data_info,
    multinomial_classification_feature_names,
    test_data_ratio,
    random_state,
)
(
    multinomial_classifcation_train_data,
    multinomial_classifcation_train_label,
) = multinomial_classifcation_dataloader('train')
(
    multinomial_classifcation_test_data,
    multinomial_classifcation_test_label,
) = multinomial_classifcation_dataloader('test')

# Report the shape of every split.
print(
    f'binomial classification train data: {binomial_classification_train_data.shape}'
    f'/binomial classification train label: {binomial_classification_train_label.shape}'
)
print(
    f'binomial classification test data: {binomial_classification_test_data.shape}'
    f'/binomial classification test label: {binomial_classification_test_label.shape}'
)

print(
    f'multionomial classification train data: {multinomial_classifcation_train_data.shape}'
    f'/multionomial classification train label: {multinomial_classifcation_train_label.shape}'
)
print(
    f'multionomial classification test data: {multinomial_classifcation_test_data.shape}'
    f'/multionomial classification test label: {multinomial_classifcation_test_label.shape}'
)

binomial classification train data: (537, 8)/binomial classification train label: (537,)
binomial classification test data: (231, 8)/binomial classification test label: (231,)
multionomial classification train data: (124, 13)/multionomial classification train label: (124,)
multionomial classification test data: (54, 13)/multionomial classification test label: (54,)


In [5]:
class TorchBinomialLogisticRegression(nn.Module):
  """A single linear layer whose outputs are used as per-class logits."""

  def __init__(self, data, num_class):
    """Size the layer with ``data.shape[1]`` inputs and ``num_class`` outputs."""
    super().__init__()
    num_features = data.shape[1]
    self.logistic_regressor = nn.Linear(num_features, num_class)

  def forward(self, data):
    """Return the linear layer's raw output for ``data``."""
    return self.logistic_regressor(data)

  def get_params(self):
    """Return ``(weight, bias)`` of the layer as detached NumPy arrays on the CPU."""
    layer = self.logistic_regressor

    return tuple(param.detach().cpu().numpy() for param in (layer.weight, layer.bias))

class TorchMultinomialLogisticRegression(nn.Module):
  """A single linear layer whose outputs are used as per-class logits."""

  def __init__(self, data, num_class):
    """Size the layer with ``data.shape[1]`` inputs and ``num_class`` outputs."""
    super().__init__()
    num_features = data.shape[1]
    self.logistic_regressor = nn.Linear(num_features, num_class)

  def forward(self, data):
    """Return the linear layer's raw output for ``data``."""
    return self.logistic_regressor(data)

  def get_params(self):
    """Return ``(weight, bias)`` of the layer as detached NumPy arrays on the CPU."""
    layer = self.logistic_regressor

    return tuple(param.detach().cpu().numpy() for param in (layer.weight, layer.bias))

class ScikitLearnLogisticRegression:
  """Thin wrapper around scikit-learn's ``LogisticRegression`` with an L2 penalty."""

  def __init__(self):
    """Create the underlying (unfitted) estimator."""
    self.logistic_regressor = linear_model.LogisticRegression(penalty='l2')

  def __call__(self, data):
    """Return the estimator's ``predict`` output for ``data``."""
    return self.logistic_regressor.predict(data)

  def get_params(self):
    """Return ``(coef_, intercept_)`` of the underlying estimator."""
    estimator = self.logistic_regressor

    return estimator.coef_, estimator.intercept_


# nn.Linear draws its initial weights from torch's global RNG. Seed it here so
# that re-running this cell (or the whole notebook) starts both torch models
# from the same weights and the reported metrics are reproducible.
torch.manual_seed(random_state)

# Binomial task: one scikit-learn model and one torch model with its SGD optimizer.
binomial_sklearn_classifier = ScikitLearnLogisticRegression()
binomial_torch_classifier = TorchBinomialLogisticRegression(binomial_classification_train_data, num_binomial_class).to(device)
binomial_torch_model_optimizer = torch.optim.SGD(binomial_torch_classifier.parameters(), lr=learning_rate)

# Multinomial task: same pairing.
multinomial_sklearn_classifier = ScikitLearnLogisticRegression()
multinomial_torch_classifier = TorchMultinomialLogisticRegression(multinomial_classifcation_train_data, num_multinomial_class).to(device)
multinomial_torch_model_optimizer = torch.optim.SGD(multinomial_torch_classifier.parameters(), lr=learning_rate)

In [6]:
def train_sklearn_model(data, label, model):
  """Fit the estimator stored in ``model.logistic_regressor`` on ``data`` and ``label``."""
  estimator = model.logistic_regressor
  estimator.fit(data, label)


def train_torch_model(data, label, model, criterion, optimizer, epochs, device):
  """Run ``epochs`` optimization steps of ``model`` on the whole of ``data`` at once.

  ``data`` is converted to a float32 tensor and ``label`` to a long tensor,
  both moved to ``device``. Each step clears the gradients, evaluates
  ``criterion`` on the model output, back-propagates and calls
  ``optimizer.step()``. Nothing is returned; ``model`` is updated in place
  and left in training mode.
  """
  model.train()

  features = torch.tensor(data, dtype=torch.float32).to(device)
  targets = torch.tensor(label, dtype=torch.long).to(device)

  for _ in range(epochs):
    optimizer.zero_grad()

    loss = criterion(model(features), targets)
    loss.backward()

    optimizer.step()


# Binomial task: fit the scikit-learn model, then train the torch model.
train_sklearn_model(
    binomial_classification_train_data,
    binomial_classification_train_label,
    binomial_sklearn_classifier,
)
train_torch_model(
    binomial_classification_train_data,
    binomial_classification_train_label,
    binomial_torch_classifier,
    criterion,
    binomial_torch_model_optimizer,
    epochs,
    device,
)

# Multinomial task: same two steps.
train_sklearn_model(
    multinomial_classifcation_train_data,
    multinomial_classifcation_train_label,
    multinomial_sklearn_classifier,
)
train_torch_model(
    multinomial_classifcation_train_data,
    multinomial_classifcation_train_label,
    multinomial_torch_classifier,
    criterion,
    multinomial_torch_model_optimizer,
    epochs,
    device,
)

In [7]:
def show_parameters(sklearn_model, torch_model):
    """Print the weights, then the biases, of both models (scikit-learn first).

    Both arguments must provide ``get_params()`` returning ``(weight, bias)``.
    """
    sklearn_weight, sklearn_bias = sklearn_model.get_params()
    torch_weight, torch_bias = torch_model.get_params()

    report = (
        f'sklearn weight: {sklearn_weight}\ntorch weight: {torch_weight}',
        f'sklearn bias: {sklearn_bias}\ntorch bias: {torch_bias}',
    )
    for entry in report:
        print(entry)


# Print the learned parameters side by side: binomial pair first, then multinomial.
show_parameters(
    binomial_sklearn_classifier,
    binomial_torch_classifier,
)
show_parameters(
    multinomial_sklearn_classifier,
    multinomial_torch_classifier,
)

sklearn weight: [[ 1.0707948   4.33354734 -0.4743674   0.53145303 -0.07057994  2.95318117
   1.16876891  1.31638619]]
torch weight: [[-0.17271921  0.2857561   0.13505211 -0.13410577 -0.1989493   0.23358959
   0.10040203  0.19158544]
 [ 0.13340668  0.2421252   0.21998887 -0.2636144   0.1104598   0.2599634
  -0.03106291  0.2870178 ]]
sklearn bias: [-5.32209008]
torch bias: [ 0.31421492 -0.15382479]
sklearn weight: [[ 1.33113482  0.06162319  0.483231   -1.06054689  0.06532169  0.77523869
   1.51651302 -0.4735789   0.14388556  0.43597336  0.28142489  1.14143624
   1.92525644]
 [-1.67642761 -0.87331901 -0.92009193  0.58843948 -0.24633828 -0.11866864
   0.19239889  0.04369331  0.48606774 -1.74202569  0.88689904  0.51395912
  -1.74774518]
 [ 0.34529279  0.81169581  0.43686093  0.47210741  0.1810166  -0.65657006
  -1.70891191  0.42988559 -0.6299533   1.30605233 -1.16832393 -1.65539536
  -0.17751125]]
torch weight: [[ 0.32069722 -0.17195751 -0.02639375 -0.0076359   0.0171936   0.00912023
   0.0

In [10]:
def test_model(data, label, model, flag):
    result = model(data)

    accuracy = accuracy_score(label, result)
    print(f'acc: {accuracy}')

    if flag == 'binomial':
        recall = recall_score(label, result)
        precision = precision_score(label, result)
        f1_measure = f1_score(label, result)

        print(f'recall: {recall}')
        print(f'precision: {precision}')
        print(f'f1 score: {f1_measure}')

def test_torch_model(data, label, model, device, flag):
    model.eval()

    data, label = torch.tensor(data, dtype=torch.float32).to(device), torch.tensor(label, dtype=torch.float32).detach().cpu().numpy()

    with torch.no_grad():
        result = model(data)
        _, result = torch.max(result, 1)
        result = result.detach().cpu().numpy()
        accuracy = accuracy_score(label, result)
        print(f'acc: {accuracy}')

        if flag == 'binomial':
            recall = recall_score(label, result)
            precision = precision_score(label, result)
            f1_measure = f1_score(label, result)

            print(f'recall: {recall}')
            print(f'precision: {precision}')
            print(f'f1 score: {f1_measure}')


# Binomial task: scikit-learn metrics are printed first, torch metrics second.
print('Scikit-Learn Binomail Logstic Regression / Torch Binomial Logstic Regression')
test_model(
    binomial_classification_test_data,
    binomial_classification_test_label,
    binomial_sklearn_classifier,
    'binomial',
)
test_torch_model(
    binomial_classification_test_data,
    binomial_classification_test_label,
    binomial_torch_classifier,
    device,
    'binomial',
)

# Multinomial task: same order.
print('Scikit-Learn Multinomial Logstic Regression / Torch Multinomial Logstic Regression')
test_model(
    multinomial_classifcation_test_data,
    multinomial_classifcation_test_label,
    multinomial_sklearn_classifier,
    'multinomial',
)
test_torch_model(
    multinomial_classifcation_test_data,
    multinomial_classifcation_test_label,
    multinomial_torch_classifier,
    device,
    'multinomial',
)


Scikit-Learn Binomail Logstic Regression / Torch Binomial Logstic Regression
acc: 0.7662337662337663
recall: 0.4594594594594595
precision: 0.7083333333333334
f1 score: 0.5573770491803278
acc: 0.6796536796536796
recall: 0.0
precision: 0.0
f1 score: 0.0
Scikit-Learn Multinomial Logstic Regression / Torch Multinomial Logstic Regression
acc: 1.0
acc: 0.8518518518518519


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


: 