In [1]:
import torch
import torch.nn as nn
import torch.utils.data as Data
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import datetime
import seaborn as sns

In [2]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
if not use_cuda:
    print(device)
else:
    # Also report the name of GPU 0 so the run can be traced back to the hardware.
    print(device, torch.cuda.get_device_name(0))

cuda NVIDIA GeForce RTX 3060


### Read data file to dataframe --> convert to numpy array

In [3]:
# The workbook is expected to sit in the same directory as this notebook.
fname = "1. Data - Iris.xlsx"
df = pd.read_excel(fname)
# Preview the first five rows (head() defaults to 5).
df.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
# Show the column labels so the feature and label columns can be selected by name.
df.columns

Index(['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',
       'Species'],
      dtype='object')

In [5]:
# Separate the four measurement columns (model inputs) from the species column (label).
feature_columns = ['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width']
dfX = df[feature_columns]
dfY = df['Species']

In [6]:
# Convert dfY from species-name strings to integer class ids 0, 1, 2.
SPECIES_TO_ID = {'setosa': 0, 'versicolor': 1, 'virginica': 2}

lstY1 = dfY.values.tolist()

# Fail loudly on anything outside the three known species. Skipping such a row
# would leave lstY shorter than dfX, so every later label would be paired with
# the wrong feature row without any error being raised.
unknown_labels = sorted({str(elt) for elt in lstY1 if elt not in SPECIES_TO_ID})
if unknown_labels:
    raise ValueError(f"Wrong label(s) in 'Species' column: {unknown_labels}")

lstY = [SPECIES_TO_ID[elt] for elt in lstY1]

In [7]:
#convert data to numpy array
numpyX = np.array(dfX.values)
numpyY = np.array(lstY)
print(numpyX.shape, numpyY.shape)

(150, 4) (150,)


### Split input data into train and test sets

In [8]:
# Hold out 20% of the samples as the test set; the fixed random_state keeps the split repeatable.
trainX, testX, trainY, testY = train_test_split(
    numpyX,
    numpyY,
    test_size=0.20,
    random_state=0,
)
split_arrays = (trainX, testX, trainY, testY)
print(*(arr.shape for arr in split_arrays))

(120, 4) (30, 4) (120,) (30,)


### Define NN

In [9]:
# Fully connected classifier: 4 input features -> two hidden layers of 56 units -> 3 outputs.
# Each hidden Linear layer is followed by Dropout(p=0.5) and then a Sigmoid.
layers = [
    nn.Linear(4, 56),
    nn.Dropout(p=0.5),
    nn.Sigmoid(),
    nn.Linear(56, 56),
    nn.Dropout(p=0.5),
    nn.Sigmoid(),
    nn.Linear(56, 3),
]
# Build the model and move its parameters to the selected device in one go.
MyNet = nn.Sequential(*layers).to(device)
MyNet

Sequential(
  (0): Linear(in_features=4, out_features=56, bias=True)
  (1): Dropout(p=0.5, inplace=False)
  (2): Sigmoid()
  (3): Linear(in_features=56, out_features=56, bias=True)
  (4): Dropout(p=0.5, inplace=False)
  (5): Sigmoid()
  (6): Linear(in_features=56, out_features=3, bias=True)
)

### First time through: go to the Appendix to practice the training loop step by step

### Train NN

In [None]:
# Loss histories; they live in their own cell so the training cell can be
# re-run and keep appending to the same lists.
train_lossLst, validation_lossLst = [], []

In [None]:
# split training data to train and validation
trainX, validationX, trainY, validationY = train_test_split(trainX, trainY, test_size=0.20, random_state=0)
tensor_trainX = torch.FloatTensor(trainX).to(device)
tensor_trainY = torch.LongTensor(trainY).to(device)

tensor_validationX = torch.FloatTensor(validationX).to(device)
tensor_validationY = torch.LongTensor(validationY).to(device)

In [None]:
# Sanity-check the tensor shapes before building the data loader.
shape_report = (tensor_trainX, tensor_trainY, tensor_validationX, tensor_validationY)
print(*(t.shape for t in shape_report))

In [None]:
# Number of samples per mini-batch handed to the DataLoader.
BATCH_SIZE = 16   #try 1/5, 1/10, 1/15 of the training data

In [None]:
# define data loader
torch_dataset = Data.TensorDataset(tensor_trainX, tensor_trainY)
loader = Data.DataLoader(
    dataset=torch_dataset,  
    batch_size=BATCH_SIZE,      
    shuffle=True,       
    num_workers=0,   
)

In [None]:
# initialize NN weights
for name, param in MyNet.named_parameters():
  if(param.requires_grad):
    torch.nn.init.normal_(param, mean=0.0, std=0.02)
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MyNet.parameters(), lr=0.0003)

In [None]:
# train NN 
epoch_lossLst=[]
for epoch in range(1, 500):
  if(epoch%100 == 0):
    print(epoch, end=",")
  for (batchX, batchY) in loader:
    batchY_hat = MyNet(batchX)
    loss = loss_func(batchY_hat, batchY)
    epoch_lossLst.append(float(loss))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  avg = sum(epoch_lossLst) / len(epoch_lossLst)
  train_lossLst.append(avg) 

  with torch.no_grad():
      tensor_validationY_hat = MyNet(tensor_validationX)
      tensor_validationY = tensor_validationY.view(-1)
      validatiion_loss = loss_func(tensor_validationY_hat, tensor_validationY)
      validation_lossLst.append(float(validatiion_loss))

In [None]:
# Timestamp (month-day-hour-minute-second) that identifies this training run.
current_time = datetime.datetime.now()
formatted_time = f"{current_time:%m-%d-%H-%M-%S}"
print(formatted_time)

In [None]:
# save model of this training
fname = formatted_time + ".pth"
torch.save(MyNet.state_dict(), fname)

In [None]:
# Plot the recorded training and validation losses (one value per epoch) on a
# single labelled figure so the two curves can be told apart at a glance.
fig, loss_ax = plt.subplots(figsize=(12, 6))
loss_ax.plot(train_lossLst, color='blue', label='train loss')
loss_ax.plot(validation_lossLst, color='orange', label='validation loss')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('cross-entropy loss')
loss_ax.set_title('Training vs. validation loss')
loss_ax.legend()
plt.show()

### Manually go back and train again --> observe the train and validation loss --> train again...

### After training several times, pick the best model

In [None]:
# Load the best model
MyNet.load_state_dict(torch.load('05-07-09-22-40.pth'))

In [None]:
# Switch to evaluation mode (Dropout becomes a no-op) and make sure the model
# sits on the selected device; both calls return the model itself.
MyNet.eval().to(device)

In [None]:
# Held-out test set: features as float32, labels as int64 class indices, on the selected device.
tensor_testX = torch.tensor(testX, dtype=torch.float32).to(device)
tensor_testY = torch.tensor(testY, dtype=torch.long).to(device)

In [None]:
# Run the test set through the network and pick the most probable class per row.
# no_grad: inference needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    tensor_testY_hat = MyNet(tensor_testX)                   # raw network outputs, one row per sample
    tensor_testY_hat = torch.softmax(tensor_testY_hat, 1)    # normalize each row to probabilities
    MaxIdxOfEachRow = torch.max(tensor_testY_hat, 1)[1]      # [1] selects the argmax indices

In [None]:
# Move labels and predictions back to host memory as NumPy arrays for scikit-learn.
y_true = tensor_testY.cpu().numpy()
y_pred = MaxIdxOfEachRow.cpu().numpy()
# First argument = true labels, second = predicted labels.
conf_matrix = confusion_matrix(y_true, y_pred)

In [None]:
# Draw the confusion matrix with labelled axes. confusion_matrix puts the true
# classes on the rows and the predicted classes on the columns.
heat_ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='g')
heat_ax.set_xlabel('Predicted class')
heat_ax.set_ylabel('True class')
heat_ax.set_title('Confusion matrix on the test set');

# Appendix
練習 (Practice): Send a batch of input data through the network step by step

In [10]:
# split training data to train and validation
trainX, validationX, trainY, validationY = train_test_split(trainX, trainY, test_size=0.20, random_state=0)
tensor_trainX = torch.FloatTensor(trainX).to(device)
tensor_trainY = torch.LongTensor(trainY).to(device)

In [11]:
# Validation subset: float32 features and int64 class indices on the selected device.
tensor_validationX = torch.tensor(validationX, dtype=torch.float32).to(device)
tensor_validationY = torch.tensor(validationY, dtype=torch.long).to(device)

In [13]:
# Number of samples per mini-batch handed to the DataLoader.
BATCH_SIZE = 16   #try 1/10, of the training data

In [14]:
# define data loader
torch_dataset = Data.TensorDataset(tensor_trainX, tensor_trainY)
loader = Data.DataLoader(
    dataset=torch_dataset,  
    batch_size=BATCH_SIZE,      
    shuffle=True,       
    num_workers=0,   
)

In [15]:
# initialize NN weights
for name, param in MyNet.named_parameters():
  if(param.requires_grad):
    torch.nn.init.normal_(param, mean=0.0, std=0.02)
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MyNet.parameters(), lr=0.0003)

In [16]:
# Pull just the first mini-batch out of the loader to inspect it.
batchX, batchY = next(iter(loader))
print(batchX.shape, batchY)

torch.Size([16, 4]) tensor([0, 1, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0, 1, 0, 2], device='cuda:0')


In [17]:
# Step 1 - forward pass: feed the batch through the network.
# The result has one row per sample and one column per class (3 columns here).
batchY_hat = MyNet(batchX)
print(batchY_hat)

tensor([[-3.1829e-02, -3.1540e-02,  1.1092e-05],
        [-3.4164e-02, -3.1087e-02, -7.0757e-03],
        [-3.0728e-02, -3.2053e-02, -1.9588e-03],
        [-3.0091e-02, -2.9853e-02, -4.5190e-03],
        [-2.8413e-02, -2.8190e-02, -5.4611e-03],
        [-3.3065e-02, -3.1026e-02, -7.0320e-03],
        [-3.2709e-02, -3.0799e-02, -7.0550e-03],
        [-3.2889e-02, -3.1643e-02, -2.2408e-03],
        [-3.3073e-02, -2.9576e-02, -4.4447e-03],
        [-3.3439e-02, -2.8947e-02, -3.9666e-03],
        [-2.9445e-02, -3.0061e-02, -1.0546e-03],
        [-3.2403e-02, -3.0047e-02,  7.5985e-04],
        [-3.3853e-02, -2.8625e-02, -5.4465e-03],
        [-2.9634e-02, -2.9309e-02, -8.8691e-03],
        [-2.5259e-02, -3.1236e-02,  1.4824e-04],
        [-3.1992e-02, -3.0186e-02, -5.8868e-03]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


In [18]:
# Step 2 - compute the cross-entropy loss between the network outputs and the integer labels.
# With near-zero outputs for all 3 classes the loss starts close to ln(3) ~= 1.0986.
loss = loss_func(batchY_hat, batchY)
print(loss)

tensor(1.0955, device='cuda:0', grad_fn=<NllLossBackward0>)


In [19]:
# Step 3 - one optimization step.
optimizer.zero_grad()   # clear gradients accumulated by any previous backward pass
loss.backward()         # backpropagate: compute gradients of the loss w.r.t. the parameters
optimizer.step()        # let the optimizer update the parameters from those gradients