In [1]:
# Regression with an artificial neural network (ANN) built with the PyTorch library
# Mehmet VARAN
# -*- coding: utf-8 -*-

# importing libraries
import torch
from numpy import vstack
from numpy import sqrt
import pandas as pd
from sklearn.metrics import mean_squared_error
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch.nn import Linear
from torch.nn import Sigmoid
from torch.nn import Module
from torch.optim import SGD
from torch.nn import MSELoss
from torch.nn.init import xavier_uniform_
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Fix the seed of PyTorch's global random number generator so that the
# torch-driven randomness below (Xavier weight init, random_split,
# shuffled DataLoader batches) is repeatable between runs.
torch.manual_seed(5)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x2801bb789f0>

In [2]:
class CSVDataset(Dataset):
    """In-memory dataset of float32 feature/target rows read from a CSV file.

    With the default column selections the features are columns 11 up to (but
    excluding) the fourth-from-last column, and the targets are the three
    columns that precede the last one.

    Args:
        path: location of the CSV file, handed to ``pandas.read_csv``.
        feature_cols: positional column selector (anything ``DataFrame.iloc``
            accepts on the column axis) for the model inputs.
        target_cols: positional column selector for the regression targets.
        verbose: when true, print the feature and target arrays after loading
            (this is the historical behaviour, hence the default).

    Attributes:
        X: 2-D float32 array of inputs, one row per sample.
        y: 2-D float32 array of targets, one row per sample.
    """

    def __init__(self, path, feature_cols=slice(11, -4),
                 target_cols=slice(-4, -1), verbose=True):
        # load the csv file as a dataframe
        df = pd.read_csv(path)
        # Select the columns first and convert afterwards, so columns that are
        # not part of the selection never have to be cast to float.
        self.X = self._as_float_matrix(df.iloc[:, feature_cols])
        self.y = self._as_float_matrix(df.iloc[:, target_cols])
        if verbose:
            print(self.X)
            print(self.y)

    @staticmethod
    def _as_float_matrix(selection):
        """Return a column selection as a 2-D float32 array (rows = samples)."""
        values = selection.to_numpy(dtype='float32')
        if values.ndim == 1:
            # a single-column selector yields a Series; keep a column axis
            values = values.reshape(-1, 1)
        return values

    def __len__(self):
        """Number of rows (samples) in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``[inputs, targets]`` pair stored at row ``idx``."""
        return [self.X[idx], self.y[idx]]

    def get_splits(self, n_test=0.25):
        """Randomly split the dataset into train and test subsets.

        Args:
            n_test: fraction of the rows (between 0 and 1) assigned to the
                test subset; the remainder forms the train subset.

        Returns:
            The ``[train, test]`` subsets produced by ``random_split``.

        Raises:
            ValueError: if ``n_test`` is outside ``[0, 1]``. Without this
                check a fraction above 1 produces a negative train size whose
                sum with the test size still equals the dataset length.
        """
        if not 0 <= n_test <= 1:
            raise ValueError("n_test must be a fraction between 0 and 1, got %r" % (n_test,))
        test_size = round(n_test * len(self.X))
        train_size = len(self.X) - test_size
        return random_split(self, [train_size, test_size])

In [3]:
def prepare_data(path):
    """Build the train and test data loaders for the CSV file at ``path``.

    The file is loaded through ``CSVDataset`` and split with that class's
    default ``get_splits`` settings. The train loader shuffles and uses
    batches of 32; the test loader keeps its order and uses batches of 1024.

    Returns:
        A ``(train_dl, test_dl)`` tuple of ``DataLoader`` objects.
    """
    train_subset, test_subset = CSVDataset(path).get_splits()
    loaders = (
        DataLoader(train_subset, batch_size=32, shuffle=True),
        DataLoader(test_subset, batch_size=1024, shuffle=False),
    )
    return loaders

In [4]:
class MLP(Module):
    """Fully connected regression network: n_inputs -> 100 -> 30 -> 3.

    Both hidden layers are followed by a sigmoid; the output layer is linear.
    Every weight matrix is initialised with Xavier-uniform values.
    """

    @staticmethod
    def _xavier_linear(fan_in, fan_out):
        """Create a Linear layer and re-initialise its weight with Xavier-uniform."""
        layer = Linear(fan_in, fan_out)
        xavier_uniform_(layer.weight)
        return layer

    def __init__(self, n_inputs):
        super().__init__()
        # Layers are built strictly one after another so the random numbers
        # drawn during initialisation are consumed in a fixed order.
        self.hidden1 = self._xavier_linear(n_inputs, 100)
        self.act1 = Sigmoid()
        self.hidden2 = self._xavier_linear(100, 30)
        self.act2 = Sigmoid()
        # output layer: three regression values, no activation
        self.hidden3 = self._xavier_linear(30, 3)

    def forward(self, X):
        """Propagate a batch ``X`` through the network and return the outputs."""
        first = self.act1(self.hidden1(X))
        second = self.act2(self.hidden2(first))
        return self.hidden3(second)

In [5]:
def train_model(train_dl, model, n_epochs=100, lr=0.001, momentum=0.9):
    """Fit ``model`` in place with mini-batch SGD on a mean-squared-error loss.

    Args:
        train_dl: iterable yielding ``(inputs, targets)`` batches.
        model: the network to optimise; its parameters are updated in place.
        n_epochs: number of full passes over ``train_dl``.
        lr: SGD learning rate.
        momentum: SGD momentum factor.

    Returns:
        None. The trained weights live in ``model``.
    """
    criterion = MSELoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    # Make sure mode-dependent layers behave as in training, even if the
    # model was previously switched to eval mode.
    model.train()
    for _ in range(n_epochs):
        for inputs, targets in train_dl:
            # clear the gradients left over from the previous step
            optimizer.zero_grad()
            # forward pass and loss
            loss = criterion(model(inputs), targets)
            # back-propagate and update the weights
            loss.backward()
            optimizer.step()

In [6]:
def evaluate_model(test_dl, model):
    """Compute the mean squared error of ``model`` over every batch of ``test_dl``.

    Inference runs with autograd disabled and, when the model is a module in
    training mode, temporarily in eval mode; the previous mode is restored
    before returning.

    Args:
        test_dl: iterable yielding ``(inputs, targets)`` batches.
        model: the trained network to evaluate.

    Returns:
        The value of ``mean_squared_error(actuals, predictions)`` over all
        rows collected from ``test_dl``.

    Raises:
        ValueError: if ``test_dl`` yields no batches.
    """
    predictions, actuals = [], []
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()
    try:
        # no_grad: no autograd graph is needed for pure inference
        with torch.no_grad():
            for inputs, targets in test_dl:
                yhat = model(inputs).detach().numpy()
                actual = targets.numpy()
                # keep one row per sample whatever the number of target columns
                actual = actual.reshape((len(actual), -1))
                predictions.append(yhat)
                actuals.append(actual)
    finally:
        if was_training:
            model.train()
    if not predictions:
        raise ValueError("test_dl yielded no batches; nothing to evaluate")
    predictions, actuals = vstack(predictions), vstack(actuals)
    # calculate mse
    return mean_squared_error(actuals, predictions)

In [7]:
def predict(Dataset, model):
    """Run ``model`` on tabular inputs and return its predictions.

    Inference runs with autograd disabled and, when the model is a module in
    training mode, temporarily in eval mode; the previous mode is restored
    before returning.

    Args:
        Dataset: object exposing a numeric ``.values`` array with one row per
            sample (the notebook passes a pandas DataFrame). Note that the
            parameter name shadows the imported ``Dataset`` class inside this
            function only.
        model: callable mapping a float32 tensor to an output tensor.

    Returns:
        The model output converted to a NumPy array.
    """
    # build the float32 input tensor in a single step
    data_pred = torch.tensor(Dataset.values, dtype=torch.float32)
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()
    try:
        # no_grad: no autograd graph is needed for pure inference
        with torch.no_grad():
            yhat = model(data_pred)
    finally:
        if was_training:
            model.train()
    # retrieve numpy array
    return yhat.detach().numpy()

In [8]:
# One-off preparation step, disabled by wrapping it in a string literal:
# read the raw ';'-separated data, split it 75/25 into train and test sets
# (random_state=5) and write both parts to CSV files.
# Because the string is the last expression of the cell, it is echoed as output.
'''
data = pd.read_csv('robot_ds.csv', sep = ';')
data_train, data_test = train_test_split(data, test_size= 0.25, random_state=5)

data_train.to_csv('robot_train.csv', index=False)
data_test.to_csv('robot_test.csv', index=False)
'''

"\ndata = pd.read_csv('robot_ds.csv', sep = ';')\ndata_train, data_test = train_test_split(data, test_size= 0.25, random_state=5)\n\ndata_train.to_csv('robot_train.csv', index=False)\ndata_test.to_csv('robot_test.csv', index=False)\n"

In [9]:
# Build the loaders from the training CSV. Note that `test_dl` here is the
# 25% split carved out of robot_train.csv by prepare_data, not the separate
# robot_test.csv file.
path = 'robot_train.csv'
train_dl, test_dl = prepare_data(path)
print(len(train_dl.dataset), len(test_dl.dataset))

# define the network: size the input layer from the loaded feature matrix
# instead of hard-coding it (random_split wraps the CSVDataset in a Subset,
# hence the double `.dataset`)
n_inputs = train_dl.dataset.dataset.X.shape[1]
model = MLP(n_inputs)

# train the model
train_model(train_dl, model)

# evaluate the model on the held-out split of the training file
mse = evaluate_model(test_dl, model)
print('MSE: %.4f, RMSE: %.4f' % (mse, sqrt(mse)))

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[ -8.4  30.    0. ]
 [-10.8  20.4   4. ]
 [ -8.4   1.2   0. ]
 ...
 [  9.6  -2.4  11.6]
 [ -7.2  28.8   0. ]
 [ 13.2   3.6   4. ]]
1116 372
MSE: 1.4226, RMSE: 1.1927


In [10]:
# Load the separate test file (described by the original note as never used
# before this point) and slice it by position into model inputs and XYZ targets.
data_test = pd.read_csv('robot_test.csv')
# columns 10 up to, but excluding, the last one
data_testFinal = data_test.iloc[:, 10:-1]
# model inputs: columns 11 up to, but excluding, the fourth-from-last
data_testInputs = data_test.iloc[:, 11:-4]
# XYZ targets: the three columns that precede the last one
data_testXYZ = data_test.iloc[:, -4:-1]

In [12]:
# Predict the XYZ values for the test inputs and report the R2 score.
# yhat holds the predicted values; data_testXYZ holds the real ones.
yhat = predict(data_testInputs, model)
print('Predicted:', yhat)
print("R2 Score: %.4f" % (r2_score(data_testXYZ, yhat),))

Predicted: [[  2.4377775    0.59632957   3.9751613 ]
 [-12.946435    21.395256     8.280129  ]
 [ -8.995576    34.72955      7.3918858 ]
 ...
 [  3.270839    -2.4597902    0.6434301 ]
 [-12.96333     18.64809      8.030453  ]
 [-12.093062    24.238232    11.545196  ]]
R2 Score: 0.9799
