In [None]:
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing

#Importing the datasets-------------------------------------------------------
# Announce the step, then read the raw table from DATA.csv (path is relative
# to the notebook's working directory).
print("Importing datasets")
df = pd.read_csv("DATA.csv")

In [None]:
#Preparing ML Datasets--------------------------------------------------------
# Any preprocessing of the original data belongs in this cell.
print("Preparing ML datasets")
# Keep only the six columns used below, as an independent copy so that later
# changes to full_dataset never write through to df.
full_dataset = df.loc[:, ["A", "B", "C", "D", "E", "F"]].copy()

In [None]:
#Define X and Y for each regression--------------------------------------------
# X is every column except the last one; y is the last column (the regression
# target). Using -1 keeps both selections in sync if the column list changes.
X = full_dataset.iloc[:, :-1].values
y = full_dataset.iloc[:, -1].values

#Splitting all datasets into train and test------------------------------------
print ("Train/test split")
from sklearn.model_selection import train_test_split
# 80/20 split; random_state is fixed so the split is the same on every run.
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.20, random_state=0)

# Targets become (n_samples, 1) float64 column tensors. reshape (unlike
# np.resize) raises instead of silently padding/truncating on a size mismatch.
ytrain = torch.from_numpy(ytrain.astype(float).reshape(-1, 1))
ytest = torch.from_numpy(ytest.astype(float).reshape(-1, 1))

# Features keep the dtype of the underlying numpy arrays.
xtrain = torch.from_numpy(xtrain)
xtest = torch.from_numpy(xtest)

In [None]:
# Fix the RNG that initialises the layer weights so reruns give the same model.
torch.manual_seed(0)

# N is batch size (not used below: every step trains on the full training set);
# D_in is input dimension; H is hidden dimension; D_out is output dimension.
# D_in is read from the data so it always matches the number of feature columns.
N, D_in, H, D_out = 100, xtrain.shape[1], 30, 1

# Cast to float32 once instead of on every iteration.
xtrain_f, xtest_f, ytrain_f = xtrain.float(), xtest.float(), ytrain.float()

# One hidden layer with ReLU activation, followed by a linear output layer.
model = torch.nn.Sequential(
          torch.nn.Linear(D_in, H),
          torch.nn.ReLU(),
          torch.nn.Linear(H, D_out),
        )

# Loss is the summed (not averaged) squared error over all training samples.
# model.parameters() hands the optimizer the learnable parameters of the two
# nn.Linear modules which are members of the model.
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

n_iterations = 40000
log_every = 1000  # print the loss only every log_every steps (and on the last one)
for t in range(n_iterations):
    # Forward pass: compute predicted y by passing x to the model
    ypredtrain = model(xtrain_f)
    loss = loss_fn(ypredtrain, ytrain_f)
    if t % log_every == 0 or t == n_iterations - 1:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Final predictions are taken AFTER the last weight update and without
# building an autograd graph; the cells below read ypredtrain and ypred.
with torch.no_grad():
    ypredtrain = model(xtrain_f)
    ypred = model(xtest_f)

In [None]:
def r_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) of y_pred against y_true.

    Computed as 1 - SS_res / SS_tot, where SS_res is the sum of squared
    residuals and SS_tot is the sum of squared deviations of y_true from its
    mean, both taken over all elements.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, with the same number of elements as y_true.

    Returns
    -------
    float
        The R^2 value. If y_true is constant, SS_tot is 0 and the division
        yields nan or -inf (numpy emits a warning).
    """
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    # A (n,) vector against a (n, 1) column would broadcast to an (n, n)
    # matrix of residuals and give a wrong score, so flatten both when they
    # hold the same number of elements but differ in shape.
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_true, y_pred = y_true.ravel(), y_pred.ravel()
    residuals = y_true - y_pred
    ss_res = np.sum(residuals**2)
    ss_tot = np.sum((y_true - np.mean(y_true))**2)
    r_squared = 1 - (ss_res / ss_tot)
    return r_squared

# Model outputs are detached from autograd before conversion to numpy:
# predictions on the training set and on the test set respectively.
ypredtrain1, ypred1 = ypredtrain.detach().numpy(), ypred.detach().numpy()
# Ground-truth targets for the training set and the test set.
ytrain1, ytest1 = ytrain.numpy(), ytest.numpy()

In [None]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the absolute percentage error of each prediction.

    Despite the name, no averaging is done here: the result has one entry per
    element, |(y_true - y_pred) / y_true| * 100, and the caller reduces it
    (mean, median, ...). Entries where y_true is 0 come out as inf or nan.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.abs(relative_error) * 100

In [None]:
# Error metrics on the training split (targets vs. model predictions).
# MAPE_torch1 holds one percentage error per sample, not a single number.
MAPE_torch1 = mean_absolute_percentage_error(y_true=ytrain1, y_pred=ypredtrain1)
MSE_torch1 = mean_squared_error(y_true=ytrain1, y_pred=ypredtrain1)
MAE_torch1 = mean_absolute_error(y_true=ytrain1, y_pred=ypredtrain1)
r_torch1 = r_score(y_true=ytrain1, y_pred=ypredtrain1)


In [None]:
# Error metrics on the held-out test split (targets vs. model predictions).
# MAPE_torch2 holds one percentage error per sample, not a single number.
MAPE_torch2 = mean_absolute_percentage_error(y_true=ytest1, y_pred=ypred1)
MSE_torch2 = mean_squared_error(y_true=ytest1, y_pred=ypred1)
MAE_torch2 = mean_absolute_error(y_true=ytest1, y_pred=ypred1)
r_torch2 = r_score(y_true=ytest1, y_pred=ypred1)


In [None]:
# Summary table with one row per data split. MAPE_torch1 / MAPE_torch2 hold
# per-sample percentage errors, so each is reduced to a mean and a median here.
# np.median is used because the `statistics` module is never imported in this
# notebook, and np.median returns a scalar for the (n, 1) error arrays.
errors = [{'Model Name': 'ANN PyTorch - Training', 'R2': r_torch1, 'MAE': MAE_torch1, 'MSE': MSE_torch1, 'MAPE (%)': np.mean(MAPE_torch1), 'Median Error (%)': np.median(MAPE_torch1)},
          {'Model Name': 'ANN PyTorch - Testing', 'R2': r_torch2, 'MAE': MAE_torch2, 'MSE': MSE_torch2, 'MAPE (%)': np.mean(MAPE_torch2), 'Median Error (%)': np.median(MAPE_torch2)}]
df_estimationerrors = pd.DataFrame(errors)
# Best (lowest median error) row first.
df_estimationerrors = df_estimationerrors.sort_values(by=['Median Error (%)'])
print(df_estimationerrors)
# Persist the table next to the notebook.
df_estimationerrors.to_csv("errors-test5.csv")