In [1]:
import torch
import numpy as np
from torch import nn
from torch.utils.data import Dataset, DataLoader
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from pickle import dump, load



device = 'cuda' if torch.cuda.is_available() else 'cpu'\

In [2]:
print(f'running on {device}')

running on cpu


In [3]:
#Load in data scaling
scaler = load(open('/Users/alexro/NERDS/ML_Sessions/scaler.pkl','rb'))

#Load in label encoding
le  = LabelEncoder()
le.classes_ = np.load('/Users/alexro/NERDS/ML_Sessions/encoding.npy', allow_pickle=True)



In [4]:
class testData(Dataset):

    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        return len(self.X_data)

In [5]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(10, 128),
            nn.LeakyReLU(),
            nn.Linear(128, 128),
            nn.LeakyReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)

In [6]:
#load in model, model must be loaded in after its class is instantiated
#otherwise you will get a serialization error
model = torch.load('/Users/alexro/NERDS/ML_Sessions/avacado_model.pt')

In [7]:
#read in files
file_in = os.getcwd() + "/avacados/avac_sample.csv"
df = pd.read_csv(file_in)

df = df.drop(columns=['region', 'Date', 'XLarge Bags'])

print(df.head())


   Unnamed: 0  AveragePrice  Total Volume        4046       4225      4770  \
0           0          1.07     417232.18   278048.26   62485.97    714.93   
1           1          1.10     454702.00   382900.99   19543.18    522.81   
2           2          2.03       1794.39     1069.54     187.76      0.00   
3           3          1.38    1975524.70   833904.89  499191.31  10560.99   
4           4          1.16    2197763.70  1420318.78  298081.99  25682.97   

   Total Bags  Small Bags  Large Bags          type  year  
0    75983.02    46290.32    29678.76  conventional  2015  
1    51735.02    40505.16    11199.95  conventional  2015  
2      537.09      500.00       37.09       organic  2016  
3   631867.51   584294.01    29543.77  conventional  2017  
4   453679.96   309652.75   143978.69  conventional  2015  


In [14]:
#Load data
#print(df.head)
X = df.iloc[:, df.columns != 'AveragePrice'] #grab first 13 elements for input
y = df.loc[:, df.columns == 'AveragePrice']   #seperate last element (target values)

#scaler = StandardScaler()
X['type'] = le.fit_transform(X['type'].astype(str))
X_test = scaler.fit_transform(X)

print(y)

   AveragePrice
0          1.07
1          1.10
2          2.03
3          1.38
4          1.16
5          1.13
6          1.13
7          1.17
8          1.02
9          2.39


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [18]:
#reshape data into a tensor of floats
test_data = testData(torch.FloatTensor(X_test),torch.FloatTensor(y.values))
dataloader = DataLoader(dataset=test_data, batch_size=1, shuffle=True)



In [37]:
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        print(f'''The predicted model generated {round(pred.item(),5)}\t actual was {round(y.item(), 5)} \t difference was {round((pred-y).item(),5)}''')

The predicted model generated 1.24017	 actual was 1.17 	 difference was 0.07017
The predicted model generated 2.23422	 actual was 2.39 	 difference was -0.15578
The predicted model generated 1.00428	 actual was 1.16 	 difference was -0.15572
The predicted model generated 0.66079	 actual was 1.02 	 difference was -0.35921
The predicted model generated 1.4121	 actual was 1.13 	 difference was 0.2821
The predicted model generated 1.30482	 actual was 1.38 	 difference was -0.07518
The predicted model generated 1.15614	 actual was 1.07 	 difference was 0.08614
The predicted model generated 1.21759	 actual was 1.13 	 difference was 0.08759
The predicted model generated 1.17116	 actual was 1.1 	 difference was 0.07116
The predicted model generated 2.49143	 actual was 2.03 	 difference was 0.46143
