In [1]:
import torch
import numpy as np
from torch import nn
from torch.utils.data import Dataset, DataLoader
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from pickle import dump



device = 'cuda' if torch.cuda.is_available() else 'cpu'
#device = 'cpu'

In [2]:
class trainData(Dataset):

    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        return len(self.X_data)

In [3]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(11, 128),
            nn.LeakyReLU(),
            nn.Linear(128, 128),
            nn.LeakyReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)

In [4]:
#read in files
file_in = os.getcwd() + "/avacados/avocado.csv"
df = pd.read_csv(file_in)

print(df.head())


   Unnamed: 0        Date  AveragePrice  Total Volume     4046       4225  \
0           0  2015-12-27          1.33      64236.62  1036.74   54454.85   
1           1  2015-12-20          1.35      54876.98   674.28   44638.81   
2           2  2015-12-13          0.93     118220.22   794.70  109149.67   
3           3  2015-12-06          1.08      78992.15  1132.00   71976.41   
4           4  2015-11-29          1.28      51039.60   941.48   43838.39   

     4770  Total Bags  Small Bags  Large Bags  XLarge Bags          type  \
0   48.16     8696.87     8603.62       93.25          0.0  conventional   
1   58.33     9505.56     9408.07       97.49          0.0  conventional   
2  130.50     8145.35     8042.21      103.14          0.0  conventional   
3   72.58     5811.16     5677.40      133.76          0.0  conventional   
4   75.78     6183.95     5986.26      197.69          0.0  conventional   

   year  region  
0  2015  Albany  
1  2015  Albany  
2  2015  Albany  
3  2015 

In [5]:
df = df.drop(columns=['Date', 'XLarge Bags'])

In [6]:
#print(df.head)
X = df.iloc[:, df.columns != 'AveragePrice'] #grab first 13 elements for input
y = df.loc[:, df.columns == 'AveragePrice'].values   #seperate last element (target values)


print(X.dtypes)

Unnamed: 0        int64
Total Volume    float64
4046            float64
4225            float64
4770            float64
Total Bags      float64
Small Bags      float64
Large Bags      float64
type             object
year              int64
region           object
dtype: object


In [7]:
le = LabelEncoder()
#X = X.apply(le.fit_transform)
scaler = StandardScaler()
X['type'] = le.fit_transform(X['type'].astype(str))
X['region'] = le.fit_transform(X['region'].astype(str))

X_train = scaler.fit_transform(X)
print(X_train)

[[-1.5653269  -0.22771641 -0.23081597 ... -0.99983562 -1.22128204
  -1.7002522 ]
 [-1.50073001 -0.23042664 -0.23110251 ... -0.99983562 -1.22128204
  -1.7002522 ]
 [-1.43613312 -0.21208462 -0.23100731 ... -0.99983562 -1.22128204
  -1.7002522 ]
 ...
 [-0.98395486 -0.24233073 -0.2306933  ...  1.00016441  1.97050371
   1.70081131]
 [-0.91935797 -0.24162464 -0.2304279  ...  1.00016441  1.97050371
   1.70081131]
 [-0.85476108 -0.24125273 -0.22934712 ...  1.00016441  1.97050371
   1.70081131]]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['type'] = le.fit_transform(X['type'].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['region'] = le.fit_transform(X['region'].astype(str))


In [8]:
#reshape data into a tensor of floats
train_data = trainData(torch.FloatTensor(X_train),torch.FloatTensor(y))
train_loader = DataLoader(dataset=train_data, batch_size=1,  shuffle=True)

#build model and pass it to device (CPU or GPU)
model = NeuralNetwork().to(device)
loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [9]:
def accuracy(y_pred, y_actual):
    #print(y_pred, y_actual)
    diff = (abs(y_pred - y_actual)/(y_pred + y_actual)).mean()
    diff = (1 - diff) * 100
    #print(diff)
    return diff

In [10]:
epochs = 5
for e in range(epochs):
    epoch_loss = 0
    epoch_acc = 0
    print(f"Epoch {e + 1}\n-------------------------------")
    for features, labels in train_loader:

        features, labels = features.to(device), labels.to(device)
        y_pred = model(features)
        loss = loss_fn(y_pred, labels)
        acc = accuracy(y_pred, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    print(f'Epoch {e + 0:03}: | Loss: {epoch_loss / len(train_loader):.5f}')

print("Done Training!")

Epoch 1
-------------------------------
Epoch 000: | Loss: 0.22836
Epoch 2
-------------------------------
Epoch 001: | Loss: 0.20419
Epoch 3
-------------------------------
Epoch 002: | Loss: 0.19543
Epoch 4
-------------------------------
Epoch 003: | Loss: 0.19086
Epoch 5
-------------------------------
Epoch 004: | Loss: 0.18747
Done Training!


In [11]:
print(labels)
print(y_pred)

tensor([[1.0300]], device='cuda:0')
tensor([[1.1953]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [12]:
# save the model
torch.save(model, os.getcwd()+'/avacado_model_cuda.pt')
# save the value label encoding
np.save(os.getcwd()+'/encoding_cuda.npy', le.classes_)
# save the vvalue standard scaler
dump(scaler, open('scaler_cuda.pkl','wb'))

/Users/alexro/NERDS/ML_Sessions
