In [1]:
import torch
import numpy as np
from torch import nn
from torch.utils.data import Dataset, DataLoader
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from pickle import dump



device = 'cuda' if torch.cuda.is_available() else 'cpu'\

In [2]:
class trainData(Dataset):

    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        return len(self.X_data)

In [3]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(10, 128),
            nn.LeakyReLU(),
            nn.Linear(128, 128),
            nn.LeakyReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)

In [4]:
#read in files
file_in = os.getcwd() + "/avacados/avocado.csv"
df = pd.read_csv(file_in)

print(df.head())


   Unnamed: 0        Date  AveragePrice  Total Volume     4046       4225  \
0           0  2015-12-27          1.33      64236.62  1036.74   54454.85   
1           1  2015-12-20          1.35      54876.98   674.28   44638.81   
2           2  2015-12-13          0.93     118220.22   794.70  109149.67   
3           3  2015-12-06          1.08      78992.15  1132.00   71976.41   
4           4  2015-11-29          1.28      51039.60   941.48   43838.39   

     4770  Total Bags  Small Bags  Large Bags  XLarge Bags          type  \
0   48.16     8696.87     8603.62       93.25          0.0  conventional   
1   58.33     9505.56     9408.07       97.49          0.0  conventional   
2  130.50     8145.35     8042.21      103.14          0.0  conventional   
3   72.58     5811.16     5677.40      133.76          0.0  conventional   
4   75.78     6183.95     5986.26      197.69          0.0  conventional   

   year  region  
0  2015  Albany  
1  2015  Albany  
2  2015  Albany  
3  2015 

In [5]:
df = df.drop(columns=['region', 'Date', 'XLarge Bags'])

In [6]:
#print(df.head)
X = df.iloc[:, df.columns != 'AveragePrice'] #grab first 13 elements for input
y = df.loc[:, df.columns == 'AveragePrice'].values   #seperate last element (target values)


print(X.dtypes)

Unnamed: 0        int64
Total Volume    float64
4046            float64
4225            float64
4770            float64
Total Bags      float64
Small Bags      float64
Large Bags      float64
type             object
year              int64
dtype: object


In [21]:
le = LabelEncoder()
#X = X.apply(le.fit_transform)
scaler = StandardScaler()
X['type'] = le.fit_transform(X['type'].astype(str))
X_train = scaler.fit_transform(X)
print(X_train)

[[-1.5653269  -0.22480975 -0.80622027 ... -1.02988101 -0.99983562
  -1.22128204]
 [-1.50073001 -0.27912775 -0.95279724 ... -1.02493487 -0.99983562
  -1.22128204]
 [-1.43613312  0.0519082  -0.90024357 ... -1.01669129 -0.99983562
  -1.22128204]
 ...
 [-0.98395486 -0.74291995 -0.75443101 ... -1.12179684  1.00016441
   1.97050371]
 [-0.91935797 -0.66448171 -0.65333685 ... -1.10469143  1.00016441
   1.97050371]
 [-0.85476108 -0.63162502 -0.39286172 ... -1.16796085  1.00016441
   1.97050371]]


In [8]:
#reshape data into a tensor of floats
train_data = trainData(torch.FloatTensor(X_train),torch.FloatTensor(y))
train_loader = DataLoader(dataset=train_data, batch_size=1, shuffle=True)

#build model and pass it to device (CPU or GPU)
model = NeuralNetwork().to(device)
loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [9]:
def accuracy(y_pred, y_actual):
    #print(y_pred, y_actual)
    diff = (abs(y_pred - y_actual)/(y_pred + y_actual)).mean()
    diff = (1 - diff) * 100
    #print(diff)
    return diff

In [10]:
epochs = 10
for e in range(epochs):
    epoch_loss = 0
    epoch_acc = 0
    print(f"Epoch {e + 1}\n-------------------------------")
    for features, labels in train_loader:

        features, labels = features.to(device), labels.to(device)
        y_pred = model(features)
        loss = loss_fn(y_pred, labels.unsqueeze(1))
        acc = accuracy(y_pred, labels.unsqueeze(1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    print(f'Epoch {e + 0:03}: | Loss: {epoch_loss / len(train_loader):.5f}')

print("Done Training!")

Epoch 1
-------------------------------


  return F.l1_loss(input, target, reduction=self.reduction)


Epoch 000: | Loss: 0.19913
Epoch 2
-------------------------------
Epoch 001: | Loss: 0.16619
Epoch 3
-------------------------------
Epoch 002: | Loss: 0.15801
Epoch 4
-------------------------------
Epoch 003: | Loss: 0.15269
Epoch 5
-------------------------------
Epoch 004: | Loss: 0.14871
Epoch 6
-------------------------------
Epoch 005: | Loss: 0.14468
Epoch 7
-------------------------------
Epoch 006: | Loss: 0.14127
Epoch 8
-------------------------------
Epoch 007: | Loss: 0.13925
Epoch 9
-------------------------------
Epoch 008: | Loss: 0.13717
Epoch 10
-------------------------------
Epoch 009: | Loss: 0.13604
Done Training!


In [12]:
print(y_pred, labels)


tensor([[1.1561]], grad_fn=<AddmmBackward>) tensor([[1.3600]])


In [13]:
# save the model
torch.save(model, '/Users/alexro/NERDS/ML_Sessions/avacado_model.pt')
# save the value label encoding
np.save('/Users/alexro/NERDS/ML_Sessions/encoding.npy', le.classes_)
# save the vvalue standard scaler
dump(scaler, open('/Users/alexro/NERDS/ML_Sessions/scaler.pkl','wb'))

  "type " + obj.__name__ + ". It won't be checked "


/Users/alexro/NERDS/ML_Sessions
