## Julie's notebook for Neural Network modeling

In [46]:
# Load Modules
# Set seed value so results are reproducible
from numpy.random import seed
seed(42)
import numpy as np

# Python package for dataframe creation and manipulation
import pandas as pd
# Python package for third party operating systems
import os

# Generate some fake data with 3 features
from sklearn.datasets import make_classification
# Package for modeling and scaling
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Package for one hot encoding
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# # Python package for plotting charts
# import matplotlib.pyplot as plt

# # Python package for low level math functions
# import numpy as np

# # Python package for statistical modeling: split dataset
# from sklearn.model_selection import train_test_split
# # Python package for statistical modeling: support vector classifier
# from sklearn.svm import SVC 
# # Python package for statistical modeling: 
# from sklearn.metrics import classification_report

In [17]:
# Read File

honeybee_csv = pd.read_csv(os.path.join('Resources', 'Output', 'HoneybeeDatasetSVCModel.csv'))
honeybee_csv.head()

Unnamed: 0,Region,FIPS,ColonyCount,ColonyGrowth,GrowthOutcome,TotalProduction,YieldPerColony,PricePerLB,ProductionValue,Stocks,YearRecorded,ClothianidinLB,ImidaclopridLB,ThiamethoxamLB,AcetamipridLB,ThiaclopridLB,CombinedNeonicLB
0,3,1,16000,,,928000,58,0.69,640000,28000,1995,0.0,1579.61,0.0,0.0,0.0,1579.61
1,3,1,15000,-6.25,0.0,960000,64,0.87,835000,96000,1996,0.0,819.24,0.0,0.0,0.0,819.24
2,3,1,14000,-6.67,0.0,924000,66,0.81,748000,92000,1997,0.0,14781.55,0.0,0.0,0.0,14781.55
3,3,1,16000,14.29,1.0,1136000,71,0.72,818000,159000,1998,0.0,4048.35,0.0,0.0,0.0,4048.35
4,3,1,17000,6.25,1.0,1156000,68,0.56,647000,185000,1999,0.0,2758.42,0.0,0.0,0.0,2758.42


In [18]:
# Drop NaN Rows

honeybee_csv = honeybee_csv.apply(pd.to_numeric, errors='coerce')
honeybee_csv = honeybee_csv.dropna()
honeybee_csv.head()

Unnamed: 0,Region,FIPS,ColonyCount,ColonyGrowth,GrowthOutcome,TotalProduction,YieldPerColony,PricePerLB,ProductionValue,Stocks,YearRecorded,ClothianidinLB,ImidaclopridLB,ThiamethoxamLB,AcetamipridLB,ThiaclopridLB,CombinedNeonicLB
1,3,1,15000,-6.25,0.0,960000,64,0.87,835000,96000,1996,0.0,819.24,0.0,0.0,0.0,819.24
2,3,1,14000,-6.67,0.0,924000,66,0.81,748000,92000,1997,0.0,14781.55,0.0,0.0,0.0,14781.55
3,3,1,16000,14.29,1.0,1136000,71,0.72,818000,159000,1998,0.0,4048.35,0.0,0.0,0.0,4048.35
4,3,1,17000,6.25,1.0,1156000,68,0.56,647000,185000,1999,0.0,2758.42,0.0,0.0,0.0,2758.42
5,3,1,16000,-5.88,0.0,1248000,78,0.59,736000,187000,2000,0.0,3305.17,0.0,0.0,0.0,3305.17


In [92]:
# Target Outcome Column

# target = honeybee_csv['GrowthOutcome']
# target_names = ['Growth', 'No-Growth']
# # print(target.shape,target_names.shape)
X = honeybee_csv.drop("GrowthOutcome",axis=1)
y = honeybee_csv["GrowthOutcome"]
print(X.shape,y.shape)

(894, 16) (894,)


In [74]:
# use TTS to creat training and testing data

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [75]:
# Data processing - must scale first.  Scale both training and testing data
X_scaler = StandardScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

X_test_scaled
# X_train_scaled

array([[ 0.28243457,  1.04050836, -0.63177888, ..., -0.25572509,
        -0.2343722 ,  0.32928972],
       [ 1.31086481, -0.05208777,  1.20402171, ..., -0.27317592,
        -0.24042439, -0.22574393],
       [-1.7744259 ,  0.33353439,  0.12199355, ..., -0.27677895,
        -0.24042439, -0.4590697 ],
       ...,
       [-0.74599566, -0.8233321 , -0.59530602, ..., -0.27677895,
        -0.24042439, -0.50609352],
       [-0.74599566, -0.11635813, -0.60746364, ..., -0.27677895,
        -0.24042439, -0.2144087 ],
       [ 0.28243457, -0.56625065, -0.25489266, ..., -0.06048975,
        -0.24042439,  0.02751379]])

In [93]:
# One-hot encode
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)
y_train_categorical

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [94]:
# Create the model

NN_model = Sequential()

In [99]:
# Add layer
number_hidden_nodes = 4
NN_model.add(Dense(units=16,
                activation='relu', input_dim=16))

In [100]:
# Add in output layer
number_classes = 2
NN_model.add(Dense(units=number_classes, activation='softmax'))
# arbitrary # classes based on in class exercises.


In [101]:
# Model Summary

NN_model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 3)                 51        
_________________________________________________________________
dense_17 (Dense)             (None, 2)                 8         
_________________________________________________________________
dense_18 (Dense)             (None, 16)                48        
_________________________________________________________________
dense_19 (Dense)             (None, 2)                 34        
Total params: 59
Trainable params: 59
Non-trainable params: 0
_________________________________________________________________


In [102]:
# Compile using loss and optimizer.
NN_model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [103]:
# Train the model: Random # of epochs selected by Julie.
NN_model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=500,
    shuffle=True,
    verbose=2
)

Train on 670 samples
Epoch 1/500
670/670 - 1s - loss: 0.6981 - accuracy: 0.4642
Epoch 2/500
670/670 - 0s - loss: 0.6738 - accuracy: 0.6463
Epoch 3/500
670/670 - 0s - loss: 0.6591 - accuracy: 0.6463
Epoch 4/500
670/670 - 0s - loss: 0.6501 - accuracy: 0.6463
Epoch 5/500
670/670 - 0s - loss: 0.6432 - accuracy: 0.6463
Epoch 6/500
670/670 - 0s - loss: 0.6361 - accuracy: 0.6463
Epoch 7/500
670/670 - 0s - loss: 0.6263 - accuracy: 0.6463
Epoch 8/500
670/670 - 0s - loss: 0.6126 - accuracy: 0.6463
Epoch 9/500
670/670 - 0s - loss: 0.5951 - accuracy: 0.6881
Epoch 10/500
670/670 - 0s - loss: 0.5733 - accuracy: 0.7507
Epoch 11/500
670/670 - 0s - loss: 0.5471 - accuracy: 0.7746
Epoch 12/500
670/670 - 0s - loss: 0.5194 - accuracy: 0.8015
Epoch 13/500
670/670 - 0s - loss: 0.4881 - accuracy: 0.8179
Epoch 14/500
670/670 - 0s - loss: 0.4564 - accuracy: 0.8343
Epoch 15/500
670/670 - 0s - loss: 0.4235 - accuracy: 0.8493
Epoch 16/500
670/670 - 0s - loss: 0.3886 - accuracy: 0.8687
Epoch 17/500
670/670 - 0s - 

Epoch 137/500
670/670 - 0s - loss: 0.0190 - accuracy: 0.9940
Epoch 138/500
670/670 - 0s - loss: 0.0182 - accuracy: 0.9955
Epoch 139/500
670/670 - 0s - loss: 0.0177 - accuracy: 0.9925
Epoch 140/500
670/670 - 0s - loss: 0.0173 - accuracy: 0.9940
Epoch 141/500
670/670 - 0s - loss: 0.0167 - accuracy: 0.9955
Epoch 142/500
670/670 - 0s - loss: 0.0170 - accuracy: 0.9925
Epoch 143/500
670/670 - 0s - loss: 0.0163 - accuracy: 0.9940
Epoch 144/500
670/670 - 0s - loss: 0.0153 - accuracy: 0.9955
Epoch 145/500
670/670 - 0s - loss: 0.0156 - accuracy: 0.9955
Epoch 146/500
670/670 - 0s - loss: 0.0153 - accuracy: 0.9955
Epoch 147/500
670/670 - 0s - loss: 0.0150 - accuracy: 0.9955
Epoch 148/500
670/670 - 0s - loss: 0.0160 - accuracy: 0.9955
Epoch 149/500
670/670 - 0s - loss: 0.0171 - accuracy: 0.9955
Epoch 150/500
670/670 - 0s - loss: 0.0132 - accuracy: 0.9970
Epoch 151/500
670/670 - 0s - loss: 0.0129 - accuracy: 0.9985
Epoch 152/500
670/670 - 0s - loss: 0.0118 - accuracy: 0.9985
Epoch 153/500
670/670 - 

Epoch 272/500
670/670 - 0s - loss: 0.0016 - accuracy: 1.0000
Epoch 273/500
670/670 - 0s - loss: 0.0018 - accuracy: 1.0000
Epoch 274/500
670/670 - 0s - loss: 0.0020 - accuracy: 1.0000
Epoch 275/500
670/670 - 0s - loss: 0.0024 - accuracy: 0.9985
Epoch 276/500
670/670 - 0s - loss: 0.0015 - accuracy: 1.0000
Epoch 277/500
670/670 - 0s - loss: 0.0017 - accuracy: 1.0000
Epoch 278/500
670/670 - 0s - loss: 0.0015 - accuracy: 1.0000
Epoch 279/500
670/670 - 0s - loss: 0.0015 - accuracy: 1.0000
Epoch 280/500
670/670 - 0s - loss: 0.0014 - accuracy: 1.0000
Epoch 281/500
670/670 - 0s - loss: 0.0015 - accuracy: 1.0000
Epoch 282/500
670/670 - 0s - loss: 0.0016 - accuracy: 1.0000
Epoch 283/500
670/670 - 0s - loss: 0.0014 - accuracy: 1.0000
Epoch 284/500
670/670 - 0s - loss: 0.0015 - accuracy: 1.0000
Epoch 285/500
670/670 - 0s - loss: 0.0016 - accuracy: 1.0000
Epoch 286/500
670/670 - 0s - loss: 0.0014 - accuracy: 1.0000
Epoch 287/500
670/670 - 0s - loss: 0.0018 - accuracy: 1.0000
Epoch 288/500
670/670 - 

Epoch 402/500
670/670 - 0s - loss: 5.0121e-04 - accuracy: 1.0000
Epoch 403/500
670/670 - 0s - loss: 4.2580e-04 - accuracy: 1.0000
Epoch 404/500
670/670 - 0s - loss: 5.8835e-04 - accuracy: 1.0000
Epoch 405/500
670/670 - 0s - loss: 7.6828e-04 - accuracy: 1.0000
Epoch 406/500
670/670 - 0s - loss: 3.9263e-04 - accuracy: 1.0000
Epoch 407/500
670/670 - 0s - loss: 3.5377e-04 - accuracy: 1.0000
Epoch 408/500
670/670 - 0s - loss: 4.3124e-04 - accuracy: 1.0000
Epoch 409/500
670/670 - 0s - loss: 4.1372e-04 - accuracy: 1.0000
Epoch 410/500
670/670 - 0s - loss: 4.1136e-04 - accuracy: 1.0000
Epoch 411/500
670/670 - 0s - loss: 3.3278e-04 - accuracy: 1.0000
Epoch 412/500
670/670 - 0s - loss: 3.8615e-04 - accuracy: 1.0000
Epoch 413/500
670/670 - 0s - loss: 3.2957e-04 - accuracy: 1.0000
Epoch 414/500
670/670 - 0s - loss: 3.3012e-04 - accuracy: 1.0000
Epoch 415/500
670/670 - 0s - loss: 3.4926e-04 - accuracy: 1.0000
Epoch 416/500
670/670 - 0s - loss: 5.8297e-04 - accuracy: 1.0000
Epoch 417/500
670/670 - 0

<tensorflow.python.keras.callbacks.History at 0x1dd9cf096a0>

In [104]:
# Determine the validity of the model
model_loss, model_accuracy = NN_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


224/224 - 0s - loss: 0.0143 - accuracy: 0.9955
Loss: 0.014347615617858927, Accuracy: 0.9955357313156128


In [110]:
# Make predictions.  The below is copied from in-class exercise
# new_data = np.array([[0.2, 0.3, 0.4]])
# print(f"Predicted class: {NN_model.predict_classes(new_data)}")

# bee_predictions = NN_model.predict.classes(X_test_scaled[:5])
# bee_pred_labels = label_encoder.inverse_transform(bee_predictions)