In [None]:
!conda install pydot
!conda install pydotplus

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import plot_model
from tensorflow.keras import layers
from sklearn import preprocessing

## Data Importing and Cleaning

In [2]:
size = pd.read_csv("Data/Week4/penguins_size.csv")

# Drop Rows with NA values 
clean = size.dropna()

# Drops row with a '.' for the Sex Variable
clean = clean[clean['sex'] != '.']
# only 11 rows were dropped 

# Creating Dummy Variables for Island and Sex
island = pd.get_dummies(clean['island'])
sex = pd.get_dummies(clean['sex'])

# Merging with Original Data
penguins = pd.merge(clean, island, left_index=True, right_index=True)
penguins = pd.merge(penguins, sex, left_index=True, right_index=True)

# Dropping Columns that the Dummies were made from 
penguins = penguins.drop(['sex', 'island', 'FEMALE'], axis = 1)
penguins

Unnamed: 0,species,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,Biscoe,Dream,Torgersen,MALE
0,Adelie,39.1,18.7,181.0,3750.0,0,0,1,1
1,Adelie,39.5,17.4,186.0,3800.0,0,0,1,0
2,Adelie,40.3,18.0,195.0,3250.0,0,0,1,0
4,Adelie,36.7,19.3,193.0,3450.0,0,0,1,0
5,Adelie,39.3,20.6,190.0,3650.0,0,0,1,1
...,...,...,...,...,...,...,...,...,...
338,Gentoo,47.2,13.7,214.0,4925.0,1,0,0,0
340,Gentoo,46.8,14.3,215.0,4850.0,1,0,0,0
341,Gentoo,50.4,15.7,222.0,5750.0,1,0,0,1
342,Gentoo,45.2,14.8,212.0,5200.0,1,0,0,0


### Splitting Prediction Variable from dataset
- X = Dataset with all Independent Variables 
- y = The Dependent Variable of Penguin Species

In [3]:
penguins_x = penguins.iloc[:,1:]
x = penguins_x.values
min_max_scaler = preprocessing.MinMaxScaler()
scaled_penguins_x = pd.DataFrame(min_max_scaler.fit_transform(x), columns=penguins_x.columns)
scaled_penguins_x

Unnamed: 0,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,Biscoe,Dream,Torgersen,MALE
0,0.254545,0.666667,0.152542,0.291667,0.0,0.0,1.0,1.0
1,0.269091,0.511905,0.237288,0.305556,0.0,0.0,1.0,0.0
2,0.298182,0.583333,0.389831,0.152778,0.0,0.0,1.0,0.0
3,0.167273,0.738095,0.355932,0.208333,0.0,0.0,1.0,0.0
4,0.261818,0.892857,0.305085,0.263889,0.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...
328,0.549091,0.071429,0.711864,0.618056,1.0,0.0,0.0,0.0
329,0.534545,0.142857,0.728814,0.597222,1.0,0.0,0.0,0.0
330,0.665455,0.309524,0.847458,0.847222,1.0,0.0,0.0,1.0
331,0.476364,0.202381,0.677966,0.694444,1.0,0.0,0.0,0.0


In [4]:
penguins_y = penguins['species']
print(penguins_y)
penguins_y = penguins_y.astype('category').cat.codes.to_numpy()
penguins_y

0      Adelie
1      Adelie
2      Adelie
4      Adelie
5      Adelie
        ...  
338    Gentoo
340    Gentoo
341    Gentoo
342    Gentoo
343    Gentoo
Name: species, Length: 333, dtype: object


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

In [5]:
inputs = keras.Input(shape=(8,)) #number of variables 
x = layers.Dense(50, activation = 'relu')(inputs) # single hidden layer with 50 nodes
x = layers.Dense(40, activation = 'sigmoid')(x)
outputs = layers.Dense(3, activation='sigmoid')(x) #dense means fully connected. 3 different classes
model = keras.Model(inputs=inputs, outputs=outputs, name="penguin_model")

In [6]:
keras.utils.plot_model(model, show_shapes = True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [22]:
import pydot
import graphviz

In [14]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.RMSprop(),
    metrics=["accuracy"],
)

# Batch size - number of samples per gradient update
#epochs - an iteration over the entire x and y data provided 
history = model.fit(penguins_x, penguins_y, batch_size = 64, epochs=10, validation_split=0.1)

scores = model.evaluate(penguins_x, penguins_y, verbose=2)

Epoch 1/10


  return dispatch_target(*args, **kwargs)


1/5 [=====>........................] - ETA: 5s - loss: 1.4060 - accuracy: 0.3125

  return dispatch_target(*args, **kwargs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
11/11 - 0s - loss: 1.0558 - accuracy: 0.4384 - 70ms/epoch - 6ms/step


In [15]:
model_scaled = keras.Model(inputs=inputs, outputs=outputs, name="penguin_model_scaled")

model_scaled.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.RMSprop(),
    metrics=["accuracy"],
)

history_scaled = model_scaled.fit(scaled_penguins_x, penguins_y, batch_size = 64, epochs = 10, validation_split = 0.1)

scores = model_scaled.evaluate(scaled_penguins_x, penguins_y, verbose = 2)

Epoch 1/10


  return dispatch_target(*args, **kwargs)


1/5 [=====>........................] - ETA: 4s - loss: 0.9816 - accuracy: 0.6094

  return dispatch_target(*args, **kwargs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
11/11 - 0s - loss: 0.8173 - accuracy: 0.6637 - 62ms/epoch - 6ms/step
