In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import keras
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

# Read the data

* These training data are from the same ant dataset we used for support vector machines.
* The response/label/Y variable is species.
* The features are morphological measurements from the ants.

In [None]:
# Read the ant morphology measurements from disk
dataframe = pd.read_csv('all_measurments2020x09.csv')

# Encode the species label numerically: zeteki -> 0, fovouros -> 1
species_codes = {'zeteki': 0, 'fovouros': 1}
dataframe['species'] = dataframe['species'].replace(species_codes)

# Response (Y) is the encoded species; features (X) are the remaining columns
# once the label and the metadata columns have been removed
non_feature_columns = ['species', 'caste', 'country', 'comment', 'bc_num', 'coll_code']
Y = dataframe['species']
X = dataframe.drop(columns=non_feature_columns)

# Preview the first few rows
dataframe.head()

# Prepare training, testing, and validation datasets.
* We will use *training data* to train the neural network.
* We will use *validation data* to evaluate our model while tuning hyperparameters.
* We will use *test data* to obtain an unbiased measure of error for our final model.

In [None]:
# Split the dataset into training (70%), validation (15%), and test (15%) sets
# NOTE: DO NOT CHANGE THESE RANDOM_STATE VARIABLES
X_train, X_test_val, y_train, y_test_val = train_test_split(X, Y, test_size=0.3, random_state=1234)

# Split the held-out 30% in half. This second split must operate on
# X_test_val / y_test_val (not on X / Y); otherwise the validation and test
# sets would contain rows that the model was trained on.
X_test, X_val, y_test, y_val = train_test_split(X_test_val, y_test_val, test_size=0.5, random_state=1234)

# Sanity check: the three subsets together account for every row exactly once
assert len(X_train) + len(X_val) + len(X_test) == len(X)

# Standardize the features (optional but recommended for neural networks).
# The scaler is fit on the training data only and then applied unchanged to the
# validation and test data, so no information from those sets leaks into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Build the model in Keras

Next, we need to define, fit, and evaluate our model. Below, I have set up a simple model. First, fit this model, and evaluate it on the training and validation datasets. (The test dataset is reserved for the final evaluation at the end of the notebook.)

In the Word document, I ask several questions about the results of changing different aspects of the network. Using these questions as a guide, change the following things and evaluate the accuracy of the model.

**CHANGE**:

* the number of hidden layers in the network
* the number of neurons in hidden layers
* the activation function used in the hidden layers

In [None]:
# Build the model: one hidden layer followed by a single sigmoid output unit
n_features = X_train.shape[1]
model = keras.Sequential([
    keras.layers.Dense(30, activation='relu', input_shape=(n_features,)),  # hidden layer with 30 neurons and ReLU activation
    keras.layers.Dense(1, activation='sigmoid'),  # output layer
])

# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output
model.summary()

# Compile the model with a specific learning rate
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model, tracking loss/accuracy on the validation data after each epoch
history = model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_val_scaled, y_val))

# Predicted probabilities on training and validation data
y_train_probs = model.predict(X_train_scaled)
y_val_probs = model.predict(X_val_scaled)

# Convert probabilities to binary predictions using a threshold (e.g., 0.5).
# predict() returns one column per output unit, so flatten to a 1-D label vector.
y_train_pred = (y_train_probs > 0.5).astype(int).ravel()
y_val_pred = (y_val_probs > 0.5).astype(int).ravel()

# Compute confusion matrices (rows = true labels, columns = predicted labels)
cm_train = confusion_matrix(y_train, y_train_pred)
cm_val = confusion_matrix(y_val, y_val_pred)

# Plot confusion matrices side by side
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = [
    (cm_train, 'Training Confusion Matrix'),
    (cm_val, 'Validation Confusion Matrix'),
]
for ax, (cm, title) in zip(axes, panels):
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    ax.set_title(title)

fig.tight_layout()
plt.show()

# Evaluate your final model on the test dataset

**ADD**: Code to evaluate the model you think is most appropriate.

**HINT**: Copy, paste, and modify the code for making predictions, converting probabilities to binary predictions, computing confusion matrices, and plotting confusion matrices. You will need to change the datasets that are used.