In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import os


In [2]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

df = pd.read_csv('species_data.csv')
df = df.sample(frac=0.4, random_state=42)

# Drop unnecessary columns
df = df.drop(['internalTaxonId', 'scientificName'], axis=1)

# Encode categorical features
label_encoders = {}
for col in ['speciesName', 'systems', 'scopes', 'Category']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Separate features and target
X = df.drop('Category', axis=1)
y = df['Category']


In [3]:
print(df['Category'].unique())

[1 0 2]


In [4]:
from sklearn.model_selection import train_test_split

# Split data: 80% train, 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Fit on training data and transform
X_train_scaled = scaler.fit_transform(X_train)

# Only transform the test set
X_test_scaled = scaler.transform(X_test)


In [6]:
# Define simple model
model1 = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')  # Output layer for multi-class classification
])

# Compile the model
model1.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history1 = model1.fit(X_train_scaled, y_train, epochs=20, batch_size=32)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.7109 - loss: 0.8094
Epoch 2/20
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.7108 - loss: 0.7883
Epoch 3/20
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.7141 - loss: 0.7805
Epoch 4/20
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7095 - loss: 0.7857
Epoch 5/20
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7091 - loss: 0.7868
Epoch 6/20
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7103 - loss: 0.7844
Epoch 7/20
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7094 - loss: 0.7860
Epoch 8/20
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7153 - loss: 0.7768
Epoch 9/20
[1m1206/1206

In [7]:
# Save the model
os.makedirs("saved_models", exist_ok=True)
model1.save("saved_models/model1_simple.h5")




In [8]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, regularizers, callbacks, optimizers

# Normalize the input data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model with optimization techniques
model2 = Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)),  # Increased neurons, adjusted regularization
    layers.Dropout(0.3),  # Dropout layer to reduce overfitting
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),  # Dropout layer to reduce overfitting
    layers.Dense(64, activation='relu'),
    layers.Dense(3, activation='softmax')  # Output layer for 3 classes
])

# Compile the model with an optimizer
model2.compile(optimizer=optimizers.Adam(learning_rate=0.0005),  # Reduced learning rate
               loss='sparse_categorical_crossentropy',  # Use categorical_crossentropy if using one-hot encoding
               metrics=['accuracy'])

# Early stopping callback and Learning Rate Scheduler
early_stopping = callbacks.EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
lr_scheduler = callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, min_lr=0.00001)

# Train the model
history2 = model2.fit(X_train_scaled, y_train,
                      epochs=100,  # Increased epochs to allow for better convergence
                      callbacks=[early_stopping, lr_scheduler])

# Evaluate the model on the test set
test_loss, test_accuracy = model2.evaluate(X_test_scaled, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Epoch 1/100
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.6964 - loss: 0.8344 - learning_rate: 5.0000e-04
Epoch 2/100
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7108 - loss: 0.7960 - learning_rate: 5.0000e-04
Epoch 3/100
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.7053 - loss: 0.7963 - learning_rate: 5.0000e-04
Epoch 4/100
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7101 - loss: 0.7866 - learning_rate: 5.0000e-04
Epoch 5/100
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7126 - loss: 0.7834 - learning_rate: 5.0000e-04
Epoch 6/100
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.7077 - loss: 0.7891 - learning_rate: 5.0000e-04
Epoch 7/100
[1m1206/1206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - acc

In [9]:
# Evaluate the model
loss2, acc2 = model2.evaluate(X_test_scaled, y_test)
print(f"Model 2 Test Accuracy: {acc2:.2f}")


[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7058 - loss: 0.7844
Model 2 Test Accuracy: 0.70


In [10]:
# Save the optimized model
model2.save("saved_models/model2_optimized.h5")


