# Deep Learning Project

In [1]:
#from google.colab import drive
#drive.mount('/content/drive')

## Importing Libraries

In [2]:
# Basic Libraries
import pandas as pd
import numpy as np
import os
import time

# Visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# Modeling
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

import cv2 as cv

# Keras Hypertuner - Hyperband
#from tensorflow.keras.optimizers import Adam 
#from keras_tuner.tuners import Hyperband

# QoL
import warnings
warnings.filterwarnings("ignore")

%load_ext autoreload
%autoreload 2

Verifying if tensorflow is using the GPU

In [3]:
tf.config.list_physical_devices('GPU') # Not working

[]

Computing the time that has passed since a given start time

In [4]:
#Start Time
start_time = time.time()

## Importing Data

Creating a path

In [5]:
path = "."
#path = "/content/drive/MyDrive/...." Google Drive

In [6]:
metadata = pd.read_csv(path + '/rare_species 1/metadata.csv')

In [7]:
metadata.head(10)

Unnamed: 0,rare_species_id,eol_content_id,eol_page_id,kingdom,phylum,family,file_path
0,75fd91cb-2881-41cd-88e6-de451e8b60e2,12853737,449393,animalia,mollusca,unionidae,mollusca_unionidae/12853737_449393_eol-full-si...
1,28c508bc-63ff-4e60-9c8f-1934367e1528,20969394,793083,animalia,chordata,geoemydidae,chordata_geoemydidae/20969394_793083_eol-full-...
2,00372441-588c-4af8-9665-29bee20822c0,28895411,319982,animalia,chordata,cryptobranchidae,chordata_cryptobranchidae/28895411_319982_eol-...
3,29cc6040-6af2-49ee-86ec-ab7d89793828,29658536,45510188,animalia,chordata,turdidae,chordata_turdidae/29658536_45510188_eol-full-s...
4,94004bff-3a33-4758-8125-bf72e6e57eab,21252576,7250886,animalia,chordata,indriidae,chordata_indriidae/21252576_7250886_eol-full-s...
5,dc48f2ce-4feb-4ef7-b2a2-c3c3f42bf19b,28657539,491832,animalia,arthropoda,formicidae,arthropoda_formicidae/28657539_491832_eol-full...
6,3d881320-8ba8-4580-a72c-0e7ab116b664,29548208,47043290,animalia,chordata,fringillidae,chordata_fringillidae/29548208_47043290_eol-fu...
7,7faca96a-54e6-4c80-b9e4-77ab126d904a,21232818,1033999,animalia,arthropoda,gomphidae,arthropoda_gomphidae/21232818_1033999_eol-full...
8,9f89ecab-aabd-41a4-b5b4-8ce106d85959,20315204,46561012,animalia,chordata,myliobatidae,chordata_myliobatidae/20315204_46561012_eol-fu...
9,b6ec7a70-c470-4ede-8930-05844e1efd2e,20124498,46570095,animalia,chordata,pleuronectidae,chordata_pleuronectidae/20124498_46570095_eol-...


In [None]:
# Analise dos dados (metadata)
# Weird Images

In [8]:
# split data into 90 - 10

In [9]:
label_encoder = LabelEncoder()
metadata['label'] = label_encoder.fit_transform(metadata['family']) 
metadata['file_path'] = "rare_species 1/" + metadata['file_path']

In [10]:
metadata.head(3)

Unnamed: 0,rare_species_id,eol_content_id,eol_page_id,kingdom,phylum,family,file_path,label
0,75fd91cb-2881-41cd-88e6-de451e8b60e2,12853737,449393,animalia,mollusca,unionidae,rare_species 1/mollusca_unionidae/12853737_449...,193
1,28c508bc-63ff-4e60-9c8f-1934367e1528,20969394,793083,animalia,chordata,geoemydidae,rare_species 1/chordata_geoemydidae/20969394_7...,84
2,00372441-588c-4af8-9665-29bee20822c0,28895411,319982,animalia,chordata,cryptobranchidae,rare_species 1/chordata_cryptobranchidae/28895...,56


In [11]:
def load_and_preprocess_image(path, target_size=(100, 100)):
    img = cv.imread(path)
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    img = cv.resize(img, target_size)
    img = img.astype('float32') / 255.0
    return img
X = np.array([load_and_preprocess_image(path) for path in metadata['file_path']])

In [19]:
y = to_categorical(metadata['label'])

In [20]:
X_train, X_val, y_train, y_val = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

In [21]:
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(100, 100, 3)),
    MaxPooling2D(),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(metadata['label'].unique()), activation='softmax') 
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)

Epoch 1/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 57ms/step - accuracy: 0.0372 - loss: 5.1329 - val_accuracy: 0.0955 - val_loss: 4.7136
Epoch 2/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 58ms/step - accuracy: 0.1089 - loss: 4.5299 - val_accuracy: 0.1260 - val_loss: 4.3983
Epoch 3/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 57ms/step - accuracy: 0.1989 - loss: 3.8344 - val_accuracy: 0.1460 - val_loss: 4.3610
Epoch 4/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 56ms/step - accuracy: 0.3545 - loss: 2.9007 - val_accuracy: 0.1673 - val_loss: 4.5946
Epoch 5/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 56ms/step - accuracy: 0.5485 - loss: 1.9338 - val_accuracy: 0.1619 - val_loss: 5.4149
Epoch 6/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 57ms/step - accuracy: 0.7276 - loss: 1.1369 - val_accuracy: 0.1589 - val_loss: 6.8563
Epoch 7/10
[1m3

<keras.src.callbacks.history.History at 0x18768ddec90>

In [22]:
# Make predictions
y_pred = model.predict(X_val)  # (for validation data)
y_pred_classes = np.argmax(y_pred, axis=1)  # get predicted class labels

# True class labels
y_true = np.argmax(y_val, axis=1)  # convert one-hot encoded labels to class indices

# Calculate F1 score
f1 = f1_score(y_true, y_pred_classes, average='weighted')  # weighted for class imbalance

print(f"F1 Score: {f1}")

[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step
F1 Score: 0.14814192448488203
