# Modelos

Vamos a probar dos tipos de modelos, unos basados en árboles de decisiones y otros en deep learning.

Las variables explicativas serán TaxonID y dive_month, es decir, la especie y mes del año.

Como variables objetivo (las que se quiere predecir) se probarán Transect.code, Island y Bioregion. Estas variables indican zonas de las Islas Galápagos y están ordenadas de mayor a menor granularidad: del transecto, la zona más pequeña, a la biorregión, la más amplia.

## Árboles de decisiones

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import pandas as pd
import numpy as np

Codificamos los datos y realizamos partición train/test

In [None]:
# Load the cleaned macroinvertebrate records.
df = pd.read_csv("Macroinvertebrados_clean")

# Label-encode every categorical column to integer codes in one vectorised
# pass per column. The previous version called `le.transform([x])[0]` once per
# row; `fit_transform` produces the same codes (LabelEncoder sorts the distinct
# values) without the per-row overhead.
encoders = {}  # column name -> fitted encoder, kept so codes can be mapped back to labels
for column in ["Island", "dive_month", "Bioregion", "Transect.code", "TaxonID"]:
  le = preprocessing.LabelEncoder()
  df[column] = le.fit_transform(df[column])
  encoders[column] = le
  if column == "Bioregion":
    # Show which bioregion name each integer code stands for.
    print(le.classes_)

# Hold out 20 % of the rows for testing. The fixed seed makes the partition,
# and therefore every metric computed from it, repeatable across kernel restarts.
train, test = train_test_split(df, test_size=0.2, shuffle=True, random_state=42)

['Bahía Elizabeth' 'Lejano Norte' 'Norte' 'Oeste' 'Sureste']


### Transecto

In [None]:
# Decision tree that predicts the transect from species and month.
# random_state fixes the tree's internal randomness so repeated fits on the
# same data give the same model.
arbol = DecisionTreeClassifier(min_impurity_decrease=0.0001, random_state=42)
arbol.fit(train[["TaxonID", "dive_month"]], train['Transect.code'])
pred = arbol.predict(test[["TaxonID", "dive_month"]])
# zero_division=0 reports 0.0 for classes with no predicted or no true samples
# (the value already shown) without emitting one warning per undefined metric.
print(classification_report(test['Transect.code'], pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       1.0
           2       0.00      0.00      0.00       1.0
           3       0.00      0.00      0.00       1.0
           4       0.00      0.00      0.00       0.0
           5       0.00      0.00      0.00       0.0
           6       0.00      0.00      0.00       2.0
           7       0.00      0.00      0.00       0.0
           8       0.00      0.00      0.00       1.0
           9       0.00      0.00      0.00       0.0
          10       0.00      0.00      0.00       2.0
          11       0.00      0.00      0.00       1.0
          12       0.00      0.00      0.00       1.0
          13       0.00      0.00      0.00       1.0
          14       0.00      0.00      0.00       1.0
          15       0.00      0.00      0.00       1.0
          17       0.00      0.00      0.00       1.0
          18       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Isla

In [None]:
# Decision tree that predicts the island from species and month.
# random_state fixes the tree's internal randomness so repeated fits on the
# same data give the same model.
arbol = DecisionTreeClassifier(min_impurity_decrease=0.0001, random_state=42)
arbol.fit(train[["TaxonID", "dive_month"]], train['Island'])
pred = arbol.predict(test[["TaxonID", "dive_month"]])
# zero_division=0 reports 0.0 for classes that are never predicted (the value
# already shown) without emitting a warning for each undefined metric.
print(classification_report(test['Island'], pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.15      0.04      0.07        45
           1       0.24      0.06      0.09       107
           2       0.28      0.06      0.10       183
           3       0.25      0.28      0.27       152
           4       0.31      0.19      0.23        97
           5       0.50      0.81      0.62       479
           6       0.00      0.00      0.00        54
           7       0.00      0.00      0.00        10
           8       0.00      0.00      0.00         9
           9       0.33      0.06      0.10       113
          10       0.24      0.57      0.34        96

    accuracy                           0.39      1345
   macro avg       0.21      0.19      0.17      1345
weighted avg       0.34      0.39      0.32      1345



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Bioregion

In [None]:
# Decision tree that predicts the bioregion from species and month.
# random_state fixes the tree's internal randomness so repeated fits on the
# same data give the same model.
arbol = DecisionTreeClassifier(min_impurity_decrease=0.0001, random_state=42)
arbol.fit(train[["TaxonID", "dive_month"]], train['Bioregion'])
pred = arbol.predict(test[["TaxonID", "dive_month"]])
# zero_division=0 keeps the report warning-free should a class end up with no
# predictions; it does not change any defined metric.
print(classification_report(test['Bioregion'], pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.41      0.28      0.33       329
           1       0.45      0.62      0.52       141
           2       0.34      0.27      0.30       161
           3       0.45      0.37      0.41       329
           4       0.45      0.62      0.52       385

    accuracy                           0.43      1345
   macro avg       0.42      0.43      0.42      1345
weighted avg       0.43      0.43      0.42      1345



## Redes neuronales

In [None]:
!pip install pytorch_lightning

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_lightning
  Downloading pytorch_lightning-2.0.2-py3-none-any.whl (719 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m719.0/719.0 kB[0m [31m50.7 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-0.11.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m46.9 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.7.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.8.0-py3-none-any.whl (20 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from fsspec[http]>2021.06.0->pytorch_lightning)
  Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m81.8 MB/s[0m eta [36m0:00:00[0m
Collecting multidict

In [None]:
import torch
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint
import numpy as np
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
from sklearn import preprocessing

Añadimos una partición de validación

In [None]:
# Carve the validation set out of the non-test rows (25 % of them).
# The pool is rebuilt from `df` minus the test rows instead of from `train`
# itself, so re-running this cell no longer shrinks the training set a little
# more each time; the fixed seed keeps the partition repeatable.
# NOTE: `train` is still rebound here, so the tree cells above see the smaller
# training set if they are re-run after this cell.
train, val = train_test_split(df.drop(index=test.index), test_size=0.25, shuffle=True, random_state=42)

Definimos dataset

In [None]:
class MyDataset(Dataset):
  """Map-style dataset that serves (features, target) pairs from a DataFrame.

  Args:
    data: DataFrame holding both the explanatory and the target columns.
    columns_explic: list of column names used as model inputs.
    column_pred: name of the column holding the class to predict.

  Each item is a tuple ``(features, target)`` where ``features`` is a float32
  tensor with one entry per explanatory column and ``target`` is a 0-dim tensor
  carrying the target column's dtype.

  The columns are converted to tensors once, at construction time, so
  ``__getitem__`` is a plain tensor index instead of a pandas lookup per
  sample. As a consequence, later changes to ``data`` are not reflected.
  """

  def __init__(self, data, columns_explic, column_pred):
    self.data = data
    self.columns_explic = columns_explic
    self.column_pred = column_pred
    # torch.tensor copies the values, so the tensors never alias pandas memory.
    self._features = torch.tensor(data[columns_explic].to_numpy(), dtype=torch.float32)
    self._targets = torch.tensor(data[column_pred].to_numpy())

  def __getitem__(self, index):
    """Return the feature vector and target stored at position ``index``."""
    return self._features[index], self._targets[index]

  def __len__(self):
    """Number of rows (samples) in the dataset."""
    return self._targets.shape[0]

Definimos red neuronal

In [None]:
class PredictorNN(pl.LightningModule):
  """Embedding-based classifier over (taxon, month) integer codes.

  Column 0 of each input row is looked up in a 32-dimensional taxon embedding
  and column 1 in an 8-dimensional month embedding. The concatenated vectors
  pass through one 16-unit ReLU layer and a linear layer that emits one logit
  per class.

  Args:
    num_taxon: number of rows in the taxon embedding table.
    num_month: number of rows in the month embedding table.
    num_clases: number of output classes.
    lr: learning rate for Adam; defaults to the previously hard-coded 0.0003.
  """

  def __init__(self, num_taxon, num_month, num_clases, lr=0.0003):
    super().__init__()
    self.lr = lr
    self.embedding_taxon = nn.Embedding(num_taxon, 32)
    self.embedding_month = nn.Embedding(num_month, 8)
    self.dense1 = nn.Linear(32+8, 16)
    self.output_layer = nn.Linear(16, num_clases)
    # Predictions and true labels gathered batch by batch during testing.
    self.preds_test = []
    self.clases_test = []

  def forward(self, data):
    """Return class logits for a batch whose columns are (taxon code, month code)."""
    # Cast each column to integer indices, as nn.Embedding requires.
    embed_taxon = self.embedding_taxon(data[:, 0].long())
    embed_month = self.embedding_month(data[:, 1].long())
    data = torch.cat((embed_taxon, embed_month), dim=1)
    out = F.relu(self.dense1(data))
    logits = self.output_layer(out)
    return logits

  def training_step(self, batch, batch_index):
    """Compute and log the cross-entropy loss of one training batch."""
    explic, clases = batch
    preds = self(explic)
    loss = F.cross_entropy(preds, clases)
    self.log("loss", loss, prog_bar=True)
    return {"loss": loss}

  def validation_step(self, batch, batch_index):
    """Log the cross-entropy loss of one validation batch as ``val_loss``."""
    explic, clases = batch
    preds = self(explic)
    val_loss = F.cross_entropy(preds, clases)
    self.log("val_loss", val_loss)
    return None

  def on_test_epoch_start(self):
    """Discard results of any earlier test run so reports never mix runs."""
    self.preds_test = []
    self.clases_test = []

  def test_step(self, batch, batch_index):
    """Store the predicted and true classes of one test batch."""
    explic, clases = batch
    logits = self(explic)
    # argmax over logits selects the same class as argmax over softmax(logits).
    preds = torch.argmax(logits, dim=1)
    # extend() appends in place; rebuilding the list on every batch was quadratic.
    self.preds_test.extend(preds.detach().cpu().numpy().tolist())
    self.clases_test.extend(clases.detach().cpu().numpy().tolist())
    return None

  def on_test_epoch_end(self):
    """Print the classification report over all collected test batches."""
    # zero_division=0 reports 0.0 for classes never predicted, without warnings.
    print(classification_report(self.clases_test, self.preds_test, zero_division=0))

  def configure_optimizers(self):
    """Optimise all parameters with Adam at the configured learning rate."""
    return torch.optim.Adam(self.parameters(), self.lr)



### Transecto

In [None]:
# Creamos datasets y dataloaders
dataset_train = MyDataset(train, ["TaxonID", "dive_month"], "Transect.code")
loader_train = DataLoader(dataset_train, 64, True)
dataset_test = MyDataset(test, ["TaxonID", "dive_month"], "Transect.code")
loader_test = DataLoader(dataset_test, 64, True)
dataset_val = MyDataset(val, ["TaxonID", "dive_month"], "Transect.code")
loader_val = DataLoader(dataset_val, 64, True)

# Creamos modelo
model_check = ModelCheckpoint("model", monitor="val_loss")
early_stopping = EarlyStopping('val_loss', 0.001, 10)
model = PredictorNN(num_taxon=len(df["TaxonID"].unique()), num_month=len(df["dive_month"].unique()), num_clases=len(df["Transect.code"].unique()))

# Entrenamos modelo y lo probamos 
trainer = pl.Trainer(max_epochs=200, devices=1, accelerator="gpu", callbacks=[early_stopping, model_check])
trainer.fit(model, loader_train, loader_val)
trainer.test(ckpt_path=model_check.best_model_path, dataloaders=loader_test)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name            | Type      | Params
----------------------------------------------
0 | embedding_taxon | Embedding | 2.5 K 
1 | embedding_month | Embedding | 64    
2 | dense1          | Linear    | 656   
3 | output_layer    | Linear    | 18.0 K
----------------------------------------------
21.2 K    Trainable params
0         Non-trainable params
21.2 K    Total params
0.085     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/model/epoch=40-step=2624.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/model/epoch=40-step=2624.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00         1
           2       0.00      0.00      0.00         1
           3       0.00      0.00      0.00         1
           6       0.00      0.00      0.00         2
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         2
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00         1
          13       0.00      0.00      0.00         1
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         1
          17       0.00      0.00      0.00         1
          18       0.00      0.00      0.00         1
          19       0.00      0.00      0.00         1
          20       0.00      0.00      0.00         2
          21       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[{}]

### Isla

In [None]:
# Creamos datasets y dataloaders
dataset_train = MyDataset(train, ["TaxonID", "dive_month"], "Island")
loader_train = DataLoader(dataset_train, 64, True)
dataset_test = MyDataset(test, ["TaxonID", "dive_month"], "Island")
loader_test = DataLoader(dataset_test, 64, True)
dataset_val = MyDataset(val, ["TaxonID", "dive_month"], "Island")
loader_val = DataLoader(dataset_val, 64, True)

# Creamos modelo
model_check = ModelCheckpoint("model", monitor="val_loss")
early_stopping = EarlyStopping('val_loss', 0.001, 10)
model = PredictorNN(num_taxon=len(df["TaxonID"].unique()), num_month=len(df["dive_month"].unique()), num_clases=len(df["Island"].unique()))

# Entrenamos modelo y lo probamos 
trainer = pl.Trainer(max_epochs=200, devices=1, accelerator="gpu", callbacks=[early_stopping, model_check])
trainer.fit(model, loader_train, loader_val)
trainer.test(ckpt_path=model_check.best_model_path, dataloaders=loader_test)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name            | Type      | Params
----------------------------------------------
0 | embedding_taxon | Embedding | 2.5 K 
1 | embedding_month | Embedding | 64    
2 | dense1          | Linear    | 656   
3 | output_layer    | Linear    | 187   
----------------------------------------------
3.4 K     Trainable params
0         Non-trainable params
3.4 K     Total params
0.014     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/model/epoch=101-step=6528.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/model/epoch=101-step=6528.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.33      0.11      0.17        45
           1       0.23      0.12      0.16       107
           2       0.25      0.03      0.06       183
           3       0.27      0.26      0.26       152
           4       0.33      0.31      0.32        97
           5       0.48      0.85      0.61       479
           6       0.12      0.04      0.06        54
           7       0.00      0.00      0.00        10
           8       0.00      0.00      0.00         9
           9       0.32      0.11      0.16       113
          10       0.31      0.40      0.35        96

    accuracy                           0.41      1345
   macro avg       0.24      0.20      0.20      1345
weighted avg       0.34      0.41      0.34      1345



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[{}]

### Bioregion

In [None]:
# Creamos datasets y dataloaders
dataset_train = MyDataset(train, ["TaxonID", "dive_month"], "Bioregion")
loader_train = DataLoader(dataset_train, 64, True)
dataset_test = MyDataset(test, ["TaxonID", "dive_month"], "Bioregion")
loader_test = DataLoader(dataset_test, 64, True)
dataset_val = MyDataset(val, ["TaxonID", "dive_month"], "Bioregion")
loader_val = DataLoader(dataset_val, 64, True)

# Creamos modelo
model_check = ModelCheckpoint("model", monitor="val_loss")
early_stopping = EarlyStopping('val_loss', 0.001, 10)
model = PredictorNN(num_taxon=len(df["TaxonID"].unique()), num_month=len(df["dive_month"].unique()), num_clases=len(df["Bioregion"].unique()))

# Entrenamos modelo y lo probamos 
trainer = pl.Trainer(max_epochs=200, devices=1, accelerator="gpu", callbacks=[early_stopping, model_check])
trainer.fit(model, loader_train, loader_val)
trainer.test(ckpt_path=model_check.best_model_path, dataloaders=loader_test)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name            | Type      | Params
----------------------------------------------
0 | embedding_taxon | Embedding | 2.5 K 
1 | embedding_month | Embedding | 64    
2 | dense1          | Linear    | 656   
3 | output_layer    | Linear    | 85    
----------------------------------------------
3.3 K     Trainable params
0         Non-trainable params
3.3 K     Total params
0.013     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/model/epoch=63-step=4096.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/model/epoch=63-step=4096.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.40      0.23      0.29       329
           1       0.46      0.57      0.51       141
           2       0.36      0.17      0.24       161
           3       0.46      0.40      0.43       329
           4       0.43      0.69      0.53       385

    accuracy                           0.43      1345
   macro avg       0.42      0.41      0.40      1345
weighted avg       0.43      0.43      0.41      1345



[{}]

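# Cálculo de la accuracy máxima

Cota superior de la accuracy alcanzable usando solo TaxonID y dive_month: para cada combinación de especie y mes, lo mejor que puede hacer un modelo es predecir siempre la isla más frecuente de esa combinación.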

In [None]:
def max_accuracy(data, feature_cols, target_col):
  """Best accuracy any classifier can reach on `data` from `feature_cols` alone.

  Rows sharing the same feature values are indistinguishable to a model, so at
  most the most frequent target of each feature combination can be predicted
  correctly. The result is the sum of those per-combination maxima divided by
  the number of rows.

  Args:
    data: DataFrame containing the feature and target columns.
    feature_cols: list of column names used as model inputs.
    target_col: name of the column to predict.

  Returns:
    The accuracy upper bound as a float, or NaN when `data` has no rows.
  """
  if len(data) == 0:
    return float("nan")
  feature_cols = list(feature_cols)
  # Occurrences of every (features..., target) combination.
  counts = data.groupby(feature_cols + [target_col]).size()
  # Keep, for each feature combination, the count of its most frequent target.
  best_per_group = counts.groupby(level=feature_cols).max()
  return float(best_per_group.sum()) / len(data)


# Groups are keyed by the ordered (taxon, month) pair. The earlier
# frozenset([taxon, month]) key was unordered, so e.g. taxon 3 / month 5 and
# taxon 5 / month 3 were merged into one group because both are integer codes.
print("Max accuracy", max_accuracy(df, ["TaxonID", "dive_month"], "Island"))