## Setup

In [None]:
# Mount Google Drive at /content/drive. Later cells read the deploy key from
# /content/drive/MyDrive/deploy_keys and copy result archives into /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!mkdir -p /root/.ssh/
!ls -l /root/.ssh/
!cp /content/drive/MyDrive/deploy_keys/id_ed25519* /root/.ssh/
# Add github as trusted entity
!ssh-keyscan github.com >> /root/.ssh/known_hosts
!ssh -T git@github.com
!git clone git@github.com:MysticShadow427/homo-mex-2024.git

In [None]:
# Switch the kernel's working directory to the cloned repository.
# NOTE(review): the training scripts presumably write outputs relative to this
# directory (later cells read result folders from /content/homo-mex-2024) — confirm.
%cd /content/homo-mex-2024

In [None]:
# Print the current working directory as a sanity check after the %cd above.
!pwd

## LoRA Fine Tuning

In [None]:
# Dependency install for the LoRA section.
# NOTE(review): version is unpinned, so a later run may resolve a different release.
!pip install sentence-transformers

In [None]:
# Dependency install for the LoRA section.
# NOTE(review): version is unpinned, so a later run may resolve a different release.
!pip install datasets

In [None]:
# Dependency install for the LoRA section (peft and accelerate).
# NOTE(review): versions are unpinned, so a later run may resolve different releases.
!pip install peft accelerate

In [None]:
%matplotlib notebook
!python /content/homo-mex-2024/src/train_lora_llm.py

In [None]:
!mkdir /content/lora_results

In [None]:
import shutil

# Copy the "...modified-homo-mex" folder from the repository checkout into the
# staging directory as `tokenizer`.
# NOTE(review): presumably this folder holds the tokenizer saved by the LoRA run — confirm.
source_folder = '/content/homo-mex-2024/bert-base-spanish-wwm-uncased-modified-homo-mex'
destination_folder = '/content/lora_results/tokenizer'

# dirs_exist_ok=True allows re-running the cell; without it copytree raises
# FileExistsError once the destination directory exists.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

In [None]:
# Imported here as well so this cell does not depend on an earlier cell having run.
import shutil

# Copy the "...peft-homo-mex" folder from the repository checkout into the staging
# directory as `model`.
# NOTE(review): presumably this folder holds the PEFT/LoRA adapter weights — confirm.
source_folder = '/content/homo-mex-2024/bert-base-spanish-wwm-uncased-peft-homo-mex'
destination_folder = '/content/lora_results/model'

# dirs_exist_ok=True allows re-running the cell; without it copytree raises
# FileExistsError once the destination directory exists.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

In [None]:
# Recursively archive the staged LoRA results directory into a single zip under /content.
# NOTE(review): when the zip already exists, `zip -r` updates it in place instead of
# recreating it, so entries from an earlier run can linger — delete the zip first if
# a clean archive is needed.
!zip -r /content/result_lora_fine_tune_spanish_bert_folder_10_epochs_all_linear_adapters.zip /content/lora_results

In [None]:
import shutil

# Copy the LoRA results archive into the mounted Drive folder.
source_file = '/content/result_lora_fine_tune_spanish_bert_folder_10_epochs_all_linear_adapters.zip'
destination_folder = '/content/drive/MyDrive'
shutil.copy(src=source_file, dst=destination_folder)


## Full Fine Tuning

In [None]:
# Dependency install for the full fine-tuning section; -U upgrades an already
# installed copy to the newest available release.
# NOTE(review): version is unpinned, so results may differ between runs.
!pip install -U sentence-transformers

In [None]:
%matplotlib notebook
!python /content/homo-mex-2024/src/mex_main.py --epochs 5  --learning_rate 2e-5 --batch_size 16

In [None]:
!zip -r /content/result_full_fine_tune_spanish_bert_folder_random_oversample.zip /content/homo-mex-2024/artifacts
from google.colab import files

files.download("/content/result_full_fine_tune_spanish_bert_folder_random_oversample.zip")


In [None]:
import shutil

# Copy the full fine-tuning results archive into the mounted Drive folder.
source_file = '/content/result_full_fine_tune_spanish_bert_folder_random_oversample.zip'
destination_folder = '/content/drive/MyDrive'
shutil.copy(src=source_file, dst=destination_folder)


## XGBoost on Sentence Embeddings

In [None]:
!pip install optuna xgboost imblearn

In [None]:
# Run the XGBoost training script with the 'spanish-bert' embeddings option and
# the 'smote' augmentation option.
!python /content/homo-mex-2024/src/mex_train_xgboost.py --embeddings 'spanish-bert' --augmentation 'smote'

## LSTM on Spanish-BERT Features

In [None]:
# Dependency install for the LSTM section; -U upgrades an already installed copy
# to the newest available release.
# NOTE(review): version is unpinned, so results may differ between runs.
!pip install -U sentence-transformers

In [None]:
%matplotlib notebook
!python /content/homo-mex-2024/src/mex_train_lstm.py --epochs 20 --learning_rate 3e-4 --batch_size 32 --dropout 0.3 --num_layers 2 --hidden_size 192 --bidirectional 0

In [None]:
!zip -r /content/result_lstm_spanish_bert_random_oversample_folder.zip /content/homo-mex-2024/artifacts
from google.colab import files

files.download("/content/result_lstm_spanish_bert_random_oversample_folder.zip")

In [None]:
import shutil

# Copy the LSTM results archive into the mounted Drive folder.
source_file = '/content/result_lstm_spanish_bert_random_oversample_folder.zip'
destination_folder = '/content/drive/MyDrive'
shutil.copy(src=source_file, dst=destination_folder)


## Train Ensemble Models on Raw Oversampled Text

In [None]:
# Dependency install for the ensemble section.
# NOTE(review): version is unpinned, and unlike the other sections this install
# omits -U — confirm whether that difference is intentional.
!pip install sentence-transformers

In [None]:
# Run the ensemble training script with 10 epochs, learning rate 4e-3 and batch size 32.
!python /content/homo-mex-2024/src/mex_train_ensemble.py --epochs 10 --learning_rate 4e-3 --batch_size 32