#### Google Colab: Environment Setup

In [None]:
# clone the repository if directory `/content/spam_detection2` does not exist and we're not already in it
import os

workspace_dir = '/content/spam_detection2'
current_dir = os.getcwd()
if not os.path.exists(workspace_dir) and current_dir != workspace_dir:
    !git clone https://github.com/RationalEar/spam_detection2.git
    os.chdir(workspace_dir)
    !ls -al
    !pip install -q transformers==4.48.0 scikit-learn pandas numpy matplotlib mlflow beautifulsoup4 shap lime
    !pip install -q torch --index-url https://download.pytorch.org/whl/cu126
else:
    os.chdir(workspace_dir)
    !git pull

In [None]:
from google.colab import drive
import pandas as pd
from utils.preprocessor import load_glove_embeddings
from train import train_model

In [None]:
# Mount Google Drive for saving models
drive.mount('/content/drive')
# Project folder on the mounted Drive; the data, GloVe and model paths used in this
# notebook are all derived from it.
ROOT_PATH = '/content/drive/MyDrive/Projects/spam_detection2/'
# Directory passed to train_model as `model_save_path` in the training cells.
# (`os` is imported in the environment-setup cell.)
MODEL_SAVE_PATH = os.path.join(ROOT_PATH, 'models')
# exist_ok=True makes re-running this cell safe when the directory already exists.
os.makedirs(MODEL_SAVE_PATH, exist_ok=True)

In [None]:
from train import set_seed

# Call the project's seeding helper with a fixed value before any data loading or
# training. Presumably this seeds every RNG the training code uses so runs are
# repeatable -- TODO confirm against train.set_seed.
set_seed(42)

#### Load preprocessed data (assumes `train.pkl` and `test.pkl` are available in `data/processed/` under `ROOT_PATH`)

In [None]:
# Load the preprocessed train/test DataFrames from the project folder on Drive.
# Paths are built with os.path.join (as for MODEL_SAVE_PATH and GLOVE_PATH) so they
# remain valid even if ROOT_PATH is written without a trailing slash.
# NOTE: unpickling can execute arbitrary code -- only load pickle files you trust.
train_df = pd.read_pickle(os.path.join(ROOT_PATH, 'data/processed/train.pkl'))
test_df = pd.read_pickle(os.path.join(ROOT_PATH, 'data/processed/test.pkl'))

In [None]:
# Build vocabulary from training data
# Only the training frame's 'text' column is passed in; test_df is not used here.
from utils.functions import build_vocab

# word2idx is later handed to load_glove_embeddings. Judging by its name it maps
# each word to an integer index -- TODO confirm against utils.functions.build_vocab.
word2idx = build_vocab(train_df['text'])

In [None]:
# Load GloVe embeddings
# Settings reused by every training cell below. embedding_dim presumably has to
# match the vector size of the GloVe file chosen here (the "300d" file).
embedding_dim = 300
max_len = 200

# GloVe vectors live under the project folder on Drive.
GLOVE_PATH = os.path.join(ROOT_PATH, 'data/raw/glove.6B/glove.6B.300d.txt')

# Embeddings are loaded for the vocabulary built from the training data.
pretrained_embeddings = load_glove_embeddings(GLOVE_PATH, word2idx, embedding_dim)

#### Train CNN Model

In [None]:
# Train the 'cnn' model with the GloVe embeddings loaded above and store it under
# MODEL_SAVE_PATH; evaluate=True requests evaluation as well (presumably on test_df).
train_model(
    'cnn',
    train_df,
    test_df,
    embedding_dim=embedding_dim,
    pretrained_embeddings=pretrained_embeddings,
    model_save_path=MODEL_SAVE_PATH,
    max_len=max_len,
    evaluate=True,
)

#### Train BiLSTM Model

In [None]:
# Train the 'bilstm' model with the same data, embeddings and settings as the CNN
# run; evaluate=True requests evaluation as well (presumably on test_df).
train_model(
    'bilstm',
    train_df,
    test_df,
    embedding_dim=embedding_dim,
    pretrained_embeddings=pretrained_embeddings,
    model_save_path=MODEL_SAVE_PATH,
    max_len=max_len,
    evaluate=True,
)

#### Train BERT Model

In [None]:
# Train the 'bert' model through the same entry point as the other two models.
# NOTE(review): the GloVe `embedding_dim` / `pretrained_embeddings` arguments are
# passed here too; whether train_model uses or ignores them for 'bert' is not
# visible from this notebook -- confirm in train.train_model.
train_model(
    'bert',
    train_df,
    test_df,
    embedding_dim=embedding_dim,
    pretrained_embeddings=pretrained_embeddings,
    model_save_path=MODEL_SAVE_PATH,
    max_len=max_len,
    evaluate=True,
)