# Train DistilBERT (multilingual) and convert to TFLite

This notebook trains a small multilingual model for phishing detection using `distilbert-base-multilingual-cased`, saves the best checkpoint, converts it to a TensorFlow SavedModel and then to a TFLite file. It includes small tests and saves the artifacts to Google Drive for easy download.

## 0) Notes before you start

- Use GPU runtime (Runtime → Change runtime type → GPU).
- Upload `train.csv`, `validation.csv`, and `test.csv` via the file upload UI or mount your Drive and place them in a folder.
- This notebook is intentionally minimal and uses small defaults to keep runtime short.

In [None]:
# 1) Install dependencies (run once)
# Hugging Face libraries; `transformers` and `datasets` are imported by later cells.
# NOTE(review): versions are unpinned, so the resolved versions depend on when this cell runs.
!pip install -q transformers datasets accelerate evaluate sentencepiece
!pip install -q 'tensorflow>=2.12'  # for TFLite conversion and interpreter

print('Installed packages')

In [None]:
# 2) Mount Google Drive (optional) or upload files manually
from google.colab import drive, files
import os

# Mount point to pass to drive.mount() if Drive is used (mounting is opt-in below).
drive_mount_path = '/content/drive'
print('If you want to use Drive, run: drive.mount(drive_mount_path) and place CSVs under a folder; otherwise use files.upload()')
# Uncomment to mount
# drive.mount(drive_mount_path)

# Helper: if local files not present, prompt manual upload
def ensure_file(path):
    """Prompt for a manual upload when *path* is not present in the runtime.

    Args:
        path: Path of the file the notebook expects to find.

    Returns:
        None. Prints the names of uploaded files, and a warning when the
        expected file is still missing after the upload.
    """
    if os.path.exists(path):
        return
    print(f"Upload {os.path.basename(path)}")
    uploaded = files.upload()
    for name in uploaded:
        print('Uploaded', name)
    # Nothing guarantees the uploaded file carries the expected name, so
    # re-check here instead of letting a later read fail without context.
    if not os.path.exists(path):
        print(f"Warning: {path} is still missing after upload; "
              "rename the uploaded file or change the expected filename")

# Set expected filenames (change if your filenames differ)
TRAIN_CSV, VAL_CSV, TEST_CSV = 'train.csv', 'validation.csv', 'test.csv'

# Ask for an upload of every split that is not already in the workspace.
for csv_path in (TRAIN_CSV, VAL_CSV, TEST_CSV):
    ensure_file(csv_path)

print('Ready to load CSVs from this runtime workspace')

In [None]:
# 3) Load and quick-validate the CSVs
import pandas as pd

train_df = pd.read_csv(TRAIN_CSV)
val_df = pd.read_csv(VAL_CSV)
test_df = pd.read_csv(TEST_CSV)

print('Train', len(train_df), 'Val', len(val_df), 'Test', len(test_df))
print('Sample train rows:')
display(train_df.head())

# Ensure label column exists and map to integers (0 = legitimate, 1 = phishing)
label_map = {'phishing': 1, 'legitimate': 0}


def normalize_labels(df, split_name):
    """Convert ``df['label']`` to int64 in place and return *df*.

    Integer columns are only cast to int64; any other dtype is mapped through
    ``label_map``.

    Args:
        df: DataFrame holding one data split.
        split_name: Name of the split, used in error messages.

    Raises:
        KeyError: If the frame has no ``label`` column.
        ValueError: If a label value is not a key of ``label_map``.
    """
    if 'label' not in df.columns:
        raise KeyError(f"{split_name}: expected a 'label' column, found {list(df.columns)}")
    if pd.api.types.is_integer_dtype(df['label']):
        df['label'] = df['label'].astype('int64')
        return df
    mapped = df['label'].map(label_map)
    # Unmapped values become NaN; report them instead of failing in astype().
    unknown = df.loc[mapped.isna(), 'label'].unique().tolist()
    if unknown:
        raise ValueError(
            f"{split_name}: unexpected label values {unknown[:5]}; "
            f"expected one of {list(label_map)}"
        )
    df['label'] = mapped.astype('int64')
    return df


# Each split is checked on its own: the dtype of one split says nothing about
# the dtype of the others.
for split_name, split_df in (('train', train_df), ('validation', val_df), ('test', test_df)):
    normalize_labels(split_df, split_name)

print('Label distribution (train):')
print(train_df['label'].value_counts())

## 4) Prepare Hugging Face datasets and tokenizer
We tokenize with `distilbert-base-multilingual-cased`. We use short sequences (max_length=128) for mobile efficiency.

In [None]:
from datasets import Dataset
from transformers import AutoTokenizer

model_name = 'distilbert-base-multilingual-cased'
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_fn(batch):
    """Tokenize the ``text`` entries of a batch, padded/truncated to 128 tokens."""
    return tokenizer(batch['text'], padding='max_length', truncation=True, max_length=128)


def _frame_to_tokenized_dataset(frame):
    """Build a torch-formatted, tokenized dataset from a message/label frame."""
    renamed = frame[['message', 'label']].rename(columns={'message': 'text'})
    tokenized = Dataset.from_pandas(renamed).map(tokenize_fn, batched=True)
    # The raw text is no longer needed once the token columns exist.
    return tokenized.remove_columns(['text']).with_format('torch')


train_ds, val_ds, test_ds = (
    _frame_to_tokenized_dataset(frame) for frame in (train_df, val_df, test_df)
)

print('Datasets tokenized')

## 5) Initialize model and Trainer
We use the Hugging Face `Trainer` for simplicity.

In [None]:
import numpy as np
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Newer transformers releases name this argument `eval_strategy`, older ones
# `evaluation_strategy`. The install cell does not pin a version, so use
# whichever keyword the installed TrainingArguments accepts.
import inspect

_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_kw = 'eval_strategy' if 'eval_strategy' in _training_arg_names else 'evaluation_strategy'

training_args = TrainingArguments(
    output_dir='./results',
    save_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model='accuracy',
    **{_eval_strategy_kw: 'epoch'},
)


def compute_metrics(eval_pred):
    """Return accuracy, F1, precision and recall for one evaluation pass.

    Args:
        eval_pred: Pair of ``(logits, labels)``.

    Returns:
        Dict with ``accuracy``, ``f1``, ``precision`` and ``recall``; the last
        three are computed with ``average='binary'``.
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        # Some models return extra outputs; the logits come first.
        logits = logits[0]
    preds = np.argmax(logits, axis=-1)
    # zero_division=0 keeps the same 0.0 result sklearn would emit, without the
    # warning, when the positive class is never predicted.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='binary', zero_division=0
    )
    acc = accuracy_score(labels, preds)
    return {'accuracy': acc, 'f1': f1, 'precision': precision, 'recall': recall}


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)

print('Trainer ready')

In [None]:
# 6) Train (this may take ~30-60 minutes on Colab GPU depending on dataset size)
train_result = trainer.train()
print('Training finished')

# Store the model and the tokenizer in the same folder; later cells reload
# both from this path.
model_export_dir = './phishing_detector_model'
trainer.save_model(model_export_dir)
tokenizer.save_pretrained(model_export_dir)
print(f'Model and tokenizer saved to {model_export_dir}')

## 7) Convert PyTorch model to TensorFlow and then to TFLite
We convert the saved PyTorch checkpoint to a TF SavedModel using `TFAutoModelForSequenceClassification.from_pretrained(..., from_pt=True)`, then use the TensorFlow Lite converter.

In [None]:
# 7.1 Convert to TensorFlow SavedModel
from transformers import TFAutoModelForSequenceClassification
import tensorflow as tf
import os

import shutil

tf_model_dir = './tf_saved_model'
if os.path.exists(tf_model_dir):
    print('Removing previous TF model')
    shutil.rmtree(tf_model_dir)

print('Loading PyTorch checkpoint and converting to TF...')
tf_model = TFAutoModelForSequenceClassification.from_pretrained('./phishing_detector_model', from_pt=True)
# Keep the Hugging Face TF checkpoint in the folder, as before.
tf_model.save_pretrained(tf_model_dir)

# save_pretrained() with default arguments writes the Hugging Face checkpoint,
# not a TensorFlow SavedModel, but the conversion cells call
# TFLiteConverter.from_saved_model(tf_model_dir). Export a SavedModel here with
# an explicit signature. The (1, 128) shape matches the tokenizer settings
# used by the later cells (one text, max_length=128).
EXPORT_SEQ_LEN = 128


@tf.function(input_signature=[
    tf.TensorSpec([1, EXPORT_SEQ_LEN], tf.int32, name='input_ids'),
    tf.TensorSpec([1, EXPORT_SEQ_LEN], tf.int32, name='attention_mask'),
])
def serving_fn(input_ids, attention_mask):
    """Inference-only forward pass returning the classification logits."""
    outputs = tf_model(input_ids=input_ids, attention_mask=attention_mask, training=False)
    return {'logits': outputs.logits}


tf.saved_model.save(tf_model, tf_model_dir, signatures={'serving_default': serving_fn})
print('Saved TF model to', tf_model_dir)

In [None]:
# 7.2 Convert SavedModel to TFLite
converter = tf.lite.TFLiteConverter.from_saved_model(tf_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Use float16 quant if you want a smaller model and your device supports it
converter.target_spec.supported_types = [tf.float16]
tflite_model = converter.convert()
# Write through a context manager so the file is flushed and closed before
# later cells open it with the interpreter.
with open('phishing_detector.tflite', 'wb') as fh:
    fh.write(tflite_model)
print('Wrote phishing_detector.tflite (size MB):', round(len(tflite_model)/(1024*1024),2))

### 7.3 Prepare representative calibration set (for full-int8 quantization)
Use a sample of real messages from the training CSV as the representative dataset. This cell builds `calibration_texts` by sampling up to 500 messages from `TRAIN_CSV` used earlier in the notebook.

In [None]:
# Build a representative calibration list from the training CSV (up to 500 samples)
import pandas as pd
# TRAIN_CSV was defined earlier in the notebook as 'train.csv' — if you mounted Drive or uploaded files, that variable will be present
try:
    df = pd.read_csv(TRAIN_CSV)
except (NameError, OSError):
    # TRAIN_CSV is undefined (fresh runtime) or the file is missing:
    # fall back to the processed path if present in the runtime workspace.
    df = pd.read_csv('data/processed/train.csv')
# Ensure we have a message column and drop NA
if 'message' not in df.columns:
    raise KeyError("training CSV must contain a 'message' column")
df = df.dropna(subset=['message'])
if df.empty:
    raise ValueError('No messages available to build the calibration set')
n = min(500, len(df))
if 'label' in df.columns and len(df) > n:
    # Proportional sample per label, so the calibration set keeps roughly the
    # class balance of the training data.
    # Convert label to string in case it's numeric (also keeps NaN labels as a group).
    df['label'] = df['label'].astype(str)
    frac = n / len(df)
    groups = [g.sample(frac=frac, random_state=42) for _, g in df.groupby('label')]
    pooled = pd.concat(groups)
    # Per-group rounding can leave slightly fewer than n rows; never request
    # more rows than were pooled.
    sample_df = pooled.sample(n=min(n, len(pooled)), random_state=42)
else:
    sample_df = df.sample(n=n, random_state=42)
calibration_texts = sample_df['message'].astype(str).tolist()
print('Prepared calibration_texts from', len(calibration_texts), 'messages')
# show first few examples
for t in calibration_texts[:8]:
    print('-', t)

### 7.4 Full integer (int8) TFLite conversion using the representative dataset
This cell runs full integer quantization with the calibration texts built above and writes `phishing_detector_int8.tflite`.

In [None]:
# 7.4 Full integer (int8) TFLite conversion — robust representative generator
import numpy as np
import traceback
import tensorflow as tf
from transformers import AutoTokenizer

# Load tokenizer saved earlier during training
tokenizer = AutoTokenizer.from_pretrained('./phishing_detector_model')
MAX_LEN = 128

print('TensorFlow version:', tf.__version__)
print('SavedModel path:', tf_model_dir)

# Read the input names from the serving_default signature so the
# representative dataset can be keyed by exactly those names.
signature_input_names = []
try:
    loaded = tf.saved_model.load(tf_model_dir)
    sigs = list(loaded.signatures.keys())
    print('SavedModel signatures:', sigs)
    if 'serving_default' in sigs:
        sd = loaded.signatures['serving_default']
        print('serving_default structured_input_signature:', sd.structured_input_signature)
        # structured_input_signature is (args, kwargs); the kwargs keys are the names.
        signature_input_names = list(sd.structured_input_signature[1].keys())
except Exception as e:
    print('Warning: could not inspect SavedModel signatures:', e)
if not signature_input_names:
    # Inspection failed or found nothing: fall back to the bare tokenizer names.
    signature_input_names = ['input_ids', 'attention_mask']
print('Representative dataset keys:', signature_input_names)


def _calibration_tensor(input_name, enc):
    """Pick the int32 tokenizer output that belongs to a signature input name."""
    lowered = input_name.lower()
    if 'token_type' in lowered or 'segment' in lowered:
        return np.zeros_like(enc['input_ids']).astype(np.int32)
    if 'attention' in lowered or 'mask' in lowered:
        return enc['attention_mask'].astype(np.int32)
    return enc['input_ids'].astype(np.int32)


def representative_with_both():
    """Yield one calibration sample per text in ``calibration_texts``.

    Each sample is a dict keyed by the signature input names found above. The
    function keeps its original name for compatibility, but it now feeds only
    the names the signature declares, so it does not rely on the converter
    tolerating unknown keys.
    """
    for t in calibration_texts:
        enc = tokenizer(t, truncation=True, padding='max_length', max_length=MAX_LEN, return_tensors='np')
        yield {name: _calibration_tensor(name, enc) for name in signature_input_names}


# Run conversion to full integer (int8) using the representative generator
try:
    converter = tf.lite.TFLiteConverter.from_saved_model(tf_model_dir)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_with_both
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8
    print('Running int8 conversion (this may take a while)...')
    tflite_int8 = converter.convert()
    with open('phishing_detector_int8.tflite', 'wb') as fh:
        fh.write(tflite_int8)
    print('Wrote phishing_detector_int8.tflite (MB):', round(len(tflite_int8)/(1024*1024),2))
except Exception:
    # Deliberately broad: the notebook should continue, and the full
    # traceback is both printed and saved for inspection.
    print('Int8 conversion failed:')
    traceback.print_exc()
    with open('int8_conversion_error.txt', 'w') as fh:
        fh.write(traceback.format_exc())
    print('Wrote int8_conversion_error.txt with traceback')


### 7.5 Quick smoke-test for the TFLite models
This test runs every TFLite model that is present (dynamic-range, float16, int8), resizes the inputs to (1, 128) where needed, and prints the predicted class and probabilities.

In [None]:
# 7.5 Robust smoke-tests: dynamic-range, float16, and int8 models
import os
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer
import pprint

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('./phishing_detector_model')

# Helper: robust inference using resize/allocate approach

def prepare_arrays_for_interpreter(enc, input_details):
    """Return one array per interpreter input, in ``input_details`` order.

    The tokenizer output is chosen by matching the lower-cased input name:
    ids, attention mask, or token-type/segment ids (zeros when the tokenizer
    gave none). Unrecognised names get ``input_ids``. Arrays are cast to the
    dtype the interpreter reports, with int32 as a last resort.

    Raises:
        RuntimeError: If no tokenizer output is available for an input.
    """
    def pick_source(lowered_name):
        if 'input_ids' in lowered_name or ('input' in lowered_name and 'id' in lowered_name):
            return enc.get('input_ids')
        if 'attention' in lowered_name and 'mask' in lowered_name:
            return enc.get('attention_mask')
        if 'token_type' in lowered_name or 'segment' in lowered_name:
            segment_ids = enc.get('token_type_ids', None)
            return np.zeros_like(enc['input_ids']) if segment_ids is None else segment_ids
        return enc.get('input_ids')

    prepared = []
    for detail in input_details:
        lowered = detail['name'].lower()
        tensor = pick_source(lowered)
        if tensor is None:
            raise RuntimeError(f"Could not find a suitable tensor for interpreter input '{lowered}'")
        wanted_dtype = detail['dtype']
        if tensor.dtype != wanted_dtype:
            try:
                tensor = tensor.astype(wanted_dtype)
            except Exception:
                tensor = tensor.astype(np.int32)
        prepared.append(tensor)
    return prepared


def safe_resize_and_allocate(interpreter, input_details, arrays):
    """Resize interpreter inputs to the array shapes and allocate tensors.

    Args:
        interpreter: Object exposing ``resize_tensor_input``,
            ``allocate_tensors``, ``get_input_details`` and
            ``get_output_details``.
        input_details: Current input details, in the same order as *arrays*.
        arrays: Arrays that will be fed to the inputs.

    Returns:
        ``(input_details, output_details)``. Both are re-queried after a
        resize. Without a resize, the given *input_details* are returned
        unchanged together with freshly queried output details.
    """
    resized = False
    for inp, arr in zip(input_details, arrays):
        current_shape = list(inp['shape'])
        desired_shape = list(arr.shape)
        if current_shape != desired_shape:
            interpreter.resize_tensor_input(inp['index'], desired_shape, strict=False)
            resized = True
            print(f"Resized input '{inp['name']}' from {current_shape} -> {desired_shape}")
    if resized:
        interpreter.allocate_tensors()
        new_input_details = interpreter.get_input_details()
        new_output_details = interpreter.get_output_details()
        print('Re-queried input details (after resize & allocate):')
        pprint.pprint(new_input_details)
        return new_input_details, new_output_details
    try:
        interpreter.allocate_tensors()
    except Exception as exc:
        # Still best-effort, but report the reason so a later set_tensor()
        # failure is not the first visible symptom.
        print(f"Warning: allocate_tensors() failed without a resize: {exc}")
    return input_details, interpreter.get_output_details()


def _logits_to_prediction(out):
    """Convert a raw model output array into ``(pred, probs)``."""
    import scipy.special

    if out.ndim == 2 and out.shape[1] >= 2:
        # One row of class logits: softmax over the classes.
        probs = scipy.special.softmax(out, axis=-1)[0].tolist()
        return int(np.argmax(out, axis=-1)[0]), probs
    if out.ndim == 2 and out.shape[1] == 1:
        # Single logit: treat it as a sigmoid score for class 1.
        score = float(scipy.special.expit(out[0][0]))
        return int(score > 0.5), [1 - score, score]
    # Any other shape: work on the flattened values so 1-D output is handled.
    flat = out.flatten()
    pred = int(np.argmax(flat)) if flat.size > 1 else int(flat[0] > 0.5)
    return pred, flat.tolist()


def run_smoke(tflite_path, samples):
    """Run each sample text through a TFLite model and print the prediction.

    Args:
        tflite_path: Path of the ``.tflite`` file to load.
        samples: Iterable of texts to classify.

    Raises:
        ValueError: If an input array cannot be reshaped to the shape the
            interpreter expects.
    """
    print('\nRunning smoke-test for:', tflite_path)
    interpreter = tf.lite.Interpreter(model_path=tflite_path)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    print('input_details:', input_details)
    print('output_details:', output_details)

    for text in samples:
        enc = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='np')
        enc = {k: np.asarray(v) for k, v in enc.items()}
        arrays = prepare_arrays_for_interpreter(enc, input_details)
        # Carry the refreshed details forward so the next sample is compared
        # against the current shapes, not the ones from before a resize.
        input_details, output_details = safe_resize_and_allocate(interpreter, input_details, arrays)
        for inp, arr in zip(input_details, arrays):
            expected_shape = tuple(inp['shape'])
            if tuple(arr.shape) != expected_shape:
                try:
                    arr = arr.reshape(expected_shape)
                except Exception as exc:
                    raise ValueError(f"Final shape mismatch for input {inp['name']}: tensor shape {arr.shape} vs expected {expected_shape}") from exc
            interpreter.set_tensor(inp['index'], arr)
        interpreter.invoke()
        out = interpreter.get_tensor(output_details[0]['index'])
        if output_details[0]['dtype'] == np.int8:
            # Dequantize int8 output back to float.
            scale, zero_point = output_details[0]['quantization']
            out = (out.astype(np.float32) - zero_point) * scale
        pred, probs = _logits_to_prediction(out)
        print('TEXT:', text)
        print('pred:', pred, 'probs:', probs)


# Prepare sample messages
samples = [
    "URGENT: Your account will be suspended. Click http://fake.example to verify",
    "Hey, let's meet tomorrow for lunch"
]

# Smoke-test every quantization variant whose file exists; report the others as skipped.
for variant, model_file in (
    ('dynamic-range', 'phishing_detector_dynamic.tflite'),
    ('float16', 'phishing_detector.tflite'),
    ('int8', 'phishing_detector_int8.tflite'),
):
    if not os.path.exists(model_file):
        print(f'{model_file} not found — {variant} test skipped')
        continue
    run_smoke(model_file, samples)


## 8) Quick TFLite smoke test
We run a small test by tokenizing a sample message and running the TFLite interpreter. This is simplified: a production mobile client should implement the same tokenization flow used during training.

In [None]:
import numpy as np
import tensorflow as tf

# Load tokenizer and tflite model
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('./phishing_detector_model')
interpreter = tf.lite.Interpreter(model_path='phishing_detector.tflite')
SMOKE_SEQ_LEN = 128
# Pin every input to (1, 128) before allocating so set_tensor() receives the
# shape the tokenizer call below produces.
for _detail in interpreter.get_input_details():
    if list(_detail['shape']) != [1, SMOKE_SEQ_LEN]:
        interpreter.resize_tensor_input(_detail['index'], [1, SMOKE_SEQ_LEN])
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()


def predict_tflite(text):
    """Classify *text* with the loaded TFLite model.

    Args:
        text: Message to classify.

    Returns:
        ``(pred, probs)``: the predicted class index and the softmax
        probabilities as a list.
    """
    enc = tokenizer(text, padding='max_length', truncation=True, max_length=SMOKE_SEQ_LEN, return_tensors='np')
    # Bind tensors by input name: the order of input_details is not
    # guaranteed to be (input_ids, attention_mask).
    lowered_names = [d['name'].lower() for d in input_details]
    names_usable = all('input_ids' in n or 'attention_mask' in n for n in lowered_names)
    positional_keys = ('input_ids', 'attention_mask')
    for position, detail in enumerate(input_details):
        if names_usable:
            key = 'attention_mask' if 'attention_mask' in lowered_names[position] else 'input_ids'
        elif position < len(positional_keys):
            # Names give no hint: keep the previous positional convention.
            key = positional_keys[position]
        else:
            continue
        interpreter.set_tensor(detail['index'], enc[key].astype(detail['dtype']))

    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    probs = tf.nn.softmax(output, axis=-1).numpy()
    pred = int(np.argmax(probs, axis=-1)[0])
    return pred, probs[0].tolist()


# Test
samples = [
    'URGENT: Your account will be suspended. Click http://fake.example to verify',
    'Hey, let us meet tomorrow for lunch',
]
for s in samples:
    pred, probs = predict_tflite(s)
    print(s)
    print('pred:', pred, 'probs:', probs)

In [None]:
# 9) Save artifacts to Drive (optional) and provide download links
from google.colab import drive
import os
import shutil

drive.mount('/content/drive', force_remount=True)

# Destination folder on Drive for the float16 model and the model/tokenizer folder.
dst = '/content/drive/MyDrive/phishing_detector_artifacts'
os.makedirs(dst, exist_ok=True)
shutil.copy('phishing_detector.tflite', dst)
model_folder_copy = os.path.join(dst, 'phishing_detector_model')
shutil.copytree('./phishing_detector_model', model_folder_copy, dirs_exist_ok=True)
print('Copied artifacts to', dst)
print('You can download phishing_detector.tflite from your Drive or use the Colab file browser to download directly')

### Done — next steps

- Download `phishing_detector.tflite` and `vocab`/tokenizer files and hand to the mobile dev.
- If the TFLite model size is too large for the app, re-run conversion with more aggressive quantization (int8, with a small calibration dataset) — see sections 7.3 and 7.4 of this notebook.
- If you prefer I prepare a Colab notebook file and commit it to the repo, I can create it and add run instructions.

### 10) Package artifacts and copy to Google Drive

This cell packages the TFLite artifacts (float16/dynamic/int8), the tokenizer folder, runs a small evaluation on a sample of `test.csv` (if present), writes a README with sizes and quick metrics, zips the package, and copies it to Drive under `phishing_detector_artifacts_v2`.

Run this cell after you have the `.tflite` files and the tokenizer saved to `./phishing_detector_model` or `./tokenizer`.

In [None]:
# Packaging & export cell: create a package, run light evaluation, and copy to Drive
import os, shutil, json, time, pathlib
from datetime import datetime
import pandas as pd
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer
try:
    from sklearn.metrics import accuracy_score, precision_recall_fscore_support
    SKLEARN = True
except Exception:
    SKLEARN = False

# Config
DRIVE_DST = '/content/drive/MyDrive/phishing_detector_artifacts_v2'  # change if you want a different path
LOCAL_PACKAGE = 'phishing_detector_package'
EVAL_SAMPLE = 2000  # number of test rows to sample for quick evaluation (set lower if you want faster runs)
MAX_LEN = 128

# Mount Drive (will prompt if not mounted yet)
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
os.makedirs(DRIVE_DST, exist_ok=True)
# Start from an empty local package folder on every run.
shutil.rmtree(LOCAL_PACKAGE, ignore_errors=True)
os.makedirs(LOCAL_PACKAGE, exist_ok=True)

# Gather available TFLite artifacts
candidates = [
    ('float16', 'phishing_detector.tflite'),
    ('dynamic', 'phishing_detector_dynamic.tflite'),
    ('int8', 'phishing_detector_int8.tflite'),
]
found = []
for qtype, fname in candidates:
    if not os.path.exists(fname):
        continue
    size_mb = round(os.path.getsize(fname) / (1024*1024), 2)
    shutil.copy(fname, os.path.join(LOCAL_PACKAGE, fname))
    found.append({'quant': qtype, 'file': fname, 'size_mb': size_mb})

# Copy the first tokenizer directory that exists into the package.
tokenizer_src = next(
    (d for d in ('tokenizer', 'phishing_detector_model', './phishing_detector_model') if os.path.isdir(d)),
    None,
)
if tokenizer_src is None:
    print('Warning: tokenizer folder not found; ensure you include tokenizer files when packaging')
else:
    shutil.copytree(tokenizer_src, os.path.join(LOCAL_PACKAGE, 'tokenizer'), dirs_exist_ok=True)

# Light evaluation helper (works on a sample of test set to keep runtime short)

def run_tflite_eval(tflite_path, test_texts, test_labels, max_eval=500):
    """Evaluate a TFLite classifier on the first ``max_eval`` text/label pairs.

    Args:
        tflite_path: Path of the ``.tflite`` file to load.
        test_texts: List of message strings.
        test_labels: List of integer labels aligned with *test_texts*.
        max_eval: Upper bound on the number of examples scored.

    Returns:
        ``None`` when nothing was scored. Otherwise a dict with ``accuracy``
        and ``n``, plus ``precision``, ``recall`` and ``f1`` when scikit-learn
        is available.

    Raises:
        RuntimeError: If the module-level ``tokenizer`` is not loaded.
    """
    if tokenizer is None:
        raise RuntimeError('tokenizer is not loaded; cannot evaluate ' + str(tflite_path))
    interpreter = tf.lite.Interpreter(model_path=tflite_path)
    # Resize inputs to (1, MAX_LEN)
    for inp in interpreter.get_input_details():
        interpreter.resize_tensor_input(inp['index'], [1, MAX_LEN])
    interpreter.allocate_tensors()
    # Query details after allocation so they describe the resized tensors.
    input_details = interpreter.get_input_details()
    output_detail = interpreter.get_output_details()[0]

    # Score only positions that have both a text and a label, so the
    # prediction list and the label list always have the same length.
    n_eval = min(len(test_texts), len(test_labels), max_eval)
    preds = []
    for text in test_texts[:n_eval]:
        enc = tokenizer(text, truncation=True, padding='max_length', max_length=MAX_LEN, return_tensors='np')
        for inp in input_details:
            key = 'attention_mask' if 'attention_mask' in inp['name'].lower() else 'input_ids'
            interpreter.set_tensor(inp['index'], np.asarray(enc[key]).astype(inp['dtype']))
        interpreter.invoke()
        out = interpreter.get_tensor(output_detail['index'])
        # dequantize if int8 output
        if output_detail['dtype'] == np.int8:
            scale, zero_point = output_detail['quantization']
            out = (out.astype(np.float32) - zero_point) * scale
        if out.ndim == 2 and out.shape[1] >= 2:
            pred = int(np.argmax(out, axis=-1)[0])
        else:
            # Single logit: sigmoid(x) > 0.5 is the same test as x > 0, and
            # avoids overflow in exp() for large negative logits.
            pred = int(out.ravel()[0] > 0)
        preds.append(pred)

    if not preds:
        return None
    truth = list(test_labels[:n_eval])
    if SKLEARN:
        acc = accuracy_score(truth, preds)
        p, r, f1, _ = precision_recall_fscore_support(truth, preds, average='binary', zero_division=0)
        return {'accuracy': float(acc), 'precision': float(p), 'recall': float(r), 'f1': float(f1), 'n': n_eval}
    # simple accuracy fallback
    acc = float((np.array(preds) == np.array(truth)).mean())
    return {'accuracy': acc, 'n': n_eval}

# Prepare test data (sample) if available
# Load the tokenizer from the directory that was actually found and packaged.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_src) if tokenizer_src else None
test_df = None
for candidate_csv in ('test.csv', 'data/processed/test.csv'):
    if os.path.exists(candidate_csv):
        test_df = pd.read_csv(candidate_csv)
        break

eval_results = {}
if test_df is None:
    print('No test.csv found for evaluation; skipping evaluation step')
elif not {'message', 'label'}.issubset(test_df.columns):
    print("test.csv has no 'message'/'label' columns; skipping evaluation step")
elif tokenizer is None:
    print('Tokenizer not available; skipping evaluation step')
else:
    # Rows without a message or a label cannot be scored.
    test_df = test_df.dropna(subset=['message', 'label'])
    # ensure numeric labels 0/1
    if not pd.api.types.is_integer_dtype(test_df['label']):
        mapped = test_df['label'].map({'phishing': 1, 'legitimate': 0})
        test_df = test_df.assign(label=mapped.fillna(test_df['label']))
    # Draw a reproducible random sample rather than the first rows, so a file
    # sorted by label does not skew the quick metrics.
    if len(test_df) > EVAL_SAMPLE:
        test_df = test_df.sample(n=EVAL_SAMPLE, random_state=42)
    labels = test_df['label'].astype(int).tolist()
    texts = test_df['message'].astype(str).tolist()
    # Evaluate each found tflite file (only short sample to keep time reasonable)
    for meta in found:
        fname = meta['file']
        local_path = os.path.join(LOCAL_PACKAGE, fname)
        print('\nEvaluating', fname, 'on a sample of up to', EVAL_SAMPLE, 'rows...')
        res = run_tflite_eval(local_path, texts, labels, max_eval=min(EVAL_SAMPLE, len(texts)))
        eval_results[fname] = res

# Create README with metadata and evaluation results
readme_path = os.path.join(LOCAL_PACKAGE, 'README.md')
now = datetime.utcnow().isoformat() + 'Z'

# Assemble the README text first, then write it in one call.
readme_parts = [
    '# Phishing detector artifacts\n',
    f'\nCreated: {now}\n',
    '\nModel: distilbert-base-multilingual-cased\n',
    f'max_length: {MAX_LEN}\n',
    'input_names: serving_default_input_ids:0, serving_default_attention_mask:0\n',
    '\nFiles included:\n',
]
readme_parts.extend(
    f"- {m['file']}  ({m['quant']}, {m['size_mb']} MB)\n" for m in found
)
if tokenizer_src:
    readme_parts.append(f'- tokenizer folder: {tokenizer_src}\n')
readme_parts.append('\nEvaluation results (sample):\n')
readme_parts.append(json.dumps(eval_results, indent=2))
with open(readme_path, 'w') as fh:
    fh.write(''.join(readme_parts))

# Save a small CSV of sample predictions (optional) -- here we'll skip for brevity unless user wants it explicitly

# Zip the package folder and copy the archive to Drive.
archive_name = shutil.make_archive(base_name=LOCAL_PACKAGE, format='zip', root_dir=LOCAL_PACKAGE)
shutil.copy(archive_name, DRIVE_DST)
print('Packaged artifacts ->', archive_name)
print('Copied archive to Drive at', DRIVE_DST)
print('Contents written to', LOCAL_PACKAGE)
