In [None]:
# 📦 1. Install required dependencies
!pip install -q "dvc[gdrive]" transformers==4.12.0 datasets mlflow seqeval
!pip install -q git+https://github.com/huggingface/transformers.git
!pip show transformers

In [None]:
# 🧠 2. Clone the repo from the correct branch
# 🚫 Remove the nested repo folder if it exists
!rm -rf amharic-telegram-ecommerce-ner

# ✅ Then re-clone cleanly
!git clone --branch task-3 https://github.com/Teshager21/amharic-telegram-ecommerce-ner.git

# Move into the clean project directory
%cd amharic-telegram-ecommerce-ner

In [None]:
# 🔐 3. Upload service account key and configure DVC remote
from google.colab import files
import shutil, os
from pathlib import Path

# Upload the file
uploaded = files.upload()
filename = next(iter(uploaded))  # get the uploaded filename

# Move it to a safe path (avoid special characters)
key_path = Path("/content/sa_key.json")
shutil.move(filename, key_path)

# Configure DVC to use the uploaded key
!dvc remote modify gdrive_remote gdrive_use_service_account true
!dvc remote modify gdrive_remote gdrive_service_account_json_file_path {key_path}

In [None]:
# ✅ 4. Pull data using DVC (with service account)
import subprocess

def dvc_pull_with_feedback():
    """Run ``dvc pull -v`` and print a human-readable outcome.

    The command's output is captured rather than streamed; it is only
    printed when the pull fails.

    Returns:
        bool: ``True`` when the command exited with status 0, ``False``
        otherwise (including when the ``dvc`` executable cannot be found),
        so callers can decide whether it is safe to continue.
    """
    print("📥 Pulling DVC-tracked data from Google Drive remote...")
    try:
        result = subprocess.run(["dvc", "pull", "-v"], capture_output=True, text=True)
    except FileNotFoundError:
        # subprocess raises this when the executable itself is missing.
        print("❌ DVC pull failed:")
        print("The 'dvc' executable was not found on PATH; install it first.")
        return False

    if result.returncode == 0:
        print("✅ DVC pull successful.")
        return True

    print("❌ DVC pull failed:")
    # Show whatever was captured on stdout as well, so no diagnostics are lost.
    if result.stdout:
        print(result.stdout)
    print(result.stderr)
    return False

# Run the pull now; later cells reference files under data/ by relative path.
dvc_pull_with_feedback()


In [None]:
# ✅ Colab cell to run fine-tuning with updated Hydra config (no old keys)
import os
# Set WANDB_MODE=offline in this process's environment so the training
# subprocess started below inherits it.
# NOTE(review): presumably this keeps Weights & Biases logging local — confirm
# that the training script actually uses wandb.
os.environ["WANDB_MODE"] = "offline"
# Launch the training entry point with src/ on PYTHONPATH. Every key=value
# argument is passed to the script as a command-line override.
# All paths are relative, so the current working directory must be the
# repository root (the directory that contains src/ and data/).
# Do not insert comments between the continued lines below: the trailing
# backslashes join them into a single shell command.
!PYTHONPATH=src python src/train/main.py \
  model.name_or_path=xlm-roberta-base \
  training.epochs=3 \
  training.batch_size=16 \
  training.evaluation_strategy=epoch \
  data.train_file=data/labeled/train_split.conll \
  data.eval_file=data/labeled/eval_split.conll \
  output_dir=models/ner \
  logging.level=INFO

python: can't open file '/home/teshager/Documents/10Academy/repositories/projects/amharic-telegram-ecommerce-ner/notebooks/training/src/train/main.py': [Errno 2] No such file or directory


In [None]:
# 🧠 Evaluate NER Model
from src.eval.evaluate import evaluate

# Entity types recognised by the tagger, in the order their labels appear.
ENTITY_TYPES = ("PRODUCT", "PRICE", "LOC")

# BIO label inventory: the outside tag "O" first, then a B-/I- pair for each
# entity type, giving O, B-PRODUCT, I-PRODUCT, B-PRICE, I-PRICE, B-LOC, I-LOC.
LABEL_LIST = ["O"] + [
    f"{prefix}-{entity}"
    for entity in ENTITY_TYPES
    for prefix in ("B", "I")
]

In [None]:
# Paths for model and test data
# Both are relative paths, so they resolve against the current working directory.
# MODEL_PATH uses the same value as the `output_dir` override in the training cell.
MODEL_PATH = "models/ner"  # Path to the fine-tuned model directory
# NOTE(review): the training cell reads its splits from data/labeled/, while
# this file lives under data/processed/ — confirm it exists after `dvc pull`.
TEST_DATA_PATH = "data/processed/ner_test.conll"  # Path to the test dataset in CoNLL format


In [None]:
# Run evaluation
import traceback

try:
    evaluate(
        model_path=MODEL_PATH,
        data_path=TEST_DATA_PATH,
        label_list=LABEL_LIST,
    )
except Exception as e:
    # Best-effort: a failed evaluation must not stop the rest of the notebook.
    # The full traceback is printed as well, because the one-line message
    # alone does not show where the failure happened.
    print(f"Evaluation failed: {e}")
    traceback.print_exc()


In [None]:
# Launch MLflow UI in Colab
# The server is started with subprocess.Popen instead of `!mlflow ui ... &`:
# the stock IPython kernel rejects `!` commands that end in `&`, whereas Popen
# returns immediately and leaves the server running in the background.
import subprocess

MLFLOW_PORT = 5000

# Reuse a server started by an earlier run of this cell; launching a second
# one would fail to bind the same port.
_existing = globals().get("mlflow_ui_process")
if _existing is None or _existing.poll() is not None:
    mlflow_ui_process = subprocess.Popen(
        ["mlflow", "ui", "--port", str(MLFLOW_PORT)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

# In Colab the kernel's localhost is not reachable from the browser, so ask
# Colab for a proxied URL. Outside Colab, fall back to the plain local address.
try:
    from google.colab.output import eval_js

    mlflow_url = eval_js(f"google.colab.kernel.proxyPort({MLFLOW_PORT})")
except ImportError:
    mlflow_url = f"http://localhost:{MLFLOW_PORT}"

# To open the UI, click the link printed below after running this cell:
print(f"MLflow UI should be running on port {MLFLOW_PORT}: {mlflow_url}")


In [None]:
# 📊 View MLflow Run (Optional)
# Long-format listing, with human-readable sizes, of the mlruns/ directory in
# the current working directory. `ls` prints an error if it does not exist.
# NOTE(review): presumably this is where MLflow stored the runs — confirm the
# tracking location used by the training script.
!ls -lh mlruns/