# Lab 7 Reproducible Packaging and Release
This is the final step in our Small Language Model lab series. Here we package our tuned model into a reusable, shareable form. The goal is to save the adapters, tokenizer, and metadata in a clean structure and, optionally, push them to the Hugging Face Hub.

## Step 0 Stable installs

In [ ]:
%pip install -q --force-reinstall numpy==2.0.2 pandas==2.2.2 pyarrow==17.0.0
%pip install -q datasets>=3.0.0 transformers>=4.41.0 peft>=0.11.0 accelerate>=0.29.0 sentencepiece>=0.1.99 tqdm>=4.66.0 bitsandbytes
print("If imports fail, restart runtime and re-run this cell.")

## Step 1 Auto detect best adapters

In [ ]:
from google.colab import drive
from pathlib import Path
import os

# Mount Google Drive and locate the lab workspace.
drive.mount('/content/drive')
BASE = Path('/content/drive/MyDrive/slm-labs')
assert BASE.exists(), f"Missing {BASE}"

# The release step of this notebook writes its own adapter_config.json into
# BASE/lab7_release. Skip that folder so a re-run does not pick the packaged
# release as the adapter to package.
release_dir = BASE / 'lab7_release'

# Collect every directory under BASE that holds an adapter_config.json.
ADAPS = []
for r, ds, fs in os.walk(BASE):
    root = Path(r)
    # Prune/sort in place: os.walk honours edits to the dirnames list when
    # walking top-down, which makes the traversal order (and therefore
    # ADAPS[-1]) deterministic instead of depending on filesystem listing order.
    ds[:] = sorted(d for d in ds if root / d != release_dir)
    if 'adapter_config.json' in fs:
        ADAPS.append(root)

print('Adapters found:')
for i, p in enumerate(ADAPS, 1):
    print(i, p)

# The last adapter directory in traversal order is used; None when nothing was found.
BEST_DIR = ADAPS[-1] if ADAPS else None
print('Using BEST_DIR:', BEST_DIR)
assert BEST_DIR and BEST_DIR.exists(), f"No adapter directory found under {BASE}"

## Step 2 Reload base model and adapters

In [ ]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

BASE_MODEL = 'HuggingFaceH4/zephyr-7b-beta'

# Pick the keyword arguments for loading the base model:
#   * no CUDA  -> float32 weights
#   * CUDA     -> 4-bit NF4 quantization (double quant, fp16 compute) with
#                 automatic device placement, or plain fp16 if building that
#                 configuration raises.
# NOTE(review): the try below only guards construction of the keyword
# arguments; an error raised later by from_pretrained is not caught here.
if not torch.cuda.is_available():
    kw = {'torch_dtype': torch.float32}
else:
    try:
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
        )
        kw = {
            'device_map': 'auto',
            'quantization_config': quant_cfg,
            'torch_dtype': torch.float16,
        }
    except Exception:
        kw = {'torch_dtype': torch.float16}

# Tokenizer comes from the base model; reuse EOS as the pad token when none is set.
Tok = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
if Tok.pad_token is None:
    Tok.pad_token = Tok.eos_token

# Load the base weights, then attach the adapters found in BEST_DIR.
Base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **kw)
Tuned = PeftModel.from_pretrained(Base, str(BEST_DIR))
Tuned.eval()  # switch to evaluation mode
print("Model with adapters ready.")

## Step 3 Save adapters and tokenizer

In [ ]:
# Release folder inside the lab workspace; created if it does not exist yet.
REL_DIR = BASE.joinpath('lab7_release')
REL_DIR.mkdir(parents=True, exist_ok=True)

# Write the tokenizer first, then the adapter-wrapped model, into the same folder.
release_path = str(REL_DIR)
for artifact in (Tok, Tuned):
    artifact.save_pretrained(release_path)
print("Saved adapters and tokenizer to", REL_DIR)

## Step 4 Write a model card

In [ ]:
# Write a short model card (README.md) into the release folder.
card = REL_DIR / 'README.md'

# Full card text, assembled once so the file is written with a single call.
card_text = (
    "# Domain Tuned Small Language Model\n"
    "This model was fine tuned with LoRA adapters as part of the Lab 1–7 SLM training series.\n"
    "\n"
    "## Base Model\n"
    "HuggingFaceH4/zephyr-7b-beta\n"
    "\n"
    "## Training Data\n"
    "Domain text from ncbi/Open-Patients, prepared in Lab 3.\n"
    "\n"
    "## Method\n"
    "LoRA fine tuning with Unsloth, adapters attached in Lab 4, optimized in Lab 5, and evaluated in Lab 6.\n"
    "\n"
    "## Intended Use\n"
    "For experimentation and research. Not for clinical or production use without further validation.\n"
)

# Explicit UTF-8: the text contains an en dash ("Lab 1–7"); without an
# encoding argument the bytes written would depend on the platform locale.
with open(card, 'w', encoding='utf-8') as f:
    f.write(card_text)
print("Model card saved to", card)

## Step 5 Optional push to Hugging Face Hub

In [ ]:
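# To push to Hugging Face Hub, first log in with an access token that has write permission.
# Prefer the interactive prompt so the token is not stored in the notebook:
# from huggingface_hub import login
# login()  # or, less safely: login(token='hf_your_token_here')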

# Then run:
# from transformers import AutoModelForCausalLM, AutoTokenizer
# Tok.push_to_hub('your-username/your-model-name')
# Tuned.push_to_hub('your-username/your-model-name')
# print("Pushed to Hugging Face Hub.")