<a href="https://colab.research.google.com/github/SHIVASHANKAR-V07/Llama_3_Indian_Gender_Classifier/blob/main/Llama3_Indian_Gender_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1] IMPORTING DEPENDENCIES :-**

In [None]:
# Install Unsloth directly from its GitHub repository (no version pin, so this tracks the latest commit).
!pip install "unsloth @ git+https://github.com/unslothai/unsloth.git"

# Install the training stack without resolving transitive dependencies; trl is capped below 0.9.0.
!pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes
# Install unsloth_zoo (also unpinned).
!pip install unsloth_zoo

# **2] LOADING DATASET :-**

### **a) Connecting Drive :**

In [None]:
# Mount Google Drive at /content/drive so the dataset files under MyDrive can be read and written.
from google.colab import drive
drive.mount('/content/drive')

### **b) JSON Format of Dataset :**

In [None]:
# Importing Libraries
import pandas as pd
import json
import os

# Setting file path
input_csv = "/content/drive/MyDrive/Train_Labels_Dataset.csv"
output_file = "/content/drive/MyDrive/train_dataset.jsonl"

# Mapping Labels
label_map = {
    0: "neutral",
    1: "male",
    2: "female"
}

# Prompt Template
prompt_template = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Identify the gender of the given Indian name.

### Input:
{name}

### Response:
{gender}"""

# Loading CSV
df = pd.read_csv(input_csv)

# JSONL File Creation
with open(output_file, "w") as f:
    for _, row in df.iterrows():
        # Lowercasing Names
        name_val = str(row['Name']).strip().lower()

        # Labels Mapping
        gender_val = label_map.get(row['Label'], "neutral")

        # Prompt Formatting
        full_text = prompt_template.format(name=name_val, gender=gender_val)

        # EOS token for Llama 3
        full_text += " <|end_of_text|>"

        # Saving
        json.dump({"text": full_text}, f)
        f.write("\n")

# Verifying
print(f"Successfully converted {len(df)} rows!")
print(f"File saved at: {output_file}")
!head -n 5 {output_file}

# **3] MODEL INTEGRATION :-**
- ## unsloth/llama-3-8b-bnb-4bit 🦥🦙

### **a) Checkpoints Storage :**

In [None]:
# Mounting Google Drive (repeated here so this section can be run without the dataset section)
from google.colab import drive
drive.mount('/content/drive')

# Directory to store checkpoints; passed as output_dir to TrainingArguments in the fine-tuning cell
DRIVE_OUTPUT_DIR="/content/drive/MyDrive/Llama3_FineTune_Checkpoints"

### **b) Resume Training - Error Blocker :**

In [None]:
# Run this only when resuming the training to tackle "UnpicklingError"
import os
import torch
import numpy as np


def patched_load(*args, **kwargs):
    """Call the real torch.load with weights_only forced to False.

    This disables PyTorch's restricted unpickler, so arbitrary pickled objects
    in the checkpoint (e.g. NumPy globals) can be loaded. It is NOT a safety
    feature: only use it on checkpoints you created yourself.
    Any weights_only value passed by the caller is overridden.
    """
    kwargs['weights_only'] = False

    # Optional: You can also try allow-listing the NumPy global if the above doesn't work:
    # from torch.serialization import add_safe_globals
    # add_safe_globals([np.core.multiarray._reconstruct])

    return original_load(*args, **kwargs)


# Capture the genuine torch.load exactly once. If this cell is re-run, torch.load is
# already a patched wrapper; rebinding original_load to that wrapper would make it
# call itself and recurse forever, so recover the real function from the marker instead.
original_load = getattr(torch.load, "_unpatched_load", torch.load)
patched_load._unpatched_load = original_load

# Replace the official torch.load with the patched version
torch.load = patched_load

print("Successfully patched torch.load to allow checkpoint resumption.")

# Environment flag read by Unsloth (kept from the original cell;
# NOTE(review): confirm the exact effect against the installed Unsloth version).
os.environ['UNSLOTH_ALWAYS_RESTART_TRAINER'] = 'True'

### **c) Unsloth Latest Version :**

In [None]:
# Force a clean reinstall of the latest unsloth and unsloth_zoo releases, bypassing the pip cache and skipping dependency resolution.
!pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo

### **d) Fine-Tuning Model :**

In [None]:
# Importing Model Integration Libraries
from unsloth import FastLanguageModel
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments
import torch

# Base checkpoint (pre-quantised 4-bit Llama 3 8B) and the context length shared by model and trainer
model_name = "unsloth/llama-3-8b-bnb-4bit" # Base-Model
context_length = 2048

# Load the quantised base model together with its tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = context_length,
    dtype = None,
    load_in_4bit = True,
)

# Attach LoRA adapters to the attention and MLP projection layers
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = lora_target_modules,
    lora_alpha = 16,
    lora_dropout = 0.05,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

# Training data: the JSONL file written by the dataset cell (one {"text": ...} object per line)
dataset = load_dataset(
    "json",
    data_files = "/content/drive/MyDrive/train_dataset.jsonl",
    split = "train",
)

# Use bf16 when the GPU supports it, otherwise fall back to fp16
bf16_available = torch.cuda.is_bf16_supported()

# Optimisation and checkpointing settings; checkpoints go to DRIVE_OUTPUT_DIR (defined in the checkpoint-storage cell)
training_args = TrainingArguments(
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    warmup_steps = 5,
    num_train_epochs = 1,
    learning_rate = 2e-4,
    bf16 = bf16_available,
    fp16 = not bf16_available,
    logging_steps = 10,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    output_dir = DRIVE_OUTPUT_DIR,
    save_strategy = "steps",
    save_steps = 500,
    save_total_limit = 2,
    seed = 3407,
    report_to = "none",
)

# Supervised fine-tuning trainer over the "text" field, with sequence packing enabled
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = context_length,
    dataset_num_proc = 2,
    packing = True,
    args = training_args,
)

# Resume automatically when a checkpoint exists, otherwise start from scratch.
# Passing resume_from_checkpoint=True unconditionally fails on the very first run,
# because the output directory does not yet contain any checkpoint.
import os
from transformers.trainer_utils import get_last_checkpoint

last_checkpoint = (
    get_last_checkpoint(DRIVE_OUTPUT_DIR) if os.path.isdir(DRIVE_OUTPUT_DIR) else None
)

print("Training Session has started !")
if last_checkpoint is not None:
    print(f"Resuming from checkpoint: {last_checkpoint}")

# A checkpoint path resumes from it; None trains from the beginning.
trainer.train(resume_from_checkpoint = last_checkpoint)

print("Training Completed...")

# **4] MODEL INFERENCE TESTING :-**

### **a) Single-Name Testing :**

In [None]:
from unsloth import FastLanguageModel

# Switch the fine-tuned model into Unsloth's inference mode
FastLanguageModel.for_inference(model)

# Inference prompt: identical to the training template, but with the response left empty.
# It is also reused by the multi-name testing cell.
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Identify the gender of the given Indian name.

### Input:
{}

### Response:
"""

test_name = "pragathish"

# Tokenize a batch containing the single formatted prompt and move it to the GPU
formatted_prompts = [prompt.format(test_name)]
inputs = tokenizer(formatted_prompts, return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)

# Drop the prompt tokens so that only the newly generated continuation is decoded
input_len = inputs.input_ids.shape[1]
final_output = outputs[:, input_len:]

decoded_batch = tokenizer.batch_decode(final_output, skip_special_tokens = True)
result = decoded_batch[0]

print(f"Name: {test_name.ljust(10)} | Predicted: {result.strip()}")

### **b) Multi-Name Testing :**

In [None]:
test_names = ["ragul", "ananya", "kisan", "preeti"]


def _generate_label(name):
    """Run the model on one name and return the decoded continuation without the prompt."""
    encoded = tokenizer([prompt.format(name)], return_tensors = "pt").to("cuda")
    generated = model.generate(**encoded, max_new_tokens = 10)
    # Skip the prompt tokens so the prompt is not echoed in the prediction
    prompt_token_count = encoded.input_ids.shape[1]
    return tokenizer.decode(generated[0][prompt_token_count:], skip_special_tokens=True)


for name in test_names:
    prediction = _generate_label(name)
    print(f"Name: {name.ljust(10)} | Predicted: {prediction.strip()}")

# **5] SAVING MODEL :-**
>  Save the model as soon as the **"Training"** is finished

> Use any one of the **"Saving Mechanisms"** below, depending on your use case and the storage capacity available


> **"Mount your Drive"** before running the code blocks below


### **a) LoRA Adapters :**
- ***Developers ( ~200 MB )***

In [None]:
# Destination folder on Drive for the LoRA adapter weights and tokenizer files
model_path = "/content/drive/MyDrive/Fine_Tuned_Model/LoRA"

# Save the adapter first, then the tokenizer, into the same folder
for artifact in (model, tokenizer):
    artifact.save_pretrained(model_path)

print(f"‚úÖ Model successfully saved to {model_path}")

### **b) GGUF :**
- ***Laptop Users ( ~5 GB ) - runs on CPU***

In [None]:
# Destination passed to Unsloth's GGUF exporter.
# NOTE(review): the upload and llama.cpp cells read from ".../GGUF_gguf"; confirm which folder the export actually creates.
model_path = "/content/drive/MyDrive/Fine_Tuned_Model/GGUF"

# Quantization preset used for the exported GGUF file
gguf_quantization = "q4_k_m"

model.save_pretrained_gguf(model_path, tokenizer, quantization_method = gguf_quantization)

print(f"‚úÖ GGUF format successfully saved to: {model_path}")

### **c) MERGED :**
- ***Production APIs ( ~16.0 GB ) - faster deployment***

In [None]:
# Destination folder for the merged (base + LoRA) model; the upload and evaluation cells read from this same path.
model_path = "/content/drive/MyDrive/Fine_Tuned_Model/Merged"

model.save_pretrained_merged(
    model_path,
    tokenizer,
    save_method = "merged_16bit" # Use 'merged_4bit' for saving space (~5.5GB)
)

# Report the directory that was actually passed to save_pretrained_merged
# (the previous message appended a "_merged" suffix that is not part of the path).
print(f"‚úÖ Merged model successfully saved to: {model_path}")

# **6] PUBLISHING MODEL :-**
- ## huggingface 🤗

- ## Two ways to publish -


      1.   Using the "Inference Model"
      2.   Using the "Drive Folders"



### **a) Logging into Hugging Face :**
- ***Using "write" token - Saved in 'Secrets'***
- ***"Llama-3-Indian-Gender-Classifier"*** **- Public Model Repository**

In [None]:
from google.colab import userdata
from huggingface_hub import login, HfApi

# Target model repository on the Hugging Face Hub
repo_id = "shisha-07/Llama-3-Indian-Gender-Classifier"

# Client used later by the upload_folder cells
api = HfApi()

# Read the "write" token from Colab Secrets and authenticate with it
hf_token = userdata.get('HF_TOKEN')
login(hf_token)

print("‚úÖ Successfully logged in via Colab Secrets!")

### **b) Connecting to Drive :**

In [None]:
# Connecting with Drive (needed so the saved model folders under MyDrive are reachable)
from google.colab import drive
drive.mount('/content/drive')

### **c) Using the "Inference Model" :**

#### **i) Loading Inference Model -**

In [None]:
from unsloth import FastLanguageModel

# Folder written by the "LoRA Adapters" saving cell
model_path = '/content/drive/MyDrive/Fine_Tuned_Model/LoRA'

# Reload the fine-tuned model and tokenizer from Drive in 4-bit;
# this rebinds the global `model` and `tokenizer` used by the push cells below.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_path, # Loading the Inference Model
    max_seq_length = 2048,
    load_in_4bit = True,
)

print("‚úÖ Successfully Loaded Inference Modal")

#### **ii) LoRA Adapter Model -**


In [None]:
# Push the adapter weights first, then the tokenizer, each with its own commit message
hub_uploads = (
    (model, "Initial upload of LoRA Adapters"),
    (tokenizer, "Uploaded Tokenizer"),
)
for hub_object, message in hub_uploads:
    hub_object.push_to_hub(repo_id, commit_message=message)

print("LoRA Success!")

print(f"üöÄ Success! View your model at: https://huggingface.co/{repo_id}")

#### **iii) GGUF Model -**

In [None]:
# Options for converting to GGUF (q4_k_m) and committing the result to the Hub
gguf_push_options = {
    "quantization_method": "q4_k_m",
    "commit_message": "Added GGUF q4_k_m Version for Local Inference (Ollama/LM Studio)",
}
model.push_to_hub_gguf(repo_id, tokenizer, **gguf_push_options)

print("GGUF Success")

print(f"üöÄ Success! View your model at: https://huggingface.co/{repo_id}")

#### **iv) Merged Model -**

In [None]:
# Options for merging the adapters into 16-bit weights and committing them to the Hub
merged_push_options = {
    "save_method": "merged_16bit",
    "commit_message": "Added Standalone 16-bit Merged Model for Production Deployment",
}
model.push_to_hub_merged(repo_id, tokenizer, **merged_push_options)

print("Merge Success")

print(f"üöÄ Success! View your model at: https://huggingface.co/{repo_id}")

### **d) Using the "Drive Folders" :**

#### **i) LoRA Adapter Model -**

In [None]:
print("Uploading LoRA...")

# Copy the LoRA folder saved on Drive into the "LoRA" sub-folder of the Hub repository
lora_upload = {
    "folder_path": "/content/drive/MyDrive/Fine_Tuned_Model/LoRA",
    "path_in_repo": "LoRA",
    "repo_id": repo_id,
    "commit_message": "Uploading LoRA adapters",
}
api.upload_folder(**lora_upload)

print(f"üöÄ Success! View your model at: https://huggingface.co/{repo_id}")

#### **ii) GGUF Model -**

In [None]:
print("Uploading GGUF...")

# Copy the GGUF folder on Drive into the "GGUF" sub-folder of the Hub repository
gguf_upload = {
    "folder_path": "/content/drive/MyDrive/Fine_Tuned_Model/GGUF_gguf",
    "path_in_repo": "GGUF",
    "repo_id": repo_id,
    "commit_message": "Uploaded GGUF q4_k_m",
}
api.upload_folder(**gguf_upload)

print(f"üöÄ Success! View your model at: https://huggingface.co/{repo_id}")

#### **iii) Merged Model -**

In [None]:
print("Uploading Merged...")

# Copy the merged-model folder on Drive into the "Merged" sub-folder of the Hub repository,
# leaving out cache directories and README files
merged_upload = {
    "folder_path": "/content/drive/MyDrive/Fine_Tuned_Model/Merged",
    "path_in_repo": "Merged",
    "repo_id": repo_id,
    "ignore_patterns": ["**/.cache/*", "**/README.md"],
    "commit_message": "Uploaded Merged Model Weights",
}
api.upload_folder(**merged_upload)

print(f"üöÄ Success! View your model at: https://huggingface.co/{repo_id}")

# **7] USING THE MODEL :-**

### **a) LoRA Adapters :**

In [None]:
from unsloth import FastLanguageModel
from huggingface_hub import snapshot_download
import torch
import os

# Variables
MODEL_ID = "shisha-07/Llama-3-Indian-Gender-Classifier"
NAME_TO_TEST = "Aarav"

print(f"‚¨áÔ∏è Downloading adapter files from '{MODEL_ID}/LoRA'...")

# Fetch only the LoRA sub-folder of the repository into a local directory
download_path = snapshot_download(
    repo_id=MODEL_ID,
    allow_patterns=["LoRA/*"],
    local_dir="downloaded_adapters",
)

local_adapter_path = os.path.join(download_path, "LoRA")

print(f"‚úÖ Adapters downloaded to: {local_adapter_path}")

# Loading Base Model
print(f"üîÑ Loading Base Llama-3 Model...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit", # Same base model the adapters were trained on
    max_seq_length = 2048,
    load_in_4bit = True,
)

# Loading Adapters
print(f"üîó Attaching Adapters...")
model.load_adapter(local_adapter_path) # Load from the folder downloaded above
FastLanguageModel.for_inference(model)

# Run Inference (prompt must be identical to the training template)
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Identify the gender of the given Indian name.

### Input:
{}

### Response:
"""

# The training data was built from stripped, lowercased names, so apply the same
# normalisation here; sending "Aarav" unchanged would not match what the model was trained on.
normalized_name = NAME_TO_TEST.strip().lower()

print(f"üöÄ Testing Name: {NAME_TO_TEST}")
inputs = tokenizer(
    [alpaca_prompt.format(normalized_name)],
    return_tensors = "pt"
).to(model.device)  # follow the model's device instead of assuming "cuda"

outputs = model.generate(
    **inputs,
    max_new_tokens = 10,
    use_cache = True
)

# The decoded text still contains the prompt; keep only what follows the response marker
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
prediction = decoded.split("### Response:")[-1].strip()

print("\n" + "="*30)
print(f"Name:       {NAME_TO_TEST}")
print(f"Prediction: {prediction}")
print("="*30)

### **b) MERGED :**

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id="shisha-07/Llama-3-Indian-Gender-Classifier"

# The merged weights live in the "Merged" sub-folder of the Hub repository
tokenizer = AutoTokenizer.from_pretrained(model_id, subfolder="Merged")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    subfolder="Merged",
    torch_dtype=torch.float16,
    device_map="auto"
)

# Use the same Alpaca template the model was fine-tuned on; an ad-hoc prompt such as
# "Name: ...\nGender:" is a format the model never saw during training.
alpaca_template = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Identify the gender of the given Indian name.

### Input:
{}

### Response:
"""

name_to_test = "aarav"  # names were lowercased when the training data was built
prompt = alpaca_template.format(name_to_test)

# device_map="auto" decides placement, so follow the model's device rather than hard-coding "cuda"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=10)

# Decode only the tokens generated after the prompt
generated_tokens = output[0][inputs["input_ids"].shape[1]:]
prediction = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
print(f"Name: {name_to_test.ljust(10)} | Predicted: {prediction}")

### **c) GGUF :**

In [None]:
!pip install llama-cpp-python
from llama_cpp import Llama

# Download the .gguf file from HF 'GGUF' folder first
model = Llama(
    model_path="/content/drive/MyDrive/Fine_Tuned_Model/GGUF_gguf/llama-3-8b.Q4_K_M.gguf", # Path of GGUF file
    n_ctx=2048,
)

output = model(
    "Name: priya\nGender:",
    max_tokens=10,
    stop=["\n"]
)

print(output["choices"][0]["text"])

# **8] EVALUATING THE MODEL :-**

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from sklearn.metrics import accuracy_score, classification_report, f1_score
from tqdm import tqdm

# --- PATH ---
# Merged model folder on Drive (Drive must already be mounted in this session)
MODEL_PATH = "/content/drive/MyDrive/Fine_Tuned_Model/Merged"

# --- TEST DATA ---
# Hand-written evaluation set: each entry pairs a name with its expected label.
# NOTE(review): names are capitalised here, whereas the training data was lowercased.
# NOTE(review): overlap with the training CSV is not checked in this notebook; verify before quoting the scores.
test_data = [
    # Male Names
    {"name": "Arjun", "label": "male"}, {"name": "Vikram", "label": "male"},
    {"name": "Rohan", "label": "male"}, {"name": "Ishaan", "label": "male"},
    {"name": "Mohammed", "label": "male"}, {"name": "Siddharth", "label": "male"},
    {"name": "Aditya", "label": "male"}, {"name": "Varun", "label": "male"},
    {"name": "Karan", "label": "male"}, {"name": "Abhishek", "label": "male"},
    {"name": "Sanjay", "label": "male"}, {"name": "Rajesh", "label": "male"},
    {"name": "Aakash", "label": "male"}, {"name": "Kartik", "label": "male"},
    {"name": "Harish", "label": "male"}, {"name": "Ganesh", "label": "male"},
    {"name": "Manish", "label": "male"}, {"name": "Prateek", "label": "male"},
    {"name": "Vivek", "label": "male"}, {"name": "Ashok", "label": "male"},
    {"name": "Vijay", "label": "male"}, {"name": "Nitin", "label": "male"},
    {"name": "Rahul", "label": "male"}, {"name": "Manoj", "label": "male"},
    {"name": "Dilip", "label": "male"},

    # Female Names
    {"name": "Deepika", "label": "female"}, {"name": "Saritha", "label": "female"},
    {"name": "Ananya", "label": "female"}, {"name": "Kavita", "label": "female"},
    {"name": "Fatima", "label": "female"}, {"name": "Priyanka", "label": "female"},
    {"name": "Meenakshi", "label": "female"}, {"name": "Shweta", "label": "female"},
    {"name": "Tanvi", "label": "female"}, {"name": "Riya", "label": "female"},
    {"name": "Nandini", "label": "female"}, {"name": "Pooja", "label": "female"},
    {"name": "Sneha", "label": "female"}, {"name": "Ishani", "label": "female"},
    {"name": "Amrita", "label": "female"}, {"name": "Divya", "label": "female"},
    {"name": "Jyoti", "label": "female"}, {"name": "Rashmi", "label": "female"},
    {"name": "Simran", "label": "female"}, {"name": "Lata", "label": "female"},
    {"name": "Bhavna", "label": "female"}, {"name": "Sonal", "label": "female"},
    {"name": "Preeti", "label": "female"}, {"name": "Geeta", "label": "female"},

    # Neutral Names (the final entry is a male-labelled control, not a neutral name)
    {"name": "Kiran", "label": "neutral"},
    {"name": "Sonu", "label": "neutral"},
    {"name": "Suman", "label": "neutral"},
    {"name": "Krishna", "label": "neutral"},
    {"name": "Happy", "label": "neutral"},
    {"name": "Lucky", "label": "neutral"},
    {"name": "Deepu", "label": "neutral"},
    {"name": "Gurpreet", "label": "neutral"},
    {"name": "Sukhdeep", "label": "neutral"},
    {"name": "Raj", "label": "male"} # Control Check
]

# --- LOAD MODEL ---
print(f"üîÑ Loading model from: {MODEL_PATH}...")

# Tokenizer first; fall back to the EOS token when no padding token is defined
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the merged weights in 4-bit with fp16 compute and double quantization
bnb_settings = {
    "load_in_4bit": True,
    "bnb_4bit_compute_dtype": torch.float16,
    "bnb_4bit_use_double_quant": True,
}
quant_config = BitsAndBytesConfig(**bnb_settings)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=quant_config,
    device_map="auto",
)

# --- INFERENCE LOOP ---
print(f"\nüöÄ Evaluating {len(test_data)} names...")

y_true = [item['label'] for item in test_data]
y_pred = []

# Alpaca Prompt Template (Must match training!)
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Identify the gender of the given Indian name.

### Input:
{}

### Response:
"""

for item in tqdm(test_data):
    # The training data was built from stripped, lowercased names, so normalise the
    # evaluation names the same way; otherwise the model is scored on inputs it never saw.
    normalized_name = item['name'].strip().lower()

    # Format input using template; follow the model's device (device_map="auto") instead of assuming "cuda"
    inputs = tokenizer(
        [alpaca_prompt.format(normalized_name)],
        return_tensors="pt"
    ).to(model.device)

    # Generate output
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            use_cache=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the tokens produced after the prompt, so the prompt text can never leak into the prediction
    prompt_token_count = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(
        outputs[0][prompt_token_count:], skip_special_tokens=True
    ).strip().lower()

    # The first word of the completion is the predicted label; an empty completion is "unknown".
    # No blanket try/except: a real failure should surface instead of being scored as a wrong answer.
    completion_words = completion.split()
    prediction = completion_words[0] if completion_words else "unknown"

    y_pred.append(prediction)

# --- RESULTS ---
# Overall accuracy and support-weighted F1 across all predictions
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')

print(f"\n‚ú® FINAL EVALUATION RESULTS ‚ú®")
print(f"‚úÖ Accuracy: {accuracy:.1%}")
print(f"‚öñÔ∏è F1 Score: {f1:.4f}")

# Per-class precision / recall / F1, restricted to the three expected labels
print("\nüìã Detailed Classification Report:")
report_text = classification_report(
    y_true,
    y_pred,
    labels=["male", "female", "neutral"],
    zero_division=0
)
print(report_text)

# Mismatch Analysis: collect every (name, expected, predicted) triple that disagrees
print("\n‚ùå Mismatches:")
mismatches = []
for sample, predicted in zip(test_data, y_pred):
    if sample['label'] != predicted:
        mismatches.append((sample['name'], sample['label'], predicted))

if mismatches:
    print(f"{'Name':<15} | {'True Label':<10} | {'Predicted':<10}")
    print("-" * 40)
    for wrong_name, expected, got in mismatches:
        print(f"{wrong_name:<15} | {expected:<10} | {got:<10}")
else:
    print("None! Perfect score on this set.")