In [1]:
!pip install -U transformers accelerate bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m46.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.2


In [2]:
import json
import os

import pandas as pd
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig

In [3]:
# 4-bit bitsandbytes settings, passed as `quantization_config` when the model
# is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # "Normal Float 4" weight format, chosen here for accuracy.
    bnb_4bit_quant_type="nf4",
    # Also quantize the quantization constants to save more memory.
    bnb_4bit_use_double_quant=True,
    # Compute in bfloat16, which the L4 GPU targeted here supports natively.
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [4]:
# Load the Aya-101 tokenizer and its seq2seq weights, quantized with
# `bnb_config`.
model_id = "CohereLabs/aya-101"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,  # avoid host-RAM spikes while the shards load
    device_map={"": 0},      # map the root module (i.e. everything) to device 0
    quantization_config=bnb_config,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/833 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/836 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s]

model-00001-of-00011.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00011.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00005-of-00011.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00006-of-00011.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00004-of-00011.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00008-of-00011.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00011.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00007-of-00011.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00009-of-00011.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00010-of-00011.safetensors:   0%|          | 0.00/2.99G [00:00<?, ?B/s]

model-00011-of-00011.safetensors:   0%|          | 0.00/4.10G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/11 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

In [8]:
def generate_aya_quest(question, max_new_tokens=100):
    """Answer one question with a one-shot English prompt and greedy decoding.

    Relies on the notebook-level `tokenizer` and `model` objects.

    Args:
        question: The question text to insert into the prompt.
        max_new_tokens: Upper bound on generated tokens. The default of 100
            is kept short because the task is factoid QA.

    Returns:
        The decoded answer with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = (
        "You are a great language model.\n"
        "You will answer the following questions in the manner shown\n"
        "Question: What is the capital of India?\nAnswer: New Delhi\n"
        "Now answer the following\n"
        f"Question: {question}\nAnswer:"
    )
    # Send the inputs to whichever device the model lives on instead of
    # assuming a device literally named "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            min_length=1,
            do_sample=False,  # greedy decoding keeps runs reproducible
            pad_token_id=tokenizer.eos_token_id,
        )

    # The model is loaded as a seq2seq model, so `outputs` is decoded as-is:
    # no prompt tokens are sliced off before decoding.
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text.strip()


In [9]:
# Quick sanity check of the single-question helper; the cell displays the answer.
generate_aya_quest(question="What is the financial capital of India?")

'Mumbai'

In [10]:
from tqdm import tqdm
import math

In [14]:
# Short tag for the model (not referenced by the other visible cells).
model_name = "aya"

# Languages to process; the driver loop iterates them in this order.
languages = [
    "English",
    "Hindi",
    "Kannada",
    "Marathi",
    "Tamil",
]

In [15]:
# One-shot prompt per language. Every template has the same five-line layout:
# an instruction line, an example question/answer pair, then the `{q}`
# placeholder for the real question and a trailing answer cue.
TEMPLATES = {
    "Hindi": (
        "आप एक बेहतरीन भाषा मॉडल हैं।\n"
        "प्रश्न: भारत की राजधानी क्या है?\n"
        "उत्तर: नई दिल्ली\n"
        "प्रश्न: {q}\n"
        "उत्तर:"
    ),
    "Tamil": (
        "நீங்க ஒரு சிறந்த மொழி மாதிரி.\n"
        "கேள்வி: இந்தியாவின் தலைநகரம் என்ன?\n"
        "பதில்: புது தில்லி\n"
        "கேள்வி: {q}\n"
        "பதில்:"
    ),
    "Kannada": (
        "ನೀವು ಉತ್ತಮ ಭಾಷಾ ಮಾದರಿ.\n"
        "ಪ್ರಶ್ನೆ: ಭಾರತದ ರಾಜಧಾನಿ ಯಾವುದು?\n"
        "ಉತ್ತರ: ನವದೆಹಲಿ\n"
        "ಪ್ರಶ್ನೆ: {q}\n"
        "ಉತ್ತರ:"
    ),
    "Marathi": (
        "तुम्ही एक उत्तम भाषेचे मॉडेल आहात.\n"
        "प्रश्न: भारताची राजधानी कोणती आहे?\n"
        "उत्तर: नवी दिल्ली\n"
        "प्रश्न: {q}\n"
        "उत्तर:"
    ),
    "English": (
        "You are a great language model.\n"
        "Question: What is the capital of India?\n"
        "Answer: New Delhi\n"
        "Question: {q}\n"
        "Answer:"
    ),
}

In [21]:
def aya_e2e(
    language,
    batch_size=32,
    data_dir="/content/drive/MyDrive/685-Adv NLP/Project/Dataset/IndicQuest",
    output_dir=None,
):
    """Generate answers for one language's question CSV and save them.

    Reads `<data_dir>/<language>.csv`, formats each value of its `Question`
    column with `TEMPLATES[language]`, generates answers in batches with the
    notebook-level `model`/`tokenizer`, and writes the input frame plus a
    `model_response` column to `<output_dir>/aya_results_<language>.csv`.

    Side effect: sets `tokenizer.padding_side` to "left" and, if the tokenizer
    has no pad token, sets it to the EOS token. Both changes persist on the
    shared tokenizer after the call.

    Args:
        language: Key into `TEMPLATES`; also the stem of the input CSV name.
        batch_size: Number of questions per `generate` call.
        data_dir: Directory holding the per-language question CSVs.
        output_dir: Directory for the result CSV. Defaults to
            `<data_dir>/Results/Vanilla/aya`; it is created if missing.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
        KeyError: If `language` has no entry in `TEMPLATES`.
    """
    print(f"Processing for {language}")

    # --- 1. Configuration -------------------------------------------------
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Resolve the template up front so an unknown language fails before any
    # data is read or any generation is run.
    template = TEMPLATES[language]

    if output_dir is None:
        output_dir = os.path.join(data_dir, "Results", "Vanilla", "aya")
    input_path = os.path.join(data_dir, f"{language}.csv")
    final_path = os.path.join(output_dir, f"aya_results_{language}.csv")

    # --- 2. Padding setup -------------------------------------------------
    # NOTE(review): left padding is normally a decoder-only requirement; the
    # model here is loaded with AutoModelForSeq2SeqLM, so this is likely
    # unnecessary. It is kept so that results stay comparable with earlier runs.
    tokenizer.padding_side = "left"
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # --- 3. Batch generation ----------------------------------------------
    def generate_batch(questions):
        """Return one stripped, decoded answer per question in the batch."""
        prompts = [template.format(q=q) for q in questions]

        # Pad to the longest prompt in the batch; prompts longer than 256
        # tokens are truncated.
        inputs = tokenizer(
            prompts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=256,
        ).to(model.device)  # follow the model's device instead of assuming "cuda"

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=50,
                min_length=1,
                do_sample=False,  # greedy decoding keeps runs reproducible
                pad_token_id=tokenizer.eos_token_id,
            )

        decoded_batch = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        return [ans.strip() for ans in decoded_batch]

    # --- 4. Load data and run the loop ------------------------------------
    data_indic_quest = pd.read_csv(input_path)
    print(f"🚀 Processing {len(data_indic_quest)} rows in batches of {batch_size}...")
    questions = data_indic_quest['Question'].tolist()
    total_batches = math.ceil(len(questions) / batch_size)

    # Create the destination before the slow generation loop: a missing
    # directory would otherwise only surface at save time, after all the
    # answers have been computed.
    os.makedirs(output_dir, exist_ok=True)

    results = []
    for i in tqdm(range(0, len(questions), batch_size), total=total_batches, desc="Generating"):
        results.extend(generate_batch(questions[i : i + batch_size]))

    # --- 5. Final save ----------------------------------------------------
    data_indic_quest['model_response'] = results
    data_indic_quest.to_csv(final_path, index=False)
    print(f"✅ Done! Saved to {final_path}")

In [22]:
# Run the end-to-end pipeline once per configured language, in list order.
for lang_name in languages:
    aya_e2e(lang_name)

Processing for English
🚀 Processing 200 rows in batches of 32...


Generating: 100%|██████████| 7/7 [00:45<00:00,  6.47s/it]


✅ Done! Saved to /content/drive/MyDrive/685-Adv NLP/Project/Dataset/IndicQuest/Results/Vanilla/aya/aya_results_English.csv
Processing for Hindi
🚀 Processing 200 rows in batches of 32...


Generating: 100%|██████████| 7/7 [01:09<00:00,  9.88s/it]


✅ Done! Saved to /content/drive/MyDrive/685-Adv NLP/Project/Dataset/IndicQuest/Results/Vanilla/aya/aya_results_Hindi.csv
Processing for Kannada
🚀 Processing 200 rows in batches of 32...


Generating: 100%|██████████| 7/7 [01:07<00:00,  9.62s/it]


✅ Done! Saved to /content/drive/MyDrive/685-Adv NLP/Project/Dataset/IndicQuest/Results/Vanilla/aya/aya_results_Kannada.csv
Processing for Marathi
🚀 Processing 200 rows in batches of 32...


Generating: 100%|██████████| 7/7 [01:08<00:00,  9.82s/it]


✅ Done! Saved to /content/drive/MyDrive/685-Adv NLP/Project/Dataset/IndicQuest/Results/Vanilla/aya/aya_results_Marathi.csv
Processing for Tamil
🚀 Processing 200 rows in batches of 32...


Generating: 100%|██████████| 7/7 [01:00<00:00,  8.58s/it]

✅ Done! Saved to /content/drive/MyDrive/685-Adv NLP/Project/Dataset/IndicQuest/Results/Vanilla/aya/aya_results_Tamil.csv



