In [20]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [12]:
!pip install transformers datasets accelerate bitsandbytes numpy kaggle_secrets
!pip install peft




ERROR: Could not find a version that satisfies the requirement kaggle_secrets (from versions: none)
ERROR: No matching distribution found for kaggle_secrets




In [13]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig, 
    TrainingArguments, 
    logging
)
from peft import LoraConfig, get_peft_model
from huggingface_hub import login

import bitsandbytes as bnb

In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
from transformers import Trainer, TrainingArguments

In [25]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Adjust precision and attention based on GPU
if torch.cuda.get_device_capability()[0] >= 8:
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
    !pip install -qqq flash-attn
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"
# BitsAndBytes configuration for memory-efficient model loading
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

model_name = "HuggingFaceTB/SmolLM-135M"  
model = AutoModelForCausalLM.from_pretrained(model_name,quantization_config=bnb_config,device_map="auto",attn_implementation=attn_implementation,offload_folder="./offload")
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Set EOS token as padding token
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id


In [26]:
# Apply LoRA configuration
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  
    inference_mode=False,         
    r=16,                         
    lora_alpha=32,                
    lora_dropout=0.1,          
)

# Add LoRA adapters to the model
model = get_peft_model(model, peft_config)

# Freeze all parameters except LoRA parameters
for name, param in model.named_parameters():
    if "lora" not in name:
        param.requires_grad = False  



In [27]:
def preprocess_function(examples):
    #print(examples)

    # Tokenize the question
    inputs = tokenizer(
        examples["question"], 
        max_length=256, 
        truncation=True, 
        padding="max_length"
    )
    
    # Extract the first answer for each example in the batch
    answers = [ans["text"][0] if len(ans["text"]) > 0 else "" for ans in examples["answers"]]
    
    # Tokenize the answers
    outputs = tokenizer(
        answers, 
        max_length=256, 
        truncation=True, 
        padding="max_length"
    )
    
    # Assign tokenized outputs as labels
    inputs["labels"] = outputs["input_ids"]
    return inputs


In [28]:
from datasets import load_dataset

# Load SQuAD dataset
dataset = load_dataset("squad")

# Use only a portion of the dataset
subset_size = 5000  # Adjust this value to control the subset size
train_subset = dataset["train"].shuffle(seed=42).select(range(subset_size))
validation_subset = dataset["validation"].shuffle(seed=42).select(range(subset_size))



tokenized_train_subset = train_subset.map(preprocess_function, batched=True)
tokenized_validation_subset = validation_subset.map(preprocess_function, batched=True)
# Apply preprocessing to the dataset
#tokenized_dataset = dataset.map(preprocess_function, batched=True)


# Check a sample
print(tokenized_train_subset[0])
# Check a sample of the processed dataset
#print(tokenized_dataset['train'][0])


Map: 100%|██████████| 5000/5000 [00:01<00:00, 3856.50 examples/s]
Map: 100%|██████████| 5000/5000 [00:01<00:00, 4490.10 examples/s]

{'id': '573173d8497a881900248f0c', 'title': 'Egypt', 'context': 'The Pew Forum on Religion & Public Life ranks Egypt as the fifth worst country in the world for religious freedom. The United States Commission on International Religious Freedom, a bipartisan independent agency of the US government, has placed Egypt on its watch list of countries that require close monitoring due to the nature and extent of violations of religious freedom engaged in or tolerated by the government. According to a 2010 Pew Global Attitudes survey, 84% of Egyptians polled supported the death penalty for those who leave Islam; 77% supported whippings and cutting off of hands for theft and robbery; and 82% support stoning a person who commits adultery.', 'question': 'What percentage of Egyptians polled support death penalty for those leaving Islam?', 'answers': {'text': ['84%'], 'answer_start': [468]}, 'input_ids': [1780, 7311, 282, 18908, 853, 1007, 1199, 2112, 15919, 327, 967, 5170, 5048, 47, 0, 0, 0, 0, 0,




In [None]:
import os
from transformers import Trainer, TrainingArguments

# Disable W&B for this run only
os.environ["WANDB_MODE"] = "disabled"

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",  # Updated from evaluation_strategy
    learning_rate=5e-5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    num_train_epochs=3,
    weight_decay=0.01,
    fp16=True,
    save_strategy="epoch",
    logging_dir="./logs",
    push_to_hub=False,
    run_name="custom_run_name",  # Optional custom run name for W&B
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_subset,
    eval_dataset=tokenized_validation_subset,
    #train_dataset=tokenized_dataset['train'],
    #eval_dataset=tokenized_dataset['validation'],
)

trainer.train()


                                                   
 33%|███▎      | 313/936 [1:00:08<1:15:21,  7.26s/it]

{'eval_loss': 0.21161581575870514, 'eval_runtime': 952.73, 'eval_samples_per_second': 5.248, 'eval_steps_per_second': 0.656, 'epoch': 1.0}


 53%|█████▎    | 500/936 [1:26:43<1:01:46,  8.50s/it]  

{'loss': 3.1808, 'grad_norm': 0.06035372242331505, 'learning_rate': 2.33974358974359e-05, 'epoch': 1.6}


                                                     
 67%|██████▋   | 626/936 [2:00:28<37:24,  7.24s/it]

{'eval_loss': 0.18455049395561218, 'eval_runtime': 954.1254, 'eval_samples_per_second': 5.24, 'eval_steps_per_second': 0.655, 'epoch': 2.0}


100%|██████████| 936/936 [2:45:35<00:00,  8.61s/it]    

In [18]:
trainer.save_model("./results")  # Save the model
tokenizer.save_pretrained("./results")  # Save the tokenizer


('./results/tokenizer_config.json',
 './results/special_tokens_map.json',
 './results/tokenizer.model',
 './results/added_tokens.json',
 './results/tokenizer.json')

In [19]:
!pip install gradio


  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting gradio
  Downloading gradio-5.8.0-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.5.1 (from gradio)
  Downloading gradio_client-1.5.1-py3-none-any.whl.metadata (7.1 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting starlette<1.0,>=0.40.0 (from gradio)
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6

In [20]:
from transformers import pipeline
import gradio as gr

# Load the fine-tuned model and tokenizer
qa_pipeline = pipeline("question-answering", model="./results", tokenizer="./results")

# Function to answer questions
def answer_question(context, question):
    result = qa_pipeline(question=question, context=context)
    return f"Answer: {result['answer']}\nConfidence: {result['score']:.2f}"

# Define the Gradio interface
interface = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter context here...", label="Context"),
        gr.Textbox(lines=2, placeholder="Enter question here...", label="Question")
    ],
    outputs="text",
    title="Simple Question Answering Platform",
    description="Provide a context and ask a question to get an answer based on the fine-tuned model."
)

# Launch the interface
interface.launch()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
The model 'Gemma2ForCausalLM' is not supported for question-answering. Supported models are ['AlbertForQuestionAnswering', 'BartForQuestionAnswering', 'BertForQuestionAnswering', 'BigBirdForQuestionAnswering', 'BigBirdPegasusForQuestionAnswering', 'BloomForQuestionAnswering', 'CamembertForQuestionAnswering', 'CanineForQuestionAnswering', 'ConvBertForQuestionAnswering', 'Data2VecTextForQuestionAnswering', 'DebertaForQuestionAnswering', 'DebertaV2ForQuestionAnswering', 'DistilBertForQuestionAnswering', 'ElectraForQuestionAnswering', 'ErnieForQuestionAnswering', 'ErnieMForQuestionAnswering', 'FalconForQuestionAnswering', 'FlaubertForQuestionAnsweringSimple', 'FNetForQuestionAnswering', 'FunnelForQuestionAnswering', 'GPT2ForQuestionAnswering', 'GPTNeoForQuestionAnswering', 'GPTNeoXForQuestionAnswering', 'GPTJForQuestionAnswering', 'IBertForQuesti

* Running on local URL:  http://127.0.0.1:7860
Kaggle notebooks require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://db1f0c9c4dc155886f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/gradio/queueing.py", line 624, in process_events
    response = await route_utils.call_process_api(
  File "/opt/conda/lib/python3.10/site-packages/gradio/route_utils.py", line 323, in call_process_api
    output = await app.get_blocks().process_api(
  File "/opt/conda/lib/python3.10/site-packages/gradio/blocks.py", line 2043, in process_api
    result = await self.call_function(
  File "/opt/conda/lib/python3.10/site-packages/gradio/blocks.py", line 1590, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/opt/conda/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/opt/conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
    return await future
  File "/opt/conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", 

In [10]:
!apt-get update && apt-get install -y git


  pid, fd = os.forkpty()


Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1581 B]
Hit:2 https://packages.cloud.google.com/apt gcsfuse-focal InRelease            
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]      
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease                         
Get:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1192 kB]
Hit:6 https://packages.cloud.google.com/apt cloud-sdk InRelease                
Hit:7 https://packages.cloud.google.com/apt google-fast-socket InRelease       
Get:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:10 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [2454 kB]
Get:11 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1225 kB]
Get:12 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Package

In [11]:
!git init


[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /kaggle/working/.git/


In [14]:
!git remote add origin https://github.com/Schrodingerscat00000/Fine-tuning-LLM-for-Question-Answering.git


In [15]:
!git add .


In [16]:
!git commit -m "Initial Kaggle project commit"


Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@297204ea226a.(none)')


In [20]:
!git config --global user.email "avropiyas824@gmail.com"
!git config --global user.name "Schrodingerscat00000"


In [21]:
!git commit -m "Initial Kaggle project commit"


On branch master

Initial commit

nothing to commit (create/copy files and use "git add" to track)


In [22]:
# Rename the branch to main
!git branch -M main


In [23]:
!git add .
!git commit -m "Initial Kaggle project commit"


On branch main

Initial commit

nothing to commit (create/copy files and use "git add" to track)


In [24]:
!git push -u origin main


error: src refspec main does not match any
[31merror: failed to push some refs to 'https://github.com/Schrodingerscat00000/Fine-tuning-LLM-for-Question-Answering.git'
[m