<a href="https://colab.research.google.com/github/AnasEhtisham/FYP/blob/main/Copy_of_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch numpy tokenizers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import torch

# Report GPU availability. The device name is only queried when CUDA is present,
# because torch.cuda.get_device_name(0) raises on a CPU-only runtime.
print("CUDA Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA Device Name:", torch.cuda.get_device_name(0))
else:
    print("CUDA Device Name: none (no GPU detected - switch the runtime type to GPU)")

CUDA Available: True
CUDA Device Name: Tesla T4


In [None]:
# Step 1a: Pin fsspec to match datasets’ requirement
!pip install -q fsspec==2025.3.0

# Step 1b: Install datasets (will pull in fsspec[http] ≤2025.3.0)
!pip install -q datasets --upgrade

# Step 1c: Install transformers, accelerate, peft, bitsandbytes, and huggingface_hub
# but do NOT install any of their dependencies (to keep your CUDA/cuBLAS etc intact)
!pip install -q transformers accelerate peft bitsandbytes huggingface_hub --upgrade --no-deps


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.1/362.1 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m489.3/489.3 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Step 2: Import Libraries and Log in to Hugging Face
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from huggingface_hub import notebook_login, whoami

# Log in to Hugging Face only if not already logged in.
# whoami() is expected to raise when no valid token is stored; catching
# Exception (rather than a bare except) keeps that fallback while letting
# KeyboardInterrupt / SystemExit propagate normally.
try:
    print("Already logged in as:", whoami()["name"])
except Exception:
    notebook_login()

Already logged in as: Anas-Ehtisham


In [None]:
# Step 3: Choose CSV Source and Prepare Data
print("--- Step 3: Preparing Data from CSV ---")

# Choose source: "colab" (interactive upload) or "gdrive" (mounted Google Drive)
csv_source = "colab"  # or "gdrive"

if csv_source == "gdrive":
    from google.colab import drive
    drive.mount('/content/drive')
    # NOTE(review): this path is outside the '/content/drive' mount point, so the
    # mounted Drive is never actually read - confirm where Dataset.csv lives.
    csv_file_path = "/content/Dataset.csv"
else:
    from google.colab import files
    uploaded = files.upload()
    # A cancelled upload yields an empty mapping; fail with a clear message
    # instead of a bare StopIteration from next(iter(...)).
    if not uploaded:
        raise ValueError("No file was uploaded; please upload the dataset CSV.")
    csv_file_path = next(iter(uploaded))

# Define column names (update if your column names are different)
job_description_col = "job_description"
proposal_col = "proposal"

# Load the CSV and validate structure
import pandas as pd
try:
    df = pd.read_csv(csv_file_path)
    df = df[[job_description_col, proposal_col]].dropna()
    # Cast to str first so .str.strip() also works if pandas inferred a
    # non-string dtype for a column.
    df[job_description_col] = df[job_description_col].astype(str).str.strip()
    df[proposal_col] = df[proposal_col].astype(str).str.strip()
    # Drop rows that are empty once whitespace is stripped, then rebuild a
    # contiguous index so the old row labels are not carried into the Dataset.
    has_text = (df[job_description_col] != "") & (df[proposal_col] != "")
    df = df[has_text].reset_index(drop=True)
    if df.empty:
        raise ValueError("The CSV contains no rows with both a job description and a proposal.")
    raw_dataset = Dataset.from_pandas(df)
    print(f"\nSuccessfully loaded dataset. Number of samples: {len(raw_dataset)}")
    print(f"First example: {raw_dataset[0]}")
except Exception as e:
    # Surface a readable message, then re-raise so later cells do not run on bad state.
    print(f"Error loading dataset: {e}")
    raise

--- Step 3: Preparing Data from CSV ---


Saving Dataset.csv to Dataset.csv

Successfully loaded dataset. Number of samples: 84
First example: {'job_description': 'We are seeking a skilled developer to build a simple AI agent or automation tool that scans real estate listings in Orange County, NC and flags properties that may qualify for subdivision under local zoning rules. The agent should: Pull listing data from sources like Zillow, Redfin, or MLS Cross-reference each listing with Orange County GIS and zoning data Evaluate subdivision potential (e.g. lot size, frontage, zoning district) Calculate value per acre for investment analysis Deliver results via email, spreadsheet, or web dashboard — updated on a recurring schedule (e.g. daily or weekly) Ideal candidate will have experience with: AI agents or automation (e.g. Python scripts, Zapier, LangChain) Web scraping or API integration for real estate data Zoning logic, GIS parcel matching Google Sheets API, email automation, or simple dashboard tools This is an exciting proj

In [None]:
# Step 4: Load the Base Model with 4-bit Quantization
model_id = "meta-llama/Llama-2-7b-hf"

# 4-bit NF4 quantization with double quantization; compute runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

# The tokenizer load and prepare_model_for_kbit_training(model) live in the
# next cell (Step 5). They used to be repeated here as well, which ran the same
# preparation twice on every Restart & Run All.


config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# Step 5: Load the Tokenizer and Prepare Model
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Rebinds `model` to the result of PEFT's k-bit training preparation.
model = prepare_model_for_kbit_training(model)


In [None]:
# Step 6: Apply LoRA Configuration
# Adapters are attached only to the modules named "q_proj" and "v_proj",
# with rank 16, alpha 32 and 5% dropout; no bias terms are trained.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
# Rebinds `model` to the PEFT-wrapped model.
# NOTE(review): re-running this cell wraps the already-wrapped `model` again -
# presumably you want to re-run from the model-loading cell instead; confirm.
model = get_peft_model(model, lora_config)
# Prints the trainable vs. total parameter counts.
model.print_trainable_parameters()

trainable params: 8,388,608 || all params: 6,746,804,224 || trainable%: 0.1243


In [None]:
# Step 7: Tokenize the Dataset
def tokenize_function(example):
    """Render one CSV row into the training prompt and tokenize it.

    The job description and proposal columns of ``example`` are placed under
    the "### Job Description:" and "### Proposal:" headings, then the text is
    tokenized, truncated and padded to exactly 512 tokens.
    """
    job_text = example[job_description_col]
    proposal_text = example[proposal_col]
    prompt = f"### Job Description:\n{job_text}\n\n### Proposal:\n{proposal_text}"
    encoded = tokenizer(prompt, padding="max_length", truncation=True, max_length=512)
    return encoded

# Tokenize every row and drop the original text columns so only tokenizer outputs remain.
tokenized_dataset = raw_dataset.map(tokenize_function, remove_columns=raw_dataset.column_names)

Map:   0%|          | 0/84 [00:00<?, ? examples/s]

In [None]:
# Step 8: Setup Data Collator
# mlm=False selects causal (next-token) language modeling rather than masked LM.
# NOTE(review): the pad token was set to the EOS token earlier; presumably the
# collator ignores pad positions in the loss, which would also ignore EOS - confirm.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

In [None]:
# Step 9: Define Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 samples per optimizer step per device
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    save_steps=100,
    fp16=True,
    push_to_hub=False,
    report_to="none",
    # Trainer cannot infer label names from a PeftModel (it warns and falls back
    # to an empty list), so name the label column explicitly.
    label_names=["labels"],
)

In [None]:
# Step 10: Initialize Trainer and Train the Model
# `processing_class` replaces the deprecated `tokenizer` argument of Trainer;
# the notebook installs transformers with --upgrade, so the newer name is used.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer,
    data_collator=data_collator
)

trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,2.2208


TrainOutput(global_step=15, training_loss=2.20781987508138, metrics={'train_runtime': 324.7123, 'train_samples_per_second': 0.776, 'train_steps_per_second': 0.046, 'total_flos': 4389882348699648.0, 'train_loss': 2.20781987508138, 'epoch': 2.571428571428571})

In [None]:
# Step 11: Save LoRA Adapters locally (nothing is pushed to the Hub in this cell)
adapter_output_dir = "./lora_adapter"
# `model` is the PEFT-wrapped model from Step 6; it is written to adapter_output_dir.
model.save_pretrained(adapter_output_dir)
print(f"Saved LoRA adapter to {adapter_output_dir}")

Saved LoRA adapter to ./lora_adapter


In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.29.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModelForCausalLM
import torch
import gradio as gr

# Define model and adapter path
# (same values as in the training cells, repeated so this section can run on its own)
model_id = "meta-llama/Llama-2-7b-hf"
adapter_output_dir = "./lora_adapter"

# Load your BitsAndBytes config (used earlier)
# NOTE: `bnb_config` is assigned again in the later model-loading cell, which
# adds llm_int8_enable_fp32_cpu_offload; that later value is the one used to load the model.
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)


In [None]:
!pip install bitsandbytes



In [None]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModelForCausalLM

# Same 4-bit NF4 setup as training, plus fp32 CPU offload for modules that
# device_map="auto" places off the GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_enable_fp32_cpu_offload=True  # this flag *should* be here inside BitsAndBytesConfig
)

# Reload a fresh quantized base model for inference.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

# Attach the saved LoRA adapter to the base model. The pipeline cell below reads
# `inference_model`, which was otherwise never defined on a fresh
# Restart & Run All.
inference_model = PeftModelForCausalLM.from_pretrained(model, adapter_output_dir)
inference_model.eval()  # inference only: disables dropout


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [None]:
# Load tokenizer and set pad token
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# No device argument (avoid conflict with accelerate)
# `inference_model` must already exist in the kernel; it is expected to be the
# base model with the saved LoRA adapter attached.
generator = pipeline(
    "text-generation",
    model=inference_model,
    tokenizer=tokenizer,
)


Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoFo

In [None]:
def generate_proposal(job_desc: str, max_length: int = 256):
    """Generate a freelance proposal for a job description.

    Args:
        job_desc: Free-text job description entered by the user.
        max_length: Maximum number of tokens to generate for the proposal.
            The prompt is not counted against this budget. Non-integer
            values (e.g. from a UI slider) are truncated to int.

    Returns:
        The generated proposal text with surrounding whitespace removed, or a
        short hint message when ``job_desc`` is empty or whitespace-only.
    """
    # Nothing to condition on: skip the expensive model call entirely.
    if not job_desc or not job_desc.strip():
        return "Please enter a job description."

    # Same layout as the training prompt (training text was stripped as well).
    prompt = f"""### Job Description:
{job_desc.strip()}

### Proposal:
"""
    outputs = generator(
        prompt,
        # max_new_tokens bounds only the generated part. The previous max_length
        # also counted prompt tokens, so a long job description left little or
        # no room for the proposal.
        max_new_tokens=int(max_length),
        temperature=0.7,
        top_p=0.9,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
        do_sample=True,
        # Return only the completion, so the prompt does not have to be split
        # off again (which misfired when the job text itself contained the
        # "### Proposal:" marker).
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()


In [None]:
# Build the Gradio UI. Components render in the order they are created, one per
# row: job description input, length slider, generate button, output box.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI Freelance Proposal Generator (LoRA Fine-tuned LLaMA2)")

    with gr.Row():
        job_input = gr.Textbox(label="📝 Job Description", lines=10, placeholder="Enter job description...")

    with gr.Row():
        # The slider value is passed to generate_proposal as its max_length argument.
        max_length = gr.Slider(minimum=50, maximum=1024, value=256, step=10, label="Maximum Output Length")

    with gr.Row():
        generate_button = gr.Button("🚀 Generate Proposal")

    with gr.Row():
        proposal_output = gr.Textbox(label="💡 Generated Proposal", lines=10)

    # Clicking the button calls generate_proposal(job_input, max_length) and
    # writes the returned text into the output box.
    generate_button.click(fn=generate_proposal, inputs=[job_input, max_length], outputs=proposal_output)

# Start the app; per the captured output below, Colab enables a public share link automatically.
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fd8e17e5e212d36556.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


