# Installation and Load packages

In [1]:
!pip install datasets peft -qq
# !pip install accelerate -qq
# !pip install bitsandbytes -qq
!pip install trl -qq
!pip install wandb scikit-learn -qq

In [2]:
pip show peft

Name: peft
Version: 0.5.0
Summary: Parameter-Efficient Fine-Tuning (PEFT)
Home-page: https://github.com/huggingface/peft
Author: The HuggingFace team
Author-email: sourab@huggingface.co
License: Apache
Location: /opt/conda/lib/python3.10/site-packages
Requires: accelerate, numpy, packaging, psutil, pyyaml, safetensors, torch, tqdm, transformers
Required-by: auto-gptq
Note: you may need to restart the kernel to use updated packages.


In [3]:
!pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu118
!pip install --upgrade --pre transformers accelerate --extra-index-url https://download.pytorch.org/whl/cu118
!pip install bitsandbytes==0.43.2 --prefer-binary --extra-index-url https://pypi.org/simple  

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch==2.2.0
  Downloading https://download.pytorch.org/whl/cu118/torch-2.2.0%2Bcu118-cp310-cp310-linux_x86_64.whl (811.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m811.7/811.7 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torchvision==0.17.0
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.17.0%2Bcu118-cp310-cp310-linux_x86_64.whl (6.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m81.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hCollecting torchaudio==2.2.0
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.2.0%2Bcu118-cp310-cp310-linux_x86_64.whl (3.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m76.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting nvi

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.org/simple
Collecting bitsandbytes==0.43.2
  Downloading bitsandbytes-0.43.2-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.2


In [4]:
!pip show transformers

Name: transformers
Version: 4.51.3
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /home/student/.local/lib/python3.10/site-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: auto-gptq, optimum, peft, trl


In [5]:
import os

# Disable tokenizer parallelism to avoid the warning
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [9]:
import peft
print(peft.__version__)
print(peft.__file__)

import sys
for path in sys.path:
    print(path)

0.5.0
/opt/conda/lib/python3.10/site-packages/peft/__init__.py
/opt/conda/lib/python310.zip
/opt/conda/lib/python3.10
/opt/conda/lib/python3.10/lib-dynload

/home/student/.local/lib/python3.10/site-packages
/opt/conda/lib/python3.10/site-packages
/opt/conda/lib/python3.10/site-packages/mpmath-1.2.1-py3.10.egg


## GPU - details

In [8]:
import torch

print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())

if torch.cuda.is_available():
    print("Device name:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected.")

Torch version: 2.2.0+cu118
CUDA available: True
Device name: Tesla T4


# Load libraries, Login HuggingFace API & WandB API

- **HuggingFace API:** To get access of Model Llama-3.2 (3 Billion)
- **WandB (Weigths & Biases):** To supervise perform of model and hyperparameter Tuning

In [10]:
# from google.colab import userdata
from huggingface_hub import login

login(token="YOUR_HF_API_KEY")

# Access Key for llama Model (HuggingFace)

from datasets import load_dataset, Dataset
from sklearn.model_selection import train_test_split

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    Trainer,
    BitsAndBytesConfig,
    HfArgumentParser,
    pipeline,
    logging,
    EarlyStoppingCallback
)

from transformers.trainer_callback import TrainerCallback, TrainerState, TrainerControl

from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)


from bitsandbytes.optim import AdamW8bit
import os, torch, wandb
from trl import SFTTrainer, setup_chat_format

# WandB - For plot Training

In [12]:
# for hyperparameter tuning report
wandb.login()
# YOUR_WANDB_API_KEY

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/student/.netrc
[34m[1mwandb[0m: Currently logged in as: [33myashnayi00[0m ([33myashnayi00-university-of-new-haven[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Load Llama-3.2-3B model 

In [13]:
model_name = "meta-llama/Llama-3.2-3B"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False
)


tokenizer = AutoTokenizer.from_pretrained(model_name)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    attn_implementation="eager",
)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

tokenizer.padding_side = "right"

base_model.config.pretraining_tp = 1
base_model.config.use_cache = False


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [14]:
print(f"meta-llama/Llama-3.2-3B:\n\n{base_model}")

meta-llama/Llama-3.2-3B:

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 3072)
    (layers): ModuleList(
      (0-27): 28 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear4bit(in_features=3072, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=3072, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear4bit(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear4bit(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
      )
    )
    (n

In [15]:
print(f"{base_model.config}")

LlamaConfig {
  "_attn_implementation_autoset": true,
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 24,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": false,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"

### Trainable parameters - Model

In [16]:
def trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    return f"- Trainable model parameters: {trainable_params}.\n- All model parameters: {all_param}.\n- Percentage of trainable model parameters: {100 * trainable_params / all_param:.2f}%"

print(trainable_parameters(base_model))

- Trainable model parameters: 394177536.
- All model parameters: 1803463680.
- Percentage of trainable model parameters: 21.86%


### Assign datasetPH.json

Data is split in to train and test.
- Train size: 80%
- Test size: 20%

In [17]:
# import json
# with open("./dataset/policy_training_data.jsonl", "r") as f:
#     data = json.load(f)

# if isinstance(data, dict):
#     print("Data is a dictionary. Converting values to a list for splitting.")
#     data = list(data.values())

# train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# with open("./dataset/trainset/rp_train_datasetPH.json", "w") as f:
#     json.dump(train_data, f, indent=2)

# with open("./dataset/testset/rp_test_datasetPH.json", "w") as f:
#     json.dump(test_data, f, indent=2)

# print(f"Train size: {len(train_data)}")
# print(f"Test size: {len(test_data)}")

data = load_dataset("json", data_files="dataset/large_policy_data.jsonl")
data

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['instruction', 'response'],
        num_rows: 7951
    })
})

In [22]:
data["train"] = data["train"].select(range(3000))
split_data = data["train"].train_test_split(test_size=0.2, seed=42)

print(split_data)

DatasetDict({
    train: Dataset({
        features: ['instruction', 'response'],
        num_rows: 2400
    })
    test: Dataset({
        features: ['instruction', 'response'],
        num_rows: 600
    })
})


In [23]:
data['train'][0]

{'instruction': 'Summarize the educational attainment statistics for Alabama in 2018.',
 'response': "Here is a summary of the educational data for Alabama in 2018:\nIn 2018, in Alabama, the following statistics were recorded: All families Total Families: 1229905; All families Percent below poverty level Families: 13.0; Married-couple families Total Families: 883386; Married-couple families Percent below poverty level Families: 5.8; Female householder, no husband present Total Families: 267996; Female householder, no husband present Percent below poverty level Families: 35.5; All families Total Families With related children of householder under 18 years: 557720; All families Percent below poverty level Families With related children of householder under 18 years: 21.1; Married-couple families Total Families With related children of householder under 18 years: 346043; Married-couple families Percent below poverty level Families With related children of householder under 18 years: 8.2; 

### Tokenization of dataset and normalization 

In [24]:
# def tokenize_function(examples):
#     texts = []
#     for i in range(len(examples["title"])):
#         entry_parts = []

#         for key in examples.keys():
#             value = examples[key][i]
#             if isinstance(value, dict):
#                 for subkey, subval in value.items():
#                     entry_parts.append(f"{key}.{subkey}: {subval}")
#             elif isinstance(value, list):
#                 entry_parts.append(f"{key}: {', '.join(map(str, value))}")
#             else:
#                 entry_parts.append(f"{key}: {value}")

#         combined_text = "\n".join(entry_parts)
#         texts.append(combined_text)

#     return tokenizer(texts, truncation=True, padding="max_length", max_length=256)

def tokenize_function(examples):
    prompts = []
    for i in range(len(examples["instruction"])):
        instruction = examples["instruction"][i]
        response = examples["response"][i]
        prompt_type = examples.get("prompt_type", ["analysis"] * len(examples["instruction"]))[i]  # default to 'analysis'

        template = prompt_templates.get(prompt_type, prompt_templates["analysis"])
        full_prompt = template.format(query=instruction) + "\n\nAnswer: " + response
        prompts.append(full_prompt)

    return tokenizer(prompts, truncation=True, padding="max_length", max_length=512)


In [25]:
def normalize_entry(entry):
    normalized = {}
    for key, value in entry.items():
        if isinstance(value, dict):
            for subkey, subval in value.items():
                normalized[f"{key}.{subkey}"] = str(subval) if subval is not None else ""
        elif isinstance(value, list):
            normalized[key] = ", ".join(map(str, value))
        elif value is None:
            normalized[key] = ""
        else:
            normalized[key] = str(value)
    return normalized

# Normalize each entry
train_data_clean = [normalize_entry(entry) for entry in split_data['train']]
test_data_clean = [normalize_entry(entry) for entry in split_data['test']]


In [26]:
train_dataset_hf = Dataset.from_list(train_data_clean)
test_dataset_hf = Dataset.from_list(test_data_clean)

## Prompt Engineering

In [27]:
# Define various prompting templates
prompt_templates = {
    "analysis": (
        "As a policy analyst, analyze the following policy issue:\n"
        "{query}\n\n"
        "Consider relevant socioeconomic factors, provide statistical insights, "
        "and offer evidence-based recommendations."
    ),
    "comparative": (
        "As a policy analyst, compare these policy approaches:\n"
        "{query}\n\n"
        "Evaluate each using statistical data, consider implementation challenges, "
        "and assess likely outcomes across different demographics."
    ),
    "forecast": (
        "As a policy analyst, forecast the outcomes of this policy change:\n"
        "{query}\n\n"
        "Project short and long-term impacts, identify potential unintended consequences, "
        "and quantify likely effects where possible."
    ),
}

### Train & Test - Tokenization 

In [28]:
tokenized_train = train_dataset_hf.map(tokenize_function, batched=True)
tokenized_train.set_format(type="torch")
print("Tokenization complete with all features.")

Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Tokenization complete with all features.


In [29]:
tokenized_test = test_dataset_hf.map(tokenize_function, batched=True)
tokenized_test.set_format(type="torch")
print("Tokenization complete with all features.")

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

Tokenization complete with all features.


# Configer - PEFT, LoRA & QLoRA

In [30]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    lora_dropout=0.15,
    bias="none",
    task_type="CAUSAL_LM"
)

In [31]:
base_model.gradient_checkpointing_enable()
base_model = prepare_model_for_kbit_training(base_model)

peft_model = get_peft_model(base_model, lora_config)
peft_model.config.use_cache = False

print("After PEFT wrapping:")
print(trainable_parameters(peft_model))

After PEFT wrapping:
- Trainable model parameters: 4587520.
- All model parameters: 1808051200.
- Percentage of trainable model parameters: 0.25%


# Train PH-Llama-3.2 Model & Evaluation 

In [32]:
import torch
import os
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

training_args = TrainingArguments(
    output_dir="./SocioLens-llama-1",
    overwrite_output_dir=True,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # Increased
    optim="adamw_8bit",
    num_train_epochs=7,  # Increased
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,
    greater_is_better=False,
    logging_steps=1,
    weight_decay=0.01,  # Reduced
    warmup_steps=100,  # Increased
    logging_strategy="steps",
    learning_rate=5e-5,  # Slightly adjusted
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    lr_scheduler_type='cosine',
    seed=3407,
    group_by_length=True,
    max_grad_norm=1.0,
    gradient_checkpointing=True,
    report_to="wandb"
)

# training_args = TrainingArguments(
#     output_dir="./SocioLens-llama-3.2-3B",
#     overwrite_output_dir=True,
#     per_device_train_batch_size=4,                    # Increased batch size
#     per_device_eval_batch_size=4,
#     gradient_accumulation_steps=4,                    # Effective batch size = 4 * 4 = 16
#     optim="adamw_8bit",                               # Use 8-bit AdamW
#     num_train_epochs=5,
#     eval_strategy="steps",
#     eval_steps=50,
#     save_strategy="steps",
#     save_steps=50,
#     greater_is_better=False,
#     logging_steps=1,
#     weight_decay=0.01,                                # Increased weight decay
#     warmup_steps=50,                                  # Increased warmup steps
#     logging_strategy="steps",
#     learning_rate=5e-5,                               # Lower learning rate
#     fp16=not torch.cuda.is_bf16_supported(),
#     bf16=torch.cuda.is_bf16_supported(),
#     lr_scheduler_type='cosine',                       # Use cosine scheduler
#     seed=3407,
#     group_by_length=True,
#     max_grad_norm=1.0,                                # Gradient clipping
#     gradient_checkpointing=True,                      # Save memory
#     report_to="wandb"
# )


trainer = SFTTrainer(
    model=peft_model,
    args=training_args,
    peft_config=lora_config,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    data_collator=data_collator,
)

torch.cuda.empty_cache() # Force Clear Cache Before Training

print("Starting training...")
trainer.train()
print(f"Training complete.")


Truncating train dataset:   0%|          | 0/2400 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/600 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Starting training...


  Expected `list[str]` but got `tuple` - serialized value may not be as expected
  Expected `list[str]` but got `tuple` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(


Step,Training Loss,Validation Loss
50,1.0263,1.030093
100,0.4758,0.550622
150,0.4225,0.429243
200,0.3475,0.406005
250,0.3495,0.39106
300,0.3391,0.387885
350,0.3769,0.382199
400,0.2952,0.377012
450,0.4101,0.37302
500,0.2955,0.366825


Training complete.


In [33]:
eval_results = trainer.evaluate()
print("Evaluation Results:")
print(eval_results)

Evaluation Results:
{'eval_loss': 0.34707391262054443, 'eval_runtime': 176.8189, 'eval_samples_per_second': 3.393, 'eval_steps_per_second': 0.848}


In [34]:
peft_model.config.save_pretrained("./SocioLens-llama-1")

In [35]:
!ls -la ./SocioLens-llama-3.2-3B

total 44
drwxr-xr-x 10 student student 4096 Apr 26 23:30 .
drwxr-xr-x 10 student student 4096 Apr 27 04:51 ..
drwxr-xr-x  2 student student 4096 Apr 26 23:29 checkpoint-100
drwxr-xr-x  2 student student 4096 Apr 26 23:29 checkpoint-150
drwxr-xr-x  2 student student 4096 Apr 26 23:29 checkpoint-200
drwxr-xr-x  2 student student 4096 Apr 26 23:29 checkpoint-240
drwxr-xr-x  2 student student 4096 Apr 26 23:29 checkpoint-250
drwxr-xr-x  2 student student 4096 Apr 26 23:29 checkpoint-300
drwxr-xr-x  2 student student 4096 Apr 26 23:30 checkpoint-350
drwxr-xr-x  2 student student 4096 Apr 26 23:30 checkpoint-50
-rw-r--r--  1 student student 1361 Apr 26 02:19 config.json


In [36]:
files = os.listdir("./SocioLens-llama-1")
print("Files in the output directory:", files)

Files in the output directory: ['checkpoint-950', 'checkpoint-1050', 'checkpoint-250', 'checkpoint-300', 'checkpoint-150', 'checkpoint-800', 'checkpoint-900', 'checkpoint-850', 'checkpoint-400', 'checkpoint-450', 'checkpoint-100', 'checkpoint-500', 'checkpoint-600', 'checkpoint-650', 'checkpoint-200', 'checkpoint-350', 'checkpoint-550', 'checkpoint-1000', 'checkpoint-750', 'checkpoint-50', 'checkpoint-700', 'config.json']


# Generate Text by Trained Model

In [51]:
import re
import random
from datetime import datetime

def generate_alpaca_text(
    prompt,
    max_length=512,
    temperature=0.0,
    top_p=0.95,
    system_message="You are SocioLens, an expert AI assistant specializing in adult education policy, delivering concise, accurate, and professional responses.",
    use_few_shot=True,
    use_cot=False,
    tokenizer=None,
    model=None,
    do_sample=False,
    user_id=None
):
    """
    Generates text using an Alpaca-style prompt format with varied, professional conversational responses
    for common prompts and advanced prompt engineering for complex tasks, using a single prompt input.

    :param prompt: The user input, containing the instruction or question.
    :param max_length: The maximum length of the generated text.
    :param temperature: Sampling temperature for controlling randomness.
    :param top_p: Nucleus sampling parameter for controlling creativity.
    :param system_message: System message to define the model's role or persona.
    :param use_few_shot: Whether to include few-shot examples in the prompt.
    :param use_cot: Whether to encourage chain-of-thought reasoning.
    :param tokenizer: The tokenizer for the model.
    :param model: The fine-tuned model for text generation.
    :param do_sample: Whether to use sampling or greedy decoding.
    :param user_id: Optional identifier for the user to ensure varied responses across users.
    :return: A string containing the generated response.
    """
    # Validate inputs
    if not prompt:
        raise ValueError("Prompt cannot be empty.")
    if not tokenizer or not model:
        raise ValueError("Tokenizer and model must be provided.")
    
    # Set random seed for varied responses
    seed = hash(user_id) if user_id else int(datetime.now().timestamp())
    random.seed(seed)

    # Response templates for conversational prompts
    conversational_templates = {
        r"^(hi|hello|hey|greetings)(\s.*)?$": {
            "greetings": ["Greetings", "Hello", "Good day"],
            "status": [
                "I'm performing optimally and ready to assist",
                "I'm fully operational and here to help",
                "I'm at peak performance and eager to support you"
            ],
            "offer": [
                "How may I help you today?",
                "What can I assist you with today?",
                "How can I support your needs today?"
            ],
            "combine": lambda g, s, o: f"{g}! {s}. {o}"
        },
        r"^how\s+are\s+you(\s*doing)?\?$": {
            "greetings": [""],
            "status": [
                "I'm functioning at peak performance and ready to assist",
                "I'm operating smoothly and here to help",
                "I'm in optimal condition and eager to support"
            ],
            "offer": [
                "How about you—how may I support your needs today?",
                "What can I assist you with today?",
                "How may I help you today?"
            ],
            "combine": lambda g, s, o: f"{s}. {o}"
        },
        r"^who\s+are\s+you\?$": {
            "intro": [
                "I am SocioLens, an AI assistant specializing in adult education policy",
                "I am SocioLens, an expert AI designed for adult education policy",
                "I am SocioLens, your AI assistant for adult education policy"
            ],
            "creators": ["developed by Yash, Shrestha, and Parin"],
            "offer": [
                "How can I assist you today?",
                "What can I help you with today?",
                "How may I support you today?"
            ],
            "combine": lambda i, c, o: f"{i}, {c}. {o}"
        },
        r"^tell\s+me\s+about\s+(you|yourself)(\?)?$": {
            "intro": [
                "I am SocioLens, a large language model",
                "I am SocioLens, an advanced AI",
                "I am SocioLens, a sophisticated language model"
            ],
            "creators": ["created by Yash, Shrestha, and Parin"],
            "purpose": [
                "I'm designed to provide accurate and insightful answers, particularly in adult education policy",
                "My purpose is to deliver precise and professional responses, especially on adult education policy",
                "I'm built to offer reliable and detailed insights, focusing on adult education policy"
            ],
            "offer": [
                "What would you like to explore?",
                "What topic would you like to discuss?",
                "What can I help you learn about today?"
            ],
            "combine": lambda i, c, p, o: f"{i} {c}. {p}. {o}"
        }
    }

    # Check for conversational prompts
    prompt_lower = prompt.lower().strip()
    for pattern, template in conversational_templates.items():
        if re.match(pattern, prompt_lower):
            components = {
                key: random.choice(values)
                for key, values in template.items()
                if key != "combine"
            }
            response = template["combine"](*components.values())
            return response
    
    # Handle temperature and do_sample compatibility
    if temperature == 0.0:
        do_sample = False
    elif do_sample and temperature <= 0.0:
        temperature = 0.0
    
    # Updated few-shot examples for single prompt input
    few_shot_examples = [
        {
            "prompt": "Summarize the key findings of the latest adult education policy research.",
            "response": (
                "Recent adult education policy research highlights increased literacy rates and vocational skills development, "
                "particularly in underserved regions, leading to improved employability and economic outcomes."
            )
        },
        {
            "prompt": "What are the socio-economic factors that affect public health?",
            "response": (
                "While socio-economic factors like income, education, and employment significantly impact public health, "
                "my expertise lies in adult education policy. Would you like me to provide insights on how adult education "
                "can address these factors, or focus on a related policy topic?"
            )
        }
    ] if use_few_shot else []

    # Construct few-shot examples section
    few_shot_prompt = ""
    if few_shot_examples:
        few_shot_prompt = "\n\n### Examples:\n"
        for example in few_shot_examples:
            few_shot_prompt += (
                f"#### Example Prompt:\n{example['prompt']}\n\n"
                f"#### Example Response:\n{example['response']}\n\n"
            )

    # Refined CoT prompt for single input
    cot_prompt = (
        "\nPlease reason step by step to ensure a clear and accurate response. "
        "Focus on the prompt and provide a professional answer, prioritizing adult education policy if relevant. "
        "If the prompt is outside this domain, acknowledge it and offer to assist within my expertise."
    ) if use_cot else ""

    # Construct Alpaca-style prompt
    alpaca_prompt = (
        f"### System:\n{system_message}\n\n"
        "Below is a prompt that describes a task or question. "
        "Write a response that appropriately completes the request, ensuring relevance to adult education policy when applicable.\n"
        f"{few_shot_prompt}"
        "### Prompt:\n"
        f"{prompt}{cot_prompt}\n\n"
        "### Response:\n"
    )

    # Check prompt length
    tokenized_prompt = tokenizer(alpaca_prompt, return_tensors="pt", truncation=False)
    if tokenized_prompt.input_ids.size(1) > tokenizer.model_max_length:
        raise ValueError("Prompt exceeds model's maximum context length.")

    # Tokenize prompt
    inputs = tokenizer(alpaca_prompt, return_tensors="pt", padding=True, truncation=True)
    inputs = {key: value.to(model.device) for key, value in inputs.items()}

    # Generate output
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        do_sample=do_sample,
        temperature=temperature if do_sample else None,
        top_p=top_p if do_sample else None,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode generated text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract response part
    response_start = generated_text.find("### Response:") + len("### Response:\n")
    if response_start != -1:
        generated_text = generated_text[response_start:].strip()
    
    return generated_text

### Hi - Converstion with our LLM

In [52]:
response1 = generate_alpaca_text(
    prompt="hi",
    tokenizer=tokenizer,
    model=peft_model
)
print(response1)

Good day! I'm fully operational and here to help. How can I support your needs today?


### Who are you? - Converstion with our LLM

In [53]:
response2 = generate_alpaca_text(
    prompt="who are you?",
    tokenizer=tokenizer,
    model=peft_model
)
print(response2)

I am SocioLens, an AI assistant specializing in adult education policy, developed by Yash, Shrestha, and Parin. What can I help you with today?


### Tell me about you? - Converstion with our LLM

In [54]:
response3 = generate_alpaca_text(
    prompt="Tell me about you?",
    tokenizer=tokenizer,
    model=peft_model
)
print(response3)

I am SocioLens, a large language model created by Yash, Shrestha, and Parin. I'm built to offer reliable and detailed insights, focusing on adult education policy. What would you like to explore?


In [55]:
prompt = """U.S. Healthcare vs. Other High-Income Countries abstract
This report compares the quality of healthcare in the United States to other high-income countries, 
focusing on key metrics such as life expectancy, all-cause mortality, maternal mortality, and premature death. 
It discusses how high healthcare spending in the U.S. does not translate into better outcomes."""


response4 = generate_alpaca_text(
    prompt=prompt,
    max_length=512,
    temperature=0.0,
    top_p=0.9,
    use_few_shot=True,
    use_cot=True,
    tokenizer=tokenizer,
    model=peft_model
)
print(response4)  

The United States consistently ranks among the highest in healthcare spending, but this wealth does not translate into better outcomes. Our report analyzes 20 years of data, highlighting the following key findings: While the U.S. has the highest all-cause mortality, it also has the highest maternal mortality, with 10 times the rate of other high-income countries; the U.S. has the highest premature death rates, with 20 percent higher all-cause mortality and 30 percent higher cardiovascular mortality; and the U.S. has the highest all-cause mortality for children under 5, with 50 percent higher rates than other high-income countries. These statistics, along with the high cost of healthcare, suggest that the U.S. needs to improve its healthcare system, focusing on adult education and other relevant policy areas.


In [56]:
prompt = "What is the policy impact of adult education on food security?"

response5 = generate_alpaca_text(
    prompt=prompt,
    max_length=512,
    temperature=0.0,
    top_p=0.9,
    use_few_shot=True,
    use_cot=True,
    tokenizer=tokenizer,
    model=peft_model
)
print(response5) 

#While adult education can improve literacy and vocational skills, its direct impact on food security is limited. 
#However, it indirectly enhances employment and economic stability, reducing food insecurity through increased 
#income and stability. Considerable research suggests that adult education programs, when integrated with food 
#security initiatives, can provide additional support, but the specific policy implications are nuanced and 
#require further study.

While adult education can improve literacy and vocational skills, the direct impact on food security is limited. However, the increased employability and economic stability from adult education can indirectly reduce food insecurity through improved income and access to resources. The focus on adult education, rather than food security specifically, reflects my expertise in adult education policy.


In [57]:
prompt = "Recent policy interventions in adult education aim to improve literacy and numerical skills. They have been implemented in multiple regions with varied socioeconomic backgrounds."

response5 = generate_alpaca_text(
    prompt=prompt,
    max_length=512,
    temperature=0.0,
    top_p=0.95,
    use_few_shot=True,
    use_cot=True,
    tokenizer=tokenizer,
    model=peft_model
)
print(response5)  

While socioeconomic factors like income, education, and employment influence public health, my focus is on adult education policy. The latest research highlights increased literacy rates and vocational skills development, particularly in underserved regions, leading to improved employability and economic outcomes. However, these gains are not uniform, as some regions with higher poverty and lower education levels see less impact. To address these disparities, future research should explore how adult education interventions can target specific socioeconomic groups, ensuring equitable outcomes.


In [48]:
# Save your fine-tuned model to a local directory
model_save_path = "./SocioLens-llama-1"
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)

('./SocioLens-llama-1/tokenizer_config.json',
 './SocioLens-llama-1/special_tokens_map.json',
 './SocioLens-llama-1/tokenizer.json')

In [49]:
torch.save(peft_model.state_dict(), "./model/SocioLens-llama-1.pth")

In [50]:
from huggingface_hub import HfApi, HfFolder, Repository

from huggingface_hub import login
login(token="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")

trainer.push_to_hub("iyashnayi/SocioLens-llama-1")



tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.69k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/18.4M [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/iyashnayi/SocioLens-llama-1/commit/ae96520ca50f904ed06192c200ce70fdf0e616c6', commit_message='iyashnayi/SocioLens-llama-1', commit_description='', oid='ae96520ca50f904ed06192c200ce70fdf0e616c6', pr_url=None, repo_url=RepoUrl('https://huggingface.co/iyashnayi/SocioLens-llama-1', endpoint='https://huggingface.co', repo_type='model', repo_id='iyashnayi/SocioLens-llama-1'), pr_revision=None, pr_num=None)