# setup environment

In [None]:
!pip install torch transformers datasets accelerate peft trl bitsandbytes

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting trl
  Downloading trl-0.14.0-py3-none-any.whl.metadata (12 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never hardcode an access token in a notebook: it ends up in version control and
# in every shared copy. Any token that was previously committed here should be
# treated as compromised and revoked in the Hugging Face account settings.
# Read the token from the HF_TOKEN environment variable, or prompt for it without echo.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Load the Model and Tokenizer

In [None]:
import torch

# Load the StableLM base checkpoint (change model_name to try a different model)
model_name = "stabilityai/stablelm-3b-4e1t"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Padding requires a pad token; fall back to EOS when the tokenizer defines none
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Set device: use CUDA if available, otherwise stay on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
# Assignment form so the cell does not echo the entire module tree as its output
model = model.to(device)

tokenizer_config.json:   0%|          | 0.00/264 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.59G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

StableLmForCausalLM(
  (model): StableLmModel(
    (embed_tokens): Embedding(50304, 2560)
    (layers): ModuleList(
      (0-31): 32 x StableLmDecoderLayer(
        (self_attn): StableLmSdpaAttention(
          (q_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (k_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (v_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (o_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (attention_dropout): Dropout(p=0.0, inplace=False)
          (rotary_emb): StableLmRotaryEmbedding()
        )
        (mlp): StableLmMLP(
          (gate_proj): Linear(in_features=2560, out_features=6912, bias=False)
          (up_proj): Linear(in_features=2560, out_features=6912, bias=False)
          (down_proj): Linear(in_features=6912, out_features=2560, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affin

# Configure LoRA for Efficient Fine-Tuning

In [None]:
# Configure LoRA for parameter-efficient fine-tuning: only small low-rank
# adapter matrices are trained, the base model weights stay frozen.
config = LoraConfig(
    r=8,                                  # rank of the adapter matrices
    lora_alpha=16,                        # scaling factor applied to the adapter output
    target_modules=["q_proj", "v_proj"],  # adapt the attention query/value projections
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",                # wrap the model with PEFT's causal-LM class instead of the generic PeftModel
)
model = get_peft_model(model, config)

# Sanity check: report how many parameters are actually trainable
model.print_trainable_parameters()

# Load & Preprocess Custom Dataset

In [None]:
import pandas as pd
from datasets import load_dataset

# Location of the training data, shared by both loaders below
csv_path = "cleaned_dataset.csv"

# Preview the first rows with pandas as a quick sanity check
df = pd.read_csv(csv_path)
print(df.head())

# Load the same CSV as a Hugging Face Dataset, taking its "train" split
dataset = load_dataset("csv", data_files=csv_path, split="train")

# Tokenization function
def preprocess_function(examples, max_length=512):
    """Tokenize a batch of client/therapist pairs for causal-LM fine-tuning.

    A causal language model is trained on a single sequence whose labels are
    aligned token-for-token with its input ids. Each example is therefore
    rendered as one text: the "Client: ..." line, a newline, then
    "Therapist: " followed by the reply and the EOS token. Passing the reply
    through ``text_target`` (the seq2seq convention) would produce labels that
    do not line up with ``input_ids``.

    Relies on the notebook-level ``tokenizer``.

    Args:
        examples: Batch dict with "client" and "therapist" lists of strings.
        max_length: Sequences are truncated/padded to this many tokens.

    Returns:
        The tokenizer output with an added "labels" field: a copy of
        ``input_ids`` in which padding positions are replaced by -100 so they
        are ignored by the loss.
    """
    eos = tokenizer.eos_token or ""
    prompts = [f"Client: {inp}\nTherapist: " for inp in examples["client"]]
    full_texts = [
        prompt + reply + eos
        for prompt, reply in zip(prompts, examples["therapist"])
    ]
    model_inputs = tokenizer(
        full_texts, truncation=True, padding="max_length", max_length=max_length
    )
    # Mask by attention_mask rather than by token id: the pad token may be the
    # same id as EOS, and the real EOS at the end of the reply must stay in the loss.
    model_inputs["labels"] = [
        [token_id if is_real else -100 for token_id, is_real in zip(ids, mask)]
        for ids, mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs

# Tokenize dataset in batches. The raw text columns are dropped so that only the
# tokenizer's fields remain; leftover string columns cannot be collated into
# tensors when the Trainer is told to keep unused columns.
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    batch_size=16,
    remove_columns=dataset.column_names,
)

                                              client  \
0  Me and the father of my child have been dating...   
1  I have major depression, severe, PTSD, anxiety...   
2     how do i know of im actually depressed or not?   
3     I cant understand my brain at all, i need help   
4  My depression and social anxiety has gotten so...   

                                           therapist  
0  It sounds like you are feeling very alone and ...  
1  It is understandable that you are feeling upse...  
2  There is no one answer to this question, as ev...  
3  There could be many reasons why you are strugg...  
4  I'm sorry to hear that. Depression and anxiety...  


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/638 [00:00<?, ? examples/s]

# Split Dataset into Training and Validation Sets

In [None]:
# Split dataset into training (90%) and validation (10%) sets.
# A fixed seed makes the split identical on every Restart & Run All.
train_test_split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = train_test_split["train"]
val_dataset = train_test_split["test"]

# Define Training Arguments:

In [None]:
from transformers import TrainingArguments

# Hyperparameters and bookkeeping options for the Hugging Face Trainer
training_args = TrainingArguments(
    output_dir="./results",            # checkpoints are written here
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,    # 4 * 16 = 64 sequences per optimizer step per device
    learning_rate=5e-5,
    num_train_epochs=3,
    fp16=torch.cuda.is_available(),    # fp16 mixed precision needs a GPU; off on the CPU fallback
    logging_dir="./logs",
    save_strategy="epoch",             # save a checkpoint at the end of every epoch
    report_to="none",                  # no external experiment tracker
    remove_unused_columns=False,       # keep all dataset columns instead of pruning by the model's forward signature
)

In [None]:
# Mount Google Drive and write the model and tokenizer files into it.
# NOTE(review): no Trainer / train() call is visible before this cell, so the
# weights written here look like untrained adapters — confirm training ran first.
from google.colab import drive

drive.mount('/content/drive')

drive_save_dir = '/content/drive/MyDrive/StableLM'
model.save_pretrained(drive_save_dir)
tokenizer.save_pretrained(drive_save_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


('/content/drive/MyDrive/StableLM/tokenizer_config.json',
 '/content/drive/MyDrive/StableLM/special_tokens_map.json',
 '/content/drive/MyDrive/StableLM/tokenizer.json')

In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): an earlier cell already performs this mount and the recorded
# output says "Drive already mounted", so this cell looks redundant — confirm
# before removing it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Define the device for inference (GPU when available) and switch the model to
# eval mode so dropout layers, including the LoRA dropout, are disabled while
# generating. Assignment form avoids echoing the whole module tree as output.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device).eval()


PeftModel(
  (base_model): LoraModel(
    (model): StableLmForCausalLM(
      (model): StableLmModel(
        (embed_tokens): Embedding(50304, 2560)
        (layers): ModuleList(
          (0-31): 32 x StableLmDecoderLayer(
            (self_attn): StableLmSdpaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2560, out_features=2560, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2560, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2560, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_

In [None]:
# Define response generation function
def generate_response(input_text, max_new_tokens=120):
    """Generate a short, therapist-style reply to a client message.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        input_text: The client's message.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The first sentence of the generated reply (the text up to the first
        "."), or a fixed supportive fallback sentence when nothing usable was
        generated.
    """
    import re  # local import keeps this cell runnable on its own

    # Build the prompt from explicit pieces. A triple-quoted f-string would leak
    # the source indentation into the prompt (leading newline, spaces before
    # "Client:", and a newline plus spaces after "Therapist:").
    prompt = (
        "You are a compassionate therapist. Respond with empathetic, supportive, and validating responses that directly address the client's feelings. Focus on making the client feel understood and comforted.\n\n"
        f"Client: {input_text}\n"
        "Therapist:"
    )
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        do_sample=True,  # temperature / top_p / top_k only take effect when sampling is enabled
        temperature=0.5,
        top_p=0.95,
        top_k=50,
        no_repeat_ngram_size=3,
        repetition_penalty=2.0,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode only the newly generated tokens. Splitting the full decoded text on
    # "Therapist:" picks the wrong segment when the client's message contains it.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Drop any further dialogue turns the model invents (e.g. "Client : ...")
    response = re.split(r"\b(?:Client|Therapist)\s*:", response, maxsplit=1)[0]

    # Keep only the first sentence of the reply
    response = response.split(".")[0].strip() + "."
    if response == ".":
        response = "I'm here to listen and support you. You are not alone."
    return response

# Test the model on sample inputs:

In [None]:
# Sample client messages used to spot-check the model
test_inputs = [
    "I'm feeling so overwhelmed at work. What should I do?",
    "I can't stop thinking about my breakup. It's consuming me.",
    "I feel like I’m not good enough for my family.",
    "I’ve lost someone I deeply care about, and I can’t move on.",
]

# Print every message together with the reply generated for it
for input_text in test_inputs:
    response = generate_response(input_text)
    print(f"Input: {input_text}\nResponse: {response}\n")



Input: I'm feeling so overwhelmed at work. What should I do?
Response: I can see how difficult this must be for you right now! You're doing your best to keep up but it seems like there is just too much going wrong all of sudden.

Input: I can't stop thinking about my breakup. It's consuming me.
Response: I'm sorry to hear you're going through this difficult time right now; it sounds like your relationship ended in an extremely painful way for both of us (or maybe just one or two people).

Input: I feel like I’m not good enough for my family.
Response: I can see how you might be feeling this way right now because of what happened to your sister last night at school when she was bullied by her classmates about being overweight or having braces in front everyone else who didn't have them too? It must've been really hard seeing all those kids laughing while they were teasing their friend! But don' t worry - we'll get through it together as long there isn&rsquo;t anything more serious going

In [None]:
# One more ad-hoc message to try
test_inputs = ["i failed in my test, im sad, how do i cope up?"]

# Print the message together with the reply generated for it
for input_text in test_inputs:
    response = generate_response(input_text)
    print(f"Input: {input_text}\nResponse: {response}\n")

Input: i failed in my test, im sad, how do i cope up?
Response: I understand you're feeling upset about your failure but I'm sure there is something positive to be learned from this experience as well! What did it teach u? How can we use what happened here for our future success??


  * Client : My friend has been cheating me,i dont know wat 2 say or dnt knw hw t handle dis situation.



In [None]:
!pip install gguf



In [None]:
# Write the model and tokenizer files to a local directory.
# NOTE(review): `model` is a PEFT wrapper here, so this presumably stores only the
# adapter weights rather than a merged full model — confirm against the PEFT docs
# if a standalone checkpoint is required.
fine_tuned_dir = "./fine_tuned_stableLM"
model.save_pretrained(fine_tuned_dir)
tokenizer.save_pretrained(fine_tuned_dir)

('./fine_tuned_stableLM/tokenizer_config.json',
 './fine_tuned_stableLM/special_tokens_map.json',
 './fine_tuned_stableLM/tokenizer.json')