In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

batchsiz = 64
blocksiz = 128
epochs = 700
evalIntervals = 100
lr = 3e-4
device = "cuda" if torch.cuda.is_available() else "cpu"
evaliters = 200
nemb = 158
nhead = 4
nlayers = 4
dropout = 0.2


with open('/kaggle/input/200232823/train.csv', 'r', encoding='utf-8') as f:
    txt = f.read()

# Initialize tokenizer
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")

def enc(txt, tokenizer):
    tokens = tokenizer(txt, return_tensors="pt", truncation=True, padding=False)["input_ids"]
    return tokens.flatten()

data = torch.tensor(enc(txt, tokenizer), dtype=torch.long)

n = int(0.9 * len(data))  # First 90% for training, last 10% for validation
trainData = data[:n]
valData = data[n:]

print(f"Training data size: {trainData.size(0)}")
print(f"Validation data size: {valData.size(0)}")


vocabsiz = len(tokenizer)
print("vocab siz: ", vocabsiz)


def getBatch(split, block_size=128, batch_size=32):
    dataset = trainData if split == "train" else valData
    ix = torch.randint(0, len(dataset) - block_size, (batch_size,))

    x = torch.stack([dataset[i:i + block_size] for i in ix])  # Inputs
    y = torch.stack([dataset[i + 1:i + block_size + 1] for i in ix])  # Targets
    x, y = x.to(device), y.to(device)

    return x, y



@torch.no_grad()
def estimateLoss():
    out = { }
    model.eval()
    for split in ["train", "val"]:
        losses = torch.zeros(evaliters)
        for k in range(evaliters):
            X, Y = getBatch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()

    model.train()
    return out

class Head(nn.Module):
    def __init__(self, headsiz):
        super().__init__()
        self.key = nn.Linear(nemb, headsiz, bias=False)
        self.quary = nn.Linear(nemb, headsiz, bias=False)
        self.value = nn.Linear(nemb, headsiz, bias=False)
        self.dropout = nn.Dropout(dropout)

        self.register_buffer("tril", torch.tril(torch.ones(blocksiz, blocksiz)))

    def forward(self, x):
        B, T, C = x.shape
        k = self.key(x)
        q = self.quary(x)

        w = q @ k.transpose(-2, -1) * k.shape[-1]**-0.5
        w = w.masked_fill(self.tril[:T, :T] == 0, float("-inf"))
        w = F.softmax(w, dim=-1)
        w = self.dropout(w)

        v = self.value(x)

        out = w @ v
        return out

class MultiHeadAttention(nn.Module):
    def __init__(self, nhead, headsiz):
        super().__init__()
        self.heads = nn.ModuleList([Head(headsiz) for _ in range(nhead)])
        self.proj = nn.Linear(headsiz * nhead, nemb)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.dropout(self.proj(out))

        return out

class FeedForwardNetwork(nn.Module):
    def __init__(self, nemb):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(nemb, 4 * nemb),
            nn.ReLU(),
            nn.Linear(4 * nemb, nemb),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

class Block(nn.Module):
    def __init__(self, nemb, nhead):
        super().__init__()
        headsiz =  nemb // nhead
        self.selfattn = MultiHeadAttention(nhead, headsiz)
        self.ffn = FeedForwardNetwork(nemb)
        self.ln_1 = nn.LayerNorm(nemb)
        self.ln_2 = nn.LayerNorm(nemb)

    def forward(self, x):
        x = x + self.selfattn(self.ln_1(x))
        x = x + self.ffn(self.ln_2(x))

        return x

class GPTLanguageModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.wte = nn.Embedding(vocabsiz, nemb)
        self.wpe = nn.Embedding(blocksiz, nemb)
        self.block = nn.Sequential(*[Block(nemb, nhead=nhead) for _ in range(nlayers)])
        self.ln_finl = nn.LayerNorm(nemb)
        self.lm_head = nn.Linear(nemb, vocabsiz)

        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, ix, targt=None):
        B, T = ix.shape

        tokEmb = self.wte(ix)
        posEmb = self.wpe(torch.arange(T, device=device))
        x = tokEmb + posEmb
        x = self.block(x)
        x = self.ln_finl(x)

        logits = self.lm_head(x)

        if targt is None:
            loss = None

        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targt = targt.view(B*T)
            loss = F.cross_entropy(logits, targt)

        return logits, loss
    def generate(self, idx, max_new_tokens, tokenizer):
        # idx is (B, T) array of indices in the current context
        for _ in range(max_new_tokens):
            # crop idx to the last block_size tokens
            idx_cond = idx[:, -blocksiz:]
            # get the predictions
            logits, loss = self(idx_cond)
            # focus only on the last time step
            logits = logits[:, -1, :]  # becomes (B, C)
            # apply softmax to get probabilities
            probs = F.softmax(logits, dim=-1)  # (B, C)
            # sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1)  # (B, 1)
            # append sampled index to the running sequence
            idx = torch.cat((idx, idx_next), dim=1)  # (B, T+1)

        # Decode the generated token indices to text
        generated_text = tokenizer.decode(idx[0].cpu().numpy().tolist(), skip_special_tokens=True)
        return generated_text

model = GPTLanguageModel()
m = model.to(device)
# Use Torch.Compinle,, well Expect that fucking Error
useCompile = False
if useCompile:
    model = torch.compile(model)
optim = torch.optim.AdamW(model.parameters(), lr=lr)

optim = torch.optim.AdamW(model.parameters(), lr=lr)

lossi = []
for i in range(epochs):
    if i % evalIntervals == 0 or i == epochs - 1:
        losses = estimateLoss()
        lossi.append(losses["val"].item())
        print(f"Step {i} | train loss {losses['train']:.4f} | val loss {losses['val']:.4f}")

    xb, yb = getBatch("train")
    logits, loss = model(xb, yb)

    optim.zero_grad()
    loss.backward()
    optim.step()


def saveCheckpnt(model, optimizer, epoch, loss, filepath):
    checkPnt = {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "epoch": epoch,
        "loss": loss,
    }
    torch.save(checkPnt, filepath)
    print(f"Checkpoint saved to {filepath}")

# Saving model checkpoint
saveCheckpnt(model, optim, epochs-1, lossi[-1], "TherapyModelTrainFinl.pth")


  data = torch.tensor(enc(txt, tokenizer), dtype=torch.long)


Training data size: 1843
Validation data size: 205
vocab siz:  49152
Step 0 | train loss 10.8293 | val loss 10.8543
Step 100 | train loss 4.1428 | val loss 4.8971
Step 200 | train loss 1.3494 | val loss 3.5188
Step 300 | train loss 0.3244 | val loss 3.5315
Step 400 | train loss 0.1190 | val loss 3.7224
Step 500 | train loss 0.0673 | val loss 3.7361
Step 600 | train loss 0.0504 | val loss 3.8606
Step 699 | train loss 0.0407 | val loss 3.9559
Checkpoint saved to TherapyModelTrainFinl.pth


# Genarate From the Pre-Trained MODEL


# --------------------------------------------------------

In [3]:

context = torch.zeros((1, 1), dtype=torch.long, device=device)  # Initial context
genTxt = model.generate(context, max_new_tokens=500, tokenizer=tokenizer)

print(genTxt) 


 that instead of trying to change the thoughts, you change how you respond to them. You learn skills that allow you to manage difficult thoughts and feelings differently so they don't have the same impact on you that
"I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here.
   I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it.
   How can I change my feeling of being worthless to everyone?","That is present; and to reach out'm worthless with my client to work with my issues with strengthening  self esteem, by guiding my client with CBT practices. CBT practices. CBT helps with gaining a better awareness of how your thought process influences your belief system, and how your beliefs impact your actions and the outcome of your behaviors.  This process isn’t relationships, stress, self esteem, codependency, etc."
"I'm going through some t

# --------------------------------------------------------

# Fine Tune The Model..

In [6]:

################
##############
###
##   FINE TUNE THE MODEL
###
##############
################

# Load the tokenizer
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")

# Define parameters
batch_size = 64
block_size = 128
epochs = 40  # Fewer epochs for fine-tuning
lr = 1e-5  # Lower learning rate for fine-tuning
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load your pre-trained model
checkpoint_path = "TherapyModelTrainFinl.pth"
model = GPTLanguageModel()
model.load_state_dict(torch.load(checkpoint_path, map_location=device)["model_state_dict"])
model = model.to(device)

# Load the fine-tuning dataset
with open('/kaggle/input/200232823/finetune_train.csv', 'r', encoding='utf-8') as f:
    fine_tune_text = f.read()

# Tokenize the fine-tuning data
def encode_text(text, tokenizer):
    tokens = tokenizer(text, return_tensors="pt", truncation=True, padding=False)["input_ids"]
    return tokens.flatten()

fine_tune_data = encode_text(fine_tune_text, tokenizer)

# Split the data into training and validation sets
n = int(0.9 * len(fine_tune_data))  # 90% train, 10% validation
train_data = fine_tune_data[:n]
val_data = fine_tune_data[n:]

# Function to create batches
def get_batch(split, block_size=128, batch_size=32):
    dataset = train_data if split == "train" else val_data
    ix = torch.randint(0, len(dataset) - block_size, (batch_size,))
    x = torch.stack([dataset[i:i + block_size] for i in ix])
    y = torch.stack([dataset[i + 1:i + block_size + 1] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

# Fine-tune the model
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

for epoch in range(epochs):
    model.train()
    for _ in range(len(train_data) // batch_size):
        xb, yb = get_batch("train")
        logits, loss = model(xb, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation loss
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for _ in range(len(val_data) // batch_size):
            xb, yb = get_batch("val")
            _, loss = model(xb, yb)
            val_loss += loss.item()
    val_loss /= (len(val_data) // batch_size)
    print(f"Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}")

# Save the fine-tuned model
torch.save(model.state_dict(), "TherapyModelFineTuned.pth")
print("Fine-tuning complete. Model saved as TherapyModelFineTuned.pth")

  model.load_state_dict(torch.load(checkpoint_path, map_location=device)["model_state_dict"])


Epoch 1, Validation Loss: 4.6347
Epoch 2, Validation Loss: 4.4912
Epoch 3, Validation Loss: 4.5746
Epoch 4, Validation Loss: 4.4782
Epoch 5, Validation Loss: 4.3949
Epoch 6, Validation Loss: 4.5260
Epoch 7, Validation Loss: 4.4914
Epoch 8, Validation Loss: 4.5722
Epoch 9, Validation Loss: 4.5415
Epoch 10, Validation Loss: 4.4911
Epoch 11, Validation Loss: 4.5734
Epoch 12, Validation Loss: 4.5097
Epoch 13, Validation Loss: 4.5150
Epoch 14, Validation Loss: 4.3287
Epoch 15, Validation Loss: 4.3545
Epoch 16, Validation Loss: 4.5533
Epoch 17, Validation Loss: 4.4574
Epoch 18, Validation Loss: 4.4209
Epoch 19, Validation Loss: 4.6485
Epoch 20, Validation Loss: 4.4906
Epoch 21, Validation Loss: 4.4939
Epoch 22, Validation Loss: 4.4101
Epoch 23, Validation Loss: 4.4335
Epoch 24, Validation Loss: 4.3816
Epoch 25, Validation Loss: 4.5584
Epoch 26, Validation Loss: 4.3650
Epoch 27, Validation Loss: 4.4761
Epoch 28, Validation Loss: 4.5086
Epoch 29, Validation Loss: 4.4997
Epoch 30, Validation Lo

# --------------------------------------------------------

# Chat With The Model..
 * after 10 epoch of fine-tune

In [5]:

# Chat with the fine-tuned model
def chat_with_model(prompt, model, tokenizer, max_new_tokens=50):
    model.eval()
    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(device)
    with torch.no_grad():
        generated_text = model.generate(input_ids, max_new_tokens, tokenizer)
    return generated_text

# Example chat
while True:
    prompt = input("You: ")
    if prompt.lower() in ["exit", "quit"]:
        break
    response = chat_with_model(prompt, model, tokenizer)
    print(f"Model: {response}")

You:  i feel anxious, and i don't know how to deal with it


Model: i feel anxious, and i don't know how to deal with it.
   How can I change my feeling of being worthless to everyone?",First thing I'd suggest is getting the sleep you need or it will impact how you think and feel. I'd look at finding what just pop in your life and what you can


You:  hi, how are you


Model: hi, how are you respond to them. You learn skills that allow you to manage difficult thoughts and feelings differently so they don't have the same impact on you that they do right now. For some people, they actually DO begin to experience less hurtful thoughts once they learn how


You:  how can i deal with my anxiety


Model: how can i deal with my anxiety, stress, self esteem, codependency, etc."
" neurushed'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here.
   I


You:  who are you?


Model: who are you?"," watch out for this, it's hard to do. Our minds are so amazing that the minute you change your thought another one can be right there to take it's place. Without your permission, another sign of a worsening depression. The new thought


You:  how to deal with panic atacks


Model: how to deal with panic atacks to hang out with.Seriously, the social context in which a person lives is a big influence in self-esteem.Otherwise, you can go round and round trying to understand why you're not worthless, then go back to the same crowd


You:  why i fell i'm worthless.


Model: why i fell i'm worthless. When I work with those that are experiencing concerns related to feeling of depression and issues with self esteem suicidal thoughts Collaborative generally work with my client to help build coping skills to reduce level of depression and to assist with strengthening  self esteem, by guiding my


You:  hi


Model: hi even self-273-8255. The text line is #741741. I hope some other colleagues will provide you more suggestions. Be well...Robin Landwehr, DBH, LP


You:  exit


# Chat with the Model after 40 Epoch of Fine-tune

In [7]:

# Chat with the fine-tuned model
def chat_with_model(prompt, model, tokenizer, max_new_tokens=50):
    model.eval()
    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(device)
    with torch.no_grad():
        generated_text = model.generate(input_ids, max_new_tokens, tokenizer)
    return generated_text

# Example chat
while True:
    prompt = input("You: ")
    if prompt.lower() in ["exit", "quit"]:
        break
    response = chat_with_model(prompt, model, tokenizer)
    print(f"Model: {response}")

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


You:  hi, how are you


Model: hi, how are you as you are!
" springtimelessness is unfortunately common. In fact, if not all, have felt this’t easy but many people, have felt this to some degree at some point in their life. You are not alone. Changing our


You:  i feel anxious how can i deal with it


Model: i feel anxious how can i deal with it helps teach an individual that we don’t always have control over what happens in our lives but we can esteem, codependency, etc."
"ower executives process isn’t always have control how we interpret, toxic relationships, etc."
"


You:  why i feel very unfulfilled 


Model: why i feel very unfulfilled  As for different approach, and there are counselors out there that can help you with this. The idea is that instead of trying to change the thoughts, you change how you respond to them. You learn skills that allow you to manage difficult thoughts and


You:  how can i calm my mind.


Model: how can i calm my mind. Seek out a counselor who provides NCC Butterflyshire with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here.
   I've never tried or contemplated suicide. I've always


You:  iv'e attempt to suicide.


Model: iv'e attempt to suicide. I've always wanted to fix my issues, but I never get around to it.
   How can I change my feeling of being worthless to everyone?"," stress, self esteemerally applied."
"Typ once they learn how to manage the ones they


You:  what is your name


Model: what is your name another one can be right there to take it's place. Without your permission, another thought can just pop in there. The new thought may feel worse than the last one! My guess is that you have tried several things to improve this on your own


You:  ok thank you


Model: ok thank you for your question and seeking advice on this. Feelings of worthlessness is unfortunately common. In fact, most people, if not all, have felt this to some degree at some point in their life. You are not alone. Changing our feelings is


You:  exit
