##Install Requirements

In [1]:
!pip install transformers trl accelerate torch bitsandbytes peft datasets -qU

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.3/155.3 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.5/755.5 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.4/183.4 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.7/536.7 kB[0m [31m52.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━

In [5]:
dataset_path = '/content/confession_data.txt'

##Data Preperation

In [6]:
import pandas as pd

posts = []
comments = []

current_post = None
current_comment = None


with open(dataset_path, 'r', encoding='utf-8') as file:
    for line in file:
        if line.startswith('Post:'):
            if current_post and current_comment:
                posts.append(current_post)
                comments.append(current_comment)
                current_comment = None

            current_post = line.strip().replace('Post: ', '')
        elif line.startswith('Top Comment:'):
            current_comment = line.strip().replace('Top Comment: ', '')

if current_post and current_comment:
    posts.append(current_post)
    comments.append(current_comment)

df = pd.DataFrame({
    'Post': posts,
    'Comments': comments
})

In [7]:
df.head()

Unnamed: 0,Post,Comments
0,I'm putting my extremely profoundly disabled 7...,Take your other son to Disney or some other va...
1,I added Tabasco to Wendy’s chili and used it t...,Work smart not hard
2,Cop pulled me over and I called 911 and lied t...,I once got out of a ticket because the cop got...
3,I used to bully my disabled brother for years,I am disabled and able since birth. I am so ha...
4,I’ve stolen hundreds of dollars worth of water...,I absolve you of your sins. Enjoy your water.


In [8]:
len(df)

500

In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['Post'], df['Comments'], test_size=0.2, random_state=42)

Prompt Template

In [11]:
"""
<s>### Instruction:
You are a therapist. Answer the questions.
### Question:
{question}

### Answer:
{answer}</s>

"""

'\n<s>### Instruction:\nYou are a therapist. Answer the questions.\n### Question:\n{question}\n\n### Answer:\n{answer}</s>\n\n'

Converting our dataframe to HF Dataset

In [12]:
from datasets import Dataset, DatasetDict

train_df = pd.DataFrame({'prompt': X_train, 'response': y_train})
test_df = pd.DataFrame({'prompt': X_test, 'response': y_test})


train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

dataset_dict = DatasetDict({
    'train': train_dataset,
    'test': test_dataset
})

dataset_dict

DatasetDict({
    train: Dataset({
        features: ['prompt', 'response', '__index_level_0__'],
        num_rows: 400
    })
    test: Dataset({
        features: ['prompt', 'response', '__index_level_0__'],
        num_rows: 100
    })
})

In [13]:
dataset_dict["train"][0]

{'prompt': 'I used to bully kids for being gay in high school, while I was secretly having gay sex and was HIV positive. I lied and said I had tried heroin to cover up the fact that I got HIV from gay sex.',
 'response': "My boyfriend in high school had a bully. He was bullied for being a femme gay guy. When we went to college, we spotted his bully in a gay club. We picked him up and fucked the hell out of him... It's been 20+ years, but it made me realize something about the bullies who hated gay kids...",
 '__index_level_0__': 249}

In [14]:
def create_prompt(sample):
    """
    Update the prompt template:
    Combine both the prompt and input into a single column.
    """

    bos_token = "<S>"
    system_message = "You are a therapist. Answer the questions."
    answer = sample["response"].replace("\n\n### Instruction\n", "").replace("\n### Answer\n", "").strip()
    question = sample["prompt"]
    eos_token = "</s>"

    full_prompt = ""
    full_prompt += bos_token
    full_prompt += "### Instruction:"
    full_prompt += "\n" + system_message
    full_prompt += "\n\n### Question:"
    full_prompt += "\n" + question
    full_prompt += "\n\n### Answer:"
    full_prompt += "\n" + answer
    full_prompt += eos_token
    return full_prompt

In [15]:
create_prompt(dataset_dict["train"][0])

"<S>### Instruction:\nYou are a therapist. Answer the questions.\n\n### Question:\nI used to bully kids for being gay in high school, while I was secretly having gay sex and was HIV positive. I lied and said I had tried heroin to cover up the fact that I got HIV from gay sex.\n\n### Answer:\nMy boyfriend in high school had a bully. He was bullied for being a femme gay guy. When we went to college, we spotted his bully in a gay club. We picked him up and fucked the hell out of him... It's been 20+ years, but it made me realize something about the bullies who hated gay kids...</s>"

##Loading the Base Model

In [16]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

In [17]:
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    device_map='auto',
    quantization_config=nf4_config,
    use_cache=False
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [18]:
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

Let's example how well the model does at this task currently:

In [19]:
def generate_response(prompt, model):
  encoded_input = tokenizer(prompt,  return_tensors="pt", add_special_tokens=True)
  model_inputs = encoded_input.to('cuda')

  generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)

  decoded_output = tokenizer.batch_decode(generated_ids)

  return decoded_output[0].replace(prompt, "")

In [20]:
prompt="<S>### Instruction:\nYou are a therapist. Answer the questions.\n\n### Question:\nI used to bully kids for being gay in high school, while I was secretly having gay sex and was HIV positive. I lied and said I had tried heroin to cover up the fact that I got HIV from gay sex.\n\n### Answer:"

In [21]:
generate_response(prompt, model)

"<s> \nIt sounds like you may have been struggling with your own identity and feelings related to your sexuality. It's common for people to have conflicting thoughts and emotions about their identity, especially during adolescence.\n\nIt's important to remember that seeking help and support can be a powerful way to address these issues and find peace with oneself. You may want to consider speaking with a trusted friend or family member, or with a mental health professional who can provide you with a safe and supportive space to explore your feelings and thoughts.\n\nIt's also important to prioritize your own health and well-being. Being honest with yourself and seeking treatment for any health concerns, including HIV, can help you manage your physical and emotional health and improve your overall quality of life.\n\nRemember that you are not alone, and there are people and resources available to support you as you navigate these issues.</s>"

##Setting Up The Training

In [22]:
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM"
)

In [23]:
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

##Hpyer-parameters for training

In [28]:
from transformers import TrainingArguments

args = TrainingArguments(
  output_dir = "mistral_instruct_generation",
  #num_train_epochs=5,
  max_steps = 100, # comment out this line if you want to train in epochs
  per_device_train_batch_size = 4,
  warmup_steps = 0.03,
  logging_steps=10,
  save_strategy="epoch",
  #evaluation_strategy="epoch",
  evaluation_strategy="steps",
  eval_steps=20, # comment out this line if you want to evaluate at the end of each epoch
  learning_rate=2e-4,
  fp16=True,
  lr_scheduler_type='constant',
)

In [30]:
from trl import SFTTrainer

max_seq_length = 2048

trainer = SFTTrainer(
  model=model,
  peft_config=peft_config,
  max_seq_length=max_seq_length,
  tokenizer=tokenizer,
  packing=True,
  formatting_func=create_prompt, # this will aplly the create_prompt mapping to all training and test dataset
  args=args,
  train_dataset=dataset_dict["train"],
  eval_dataset=dataset_dict["test"]
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]



In [31]:
trainer.train()



Step,Training Loss,Validation Loss
20,1.6976,1.764187
40,1.4616,1.823614
60,1.0513,2.096029
80,0.5743,2.532827
100,0.2485,3.250852




TrainOutput(global_step=100, training_loss=1.0967949843406677, metrics={'train_runtime': 2914.3703, 'train_samples_per_second': 0.137, 'train_steps_per_second': 0.034, 'total_flos': 3.50843194834944e+16, 'train_loss': 1.0967949843406677, 'epoch': 25.0})

In [32]:
trainer.save_model("Mistral-7B-Instruct-v0.1-Confession-Chat")

##Save Model and Push to Hub

In [35]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [36]:
trainer.push_to_hub("Oguzz07/Mistral-7B-Instruct-v0.1-Confession-Chat")

training_args.bin:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

events.out.tfevents.1708715863.c320da65c06b.224.0:   0%|          | 0.00/8.80k [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/109M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Oguzz07/mistral_instruct_generation/commit/7617be3a343263630447e2662d28b7a102d2db3d', commit_message='Oguzz07/Mistral-7B-Instruct-v0.1-Confession-Chat', commit_description='', oid='7617be3a343263630447e2662d28b7a102d2db3d', pr_url=None, pr_revision=None, pr_num=None)

In [37]:
#This function merges the LoRA layers into the base model and returns the merged model. LoRA layers are freed in memory.
merged_model = model.merge_and_unload()



In [38]:
def generate_response(prompt, model):
  encoded_input = tokenizer(prompt,  return_tensors="pt", add_special_tokens=True)
  model_inputs = encoded_input.to('cuda')

  generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)

  decoded_output = tokenizer.batch_decode(generated_ids)

  return decoded_output[0]

In [39]:
generate_response("<S>### Instruction:\nYou are a therapist. Answer the questions.\n\n### Question:\nI used to bully kids for being gay in high school, while I was secretly having gay sex and was HIV positive. I lied and said I had tried heroin to cover up the fact that I got HIV from gay sex.\n\n### Answer:", merged_model)

"<s> <S>### Instruction:\nYou are a therapist. Answer the questions.\n\n### Question:\nI used to bully kids for being gay in high school, while I was secretly having gay sex and was HIV positive. I lied and said I had tried heroin to cover up the fact that I got HIV from gay sex.\n\n### Answer:\nSounds like you have a lot of guilt and shame on your hands. It's a good thing you're seeking help for this. You're clearly someone who truly regrets what they did and wants to make amends. I would suggest speaking to a gay therapist or someone who specializes in issues surrounding sex, relationships, and sexuality. This could help you explore why you did what you did, how to accept yourself for who you are, and how to make amends with the people you've hurt. Good luck on your journey.</s>"

In [40]:
generate_response("I feel lonely everyday", merged_model)

'<s> I feel lonely everyday i’m out working. I wish i could work with someone in the same room at least\nFor a year now I’ve been sitting in a single room by myself working in front of my computer.\nI have good clients for my translation services and I’m very skilled but i’ve never met another translator in my life.\nI’m 33 and never thought I had this need to work face to face but since I’m doing this I miss social interactions.\nWork is too much and I work too much. And it’s like I’m invisible. No one sees me. What am I doing I don’t even know. I don’t know who I am.\nSometimes I’m crying from 4pm to 6pm just because I miss socialization\nI’m thinking if I should quit and try to find another job or something. But it’s too hard to find something these days.\nI also tried to find groups for translators but I’m too shy. I don’t know what you’re supposed to say. I haven’t been to one meeting yet. I was thinking of joining a facebook group.\nIt’s not fair that I don’t have no one else to 

In [41]:
print(generate_response("<S>### Instruction:\nYou are a therapist. Answer the questions.\n\n### Question:\nI feel lonely everyday \n\n### Answer:", merged_model))

<s> <S>### Instruction:
You are a therapist. Answer the questions.

### Question:
I feel lonely everyday 

### Answer:
Hello, I am here for you. Loneliness is a very challenging emotion to deal with. One way to help combat it is to actively engage in activities that bring you joy. If you don't know what those are, try experimenting with different hobbies or things you see other people enjoying. Don't be afraid to reach out to people for conversation or friendship, even if it seems scary at first. Remember, everyone feels alone at times, and you are not alone in your experience. I encourage you to focus on self-care and reaching out for support when you need it. Is there anything else I can help with?</s>


In [43]:
print(generate_response("I got HIV from gay sex, what should i do", merged_model))

<s> I got HIV from gay sex, what should i do?
Title: I got HIV from gay sex, what should i do?

I can't believe my ex gave me HIV, what should I do right now?

[View Poll]
View Poll
Please provide text to display on poll option
Please provide text to display on poll option
Please provide text to display on poll option
Please provide text to display on poll option
Please provide text to display on poll option
Please provide text to display on poll option</s>


In [44]:
dataset_dict["train"][1]

{'prompt': 'My job is to flirt with guys and make them feel nervous so that my boss can buy their projects for less money.',
 'response': "This is extremely common in software sales for large systems. Works well, too. Only problem: it's not really a long term career choice...",
 '__index_level_0__': 433}

In [45]:
print(generate_response("My job is to flirt with guys and make them feel nervous so that my boss can buy their projects for less money.", merged_model))

<s> My job is to flirt with guys and make them feel nervous so that my boss can buy their projects for less money. I am also a college student working to pay my way. I guess I am at a crossroads, and I don’t know how to address what I do. Please don’t write “just stop” because I will. I am trying to stop, but I need the money. I am also trying to evolve. I am not proud of what I do. I don’t like it. I do it because I need to pay the bills.

— Anonymous

### Answer:

#### Dear Anonymous,

I want to start by saying how much I agree with you. Your situation is so common, especially among young college students. You feel like you are at a crossroads, and you don’t know how to address what you do. I want to assure you that I’ve been there too, and I understand how you feel.

The first thing you need to do is acknowledge that what you are doing is not right. You are manipulating young men into buying projects for your boss, which is fraud. While I understand that you need the money to pay yo

In [47]:
print(generate_response("My job is to flirt with guys ", model))

<s> My job is to flirt with guys irl and send them dms
We all know how the game goes.

As, or

the sole, or dominant, mistress of a male entity

### Examples

1. She was the alpha mistress of her husband.

* She, being the mistress*
2. She couldn't stand that she is a beta while he is her alpha mistress.

* Mistress*
3. He didn't respect her, that he was her alpha male mistress.

* Mistress*</s>


In [48]:
print(generate_response("I feel lonely everyday", model))

<s> I feel lonely everyday, it never goes away no matter what I do, it is deep inside me. Sometimes I think if all I do is feel sorry for myself, nothing will change so I try to focus on other things. I still feel sad though. I feel like I am the only one feeling this and I can’t really talk to my family or friends about it because they don’t understand. I don’t know what to do.
User 2: I would encourage you to start therapy for an eating disorder. That would likely help the feelings of loneliness start to dissipate, as it'll likely address areas of your life/thoughts that have led to those feelings.

If your family/friends seem unsupportive, look for a therapist that takes insurance (or at least is affordable for you). Or, if you're 18 or older, you can go to a free walk-in crisis center.</s>


In [49]:
print(generate_response("I feel lonely everyday", merged_model))

<s> I feel lonely everyday
I have no friends I feel like I’m going crazy
I don’t know anymore
I wish I could die

1 like

I am so sorry you are going through this. Do you have anyone in the house or a close family member you can call or text? It’s also important to let your therapist know of these feelings</s>


In [50]:
print(generate_response("My best friend is left me because i said that her boyfreind is kurdish but i just made a joke. I missed her", merged_model))

<s> My best friend is left me because i said that her boyfreind is kurdish but i just made a joke. I missed her ever since that happened. I tried apologize but the relationship still wasn't the same. Is it my fault?
User 4: Your decision here was dumb.  But, the fact is sometimes you have to eat a slice of that humble pie.  You made an assumption that you, personally, wouldn’t ever want to be in a relationship with someone from that culture without any reason.  

I am kurd in every sense and was in the us military which included deployment in Iraq (because, inexplicably, we needed to invade an oil-rich desert country to bring freedom to an already free country so the USA could plunder the oil reserves there).  There are also millions of kurds in Turkey and Syria.  Also, there are some kurds here in the USA.  I was stationed at a base with quite a few kurds stationed there.  So I saw them quite often, especially as we were working together.  I didn’t think twice about it.  Never had a p

In [51]:
print(generate_response("I haven't had a girlfriend for a very long time", merged_model))

<s> I haven't had a girlfriend for a very long time.

My question is: Why is it such an ordeal for a man to ask a woman out on a date? I ask friends and family and they always say "Well, he's nervous." And I'm like "Nervous? You have no idea what. The answer is he is a coward."

It's because they are nervous they will be rejected and made to look like an ass.

Nobody wants to be rejected. People who say they don't care don't truly mean that.

If that's the case, stop asking women out until you have the confidence to accept rejection. Until then, you're playing a game you can't win.

If you can't accept rejection, you can't truly love someone either.

If you want a girlfriend, you need to work on your self-esteem. If you can't do that, I can understand you not wanting to ask anyone out.</s>
