## Доработанная LLaMA 2 под вашу задачу

Цель этого блокнота — поэкспериментировать с новым способом, позволяющим легко построить модель, специфичную для конкретной задачи, для вашего варианта использования.

Экспериментировать будем на обучении модели круглосуточной психологической помощи для женщин и детей на данных, сгенерированных с помощью GPT-3.5 Turbo. Посмотрите мой репозиторий Data Generation with GPT, если у вас нет данных для тренировки модели и вы хотите попробовать сгенерировать их с помощью GPT (допустим, вы уже прошли все шаги этого процесса и у вас уже есть df.csv).
Сначала используйте лучший доступный графический процессор (перейдите в «Среда выполнения» -> измените тип среды выполнения).

In [7]:
import pandas as pd

# Read the prompt/response pairs and discard the 'Unnamed: 0' column
# (presumably an index saved by an earlier to_csv call), then print a
# summary of the remaining columns.
df = pd.read_csv('/content/df.csv').drop(columns='Unnamed: 0')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89 entries, 0 to 88
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   prompt    89 non-null     object
 1   response  89 non-null     object
dtypes: object(2)
memory usage: 1.5+ KB


Split into train and test sets.

In [8]:
# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
train_df = df.sample(frac=0.9, random_state=42)
test_df = df.loc[~df.index.isin(train_df.index)]

# Write each split as a JSON Lines file (one record per line).
for split_df, split_path in ((train_df, 'train.jsonl'), (test_df, 'test.jsonl')):
    split_df.to_json(split_path, orient='records', lines=True)

# Install necessary libraries

In [9]:
%%capture
!pip install datasets

In [10]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━[0m [32m194.6/244.2 kB[0m [31m5.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m85.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m97.4 MB/s[0m eta [36m0:00:00[0m
[?25h

# Define Hyperparameters

In [11]:
# --- Model, data and output locations ---------------------------------------
# Swap in "meta-llama/Llama-2-7b-chat-hf" if you have access to the official
# LLaMA 2 model, though keep in mind you'll need to pass a Hugging Face key
# argument.
model_name = "NousResearch/llama-2-7b-chat-hf"
dataset_name = "/content/train.jsonl"  # not referenced by the training cell shown below
new_model = "llama-2-7b-custom"        # target of trainer.model.save_pretrained()
output_dir = "./results"               # TrainingArguments output directory

# --- LoRA adapter (forwarded to LoraConfig) ---------------------------------
lora_r = 64
lora_alpha = 16
lora_dropout = 0.1

# --- 4-bit quantization (forwarded to BitsAndBytesConfig) -------------------
use_4bit = True
bnb_4bit_quant_type = "nf4"
bnb_4bit_compute_dtype = "float16"  # resolved later with getattr(torch, ...)
use_nested_quant = False
device_map = {"": 0}                # passed to from_pretrained as device_map

# --- Precision flags (forwarded to TrainingArguments) -----------------------
fp16 = False
bf16 = False

# --- Batching ---------------------------------------------------------------
per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 1
gradient_checkpointing = True
group_by_length = True

# --- Optimizer and learning-rate schedule -----------------------------------
optim = "paged_adamw_32bit"
learning_rate = 2e-4
weight_decay = 0.001
max_grad_norm = 0.3
lr_scheduler_type = "constant"
warmup_ratio = 0.03

# --- Run length, checkpointing and logging ----------------------------------
num_train_epochs = 1
max_steps = -1
save_steps = 25
logging_steps = 5

# --- SFTTrainer options -----------------------------------------------------
max_seq_length = None
packing = False

# Load Datasets and Train

In [None]:
# The Llama-2 chat template below needs a system prompt, but no visible cell
# defines `system_message`, so the original cell failed with NameError.
# Reuse the value when an earlier cell already set it; otherwise fall back
# to a default that matches this notebook's use case.
system_message = globals().get(
    "system_message",
    "You are a caring, non-judgmental assistant that provides "
    "round-the-clock psychological support to women and children.",
)


def _to_chat_text(examples):
    """Render a batch of prompt/response pairs in the Llama-2 chat format.

    Args:
        examples: Batch dict with parallel 'prompt' and 'response' lists,
            as handed over by ``Dataset.map(..., batched=True)``.

    Returns:
        A dict with a single 'text' list holding one formatted training
        string per prompt/response pair.
    """
    header = f'[INST] <<SYS>>\n{system_message.strip()}\n<</SYS>>\n\n'
    return {
        'text': [
            header + prompt + ' [/INST] ' + response
            for prompt, response in zip(examples['prompt'], examples['response'])
        ]
    }


# Load datasets. Both files are requested with split="train", the split name
# used for a single data file; the second one serves as the validation set.
train_dataset = load_dataset('json', data_files='/content/train.jsonl', split="train")
valid_dataset = load_dataset('json', data_files='/content/test.jsonl', split="train")

# Preprocess datasets: add the 'text' column that the trainer reads.
train_dataset_mapped = train_dataset.map(_to_chat_text, batched=True)
valid_dataset_mapped = valid_dataset.map(_to_chat_text, batched=True)

# Turn the dtype name from the hyperparameter cell into the torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Quantization settings applied while the base weights are loaded.
quantization_settings = {
    "load_in_4bit": use_4bit,
    "bnb_4bit_quant_type": bnb_4bit_quant_type,
    "bnb_4bit_compute_dtype": compute_dtype,
    "bnb_4bit_use_double_quant": use_nested_quant,
}
bnb_config = BitsAndBytesConfig(**quantization_settings)

# Base model, quantized on load and placed according to device_map.
model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=bnb_config, device_map=device_map
)
# The generation cache is switched off for training (a later cell turns it
# back on before inference).
model.config.use_cache = False
model.config.pretraining_tp = 1

# Tokenizer: reuse the end-of-sequence token for padding and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# LoRA adapter configuration for causal language modelling.
lora_settings = {
    "lora_alpha": lora_alpha,
    "lora_dropout": lora_dropout,
    "r": lora_r,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
peft_config = LoraConfig(**lora_settings)
# Set training parameters
# NOTE(review): `gradient_checkpointing` from the hyperparameter cell is not
# passed here, and `warmup_ratio` is presumably ignored by the plain
# "constant" scheduler ("constant_with_warmup" would honour it) - confirm
# whether either should be wired in.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    # Previously omitted, so evaluation silently used the library default
    # instead of the value chosen in the hyperparameter cell.
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="all",
    evaluation_strategy="steps",
    eval_steps=5,  # Evaluate every 5 steps
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset_mapped,
    eval_dataset=valid_dataset_mapped,  # Validation split, evaluated every eval_steps
    peft_config=peft_config,
    dataset_text_field="text",  # column produced by the preprocessing step
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

# Fine-tune, then write the trained weights to the `new_model` directory.
trainer.train()
trainer.model.save_pretrained(new_model)

# Cell 4: Test the model
# Silence library logging below CRITICAL so only the generated text is shown.
logging.set_verbosity(logging.CRITICAL)

# `system_message` is not defined by any visible cell; reuse it when present,
# otherwise fall back to a default matching this notebook's use case so the
# cell does not fail with NameError.
system_message = globals().get(
    "system_message",
    "You are a caring, non-judgmental assistant that provides "
    "round-the-clock psychological support to women and children.",
)

# Same chat template as the training data (including .strip() on the system
# message) so the test prompt matches what the model was fine-tuned on.
prompt = f"[INST] <<SYS>>\n{system_message.strip()}\n<</SYS>>\n\nWhat should I do if my husband is an abuser and gets drunk almost every day and hits me? [/INST]" # replace the command here with something relevant to your task
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(prompt)
print(result[0]['generated_text'])

# Run Inference

In [14]:
from transformers import pipeline

# `system_message` is not defined by any visible cell; reuse it when present,
# otherwise fall back to a default matching this notebook's use case so the
# cell does not fail with NameError.
system_message = globals().get(
    "system_message",
    "You are a caring, non-judgmental assistant that provides "
    "round-the-clock psychological support to women and children.",
)

# Same chat template as the training data (including .strip() on the system
# message) so the prompt matches what the model was fine-tuned on.
prompt = f"[INST] <<SYS>>\n{system_message.strip()}\n<</SYS>>\n\nWhat should I do if my husband is an abuser and gets drunk almost every day and hits me?. [/INST]" # replace the command here with something relevant to your task
num_new_tokens = 500  # change to the number of new tokens you want to generate

gen = pipeline('text-generation', model=model, tokenizer=tokenizer)

# max_new_tokens caps only the generated continuation, which replaces the
# manual "prompt tokens + new tokens" arithmetic. return_full_text=False makes
# the pipeline return the continuation alone, which is more reliable than
# removing the prompt afterwards with str.replace (the decoded text is not
# guaranteed to reproduce the prompt character for character).
result = gen(prompt, max_new_tokens=num_new_tokens, return_full_text=False)
print(result[0]['generated_text'])

  If you are in a situation where your husband is an abuser and gets drunk almost every day, leading to physical violence towards you, it is essential to prioritize your safety and well-being. Here are some steps you can take to address the issue:

1. Seek support: Reach out to trusted friends, family, or a professional counselor who can provide emotional support and help you develop a safety plan. You can also contact a domestic violence hotline or a local support group for victims of abuse.
2. Document incidents: Keep a record of any incidents of abuse, including dates, times, locations, and details of what happened. This documentation can be helpful when seeking legal protection or support services.
3. Create a safety plan: Work with your support network to develop a safety plan that includes steps to take in case of an emergency, such as having a safe place to stay, a way to communicate with loved ones, and a plan for transportation.
4. Seek legal protection: Consider obtaining a r

Ещё примеры вывода нашей дообученной модели. Они вполне удовлетворительные, но, конечно, модель стоит обучать на кратно большем объёме данных для того, чтобы она могла релевантно отвечать на запросы о психологической помощи по разным темам.

How should I behave when my mother drinks alcohol and starts yelling and insulting me?

Response:
It's understandable that you're feeling upset and concerned about your mother's behavior when she drinks and becomes aggressive. Here are some steps you can take to handle the situation in a thoughtful and constructive manner:

1. Stay calm and composed: It's important to keep your emotions in check, especially when dealing with a difficult situation. Take a few deep breaths, and try to approach the situation with a clear head.
2. Set boundaries: It's important to establish clear boundaries with your mother, especially when she's drinking. Let her know that you will not tolerate verbal abuse or disrespectful behavior. Be firm but polite in setting these boundaries.
3. Practice assertive communication: When your mother starts yelling or insulting you, try to communicate with her in a calm and assertive manner. Use "I" statements to express your feelings and concerns, and avoid getting defensive or aggressive. For example, you could say, "I feel upset when you yell at me. Could we find a way to communicate that doesn't involve insults or yelling?"
4. Seek support: It's important to have a support system in place when dealing with a difficult situation like this. Reach out to a trusted friend, family member, or mental health professional for emotional support and guidance.
5. Consider seeking professional help: If your mother's behavior is persistent and causing significant emotional distress, it may be helpful to seek professional help. A mental health professional can provide you with additional coping strategies and support in navigating this challenging situation.
6. Take care of yourself: Remember to prioritize your own emotional well-being during this time. Engage in self-care activities that help you relax and recharge, such as exercise, meditation, or spending time with loved ones.

Remember, dealing with a difficult situation like this can be challenging, but with the right support and coping strategies, you can navigate it in a thoughtful and constructive manner.

What should I do if my husband is an abuser and gets drunk almost every day and hits me?

Response:
If you are in a situation where your husband is an abuser and gets drunk almost every day, leading to physical violence towards you, it is essential to prioritize your safety and well-being. Here are some steps you can take to address the issue:

1. Seek support: Reach out to trusted friends, family, or a professional counselor who can provide emotional support and help you develop a safety plan. You can also contact a domestic violence hotline or a local support group for victims of abuse.
2. Document incidents: Keep a record of any incidents of abuse, including dates, times, locations, and details of what happened. This documentation can be helpful when seeking legal protection or support services.
3. Create a safety plan: Work with your support network to develop a safety plan that includes steps to take in case of an emergency, such as having a safe place to stay, a way to communicate with loved ones, and a plan for transportation.
4. Seek legal protection: Consider obtaining a restraining order or seeking legal protection to safeguard yourself from further abuse. Consult with a lawyer or a domestic violence advocate to understand your options.
5. Prioritize self-care: Take care of yourself by engaging in activities that promote emotional well-being, such as exercise, meditation, or spending time with loved ones.
6. Consider counseling: Individual or group counseling can help you process your emotions and develop coping strategies to deal with the abuse.
7. Seek professional help: Consult with a mental health professional who specializes in trauma and abuse. They can provide you with the necessary tools to manage your emotions and develop a plan for your safety.
8. Consider leaving the relationship: If you feel that your safety is at risk, it may be necessary to consider leaving the relationship. This can be a difficult decision, but it may be the best option to protect yourself.

Remember, you are not alone, and there are resources available to help you. Don't hesitate to reach out for support and guidance.

In summary, if your husband is an abuser and gets drunk almost every day, leading to physical violence towards you, it is crucial to prioritize your safety and well-


In [None]:
import sys

# Save the fine-tuned model
trainer.model.save_pretrained(new_model)

# `wandb` is never imported by this notebook, so the bare `wandb.finish()`
# raised NameError. Close the run only when the library has actually been
# loaded in this session (e.g. by the trainer's reporting integration).
_wandb = sys.modules.get("wandb")
if _wandb is not None:
    _wandb.finish()

# Turn the generation cache back on (it was disabled for training) and put
# the model into evaluation mode.
model.config.use_cache = True
model.eval()

# Merge the model and store in Google Drive

In [None]:
# Merge and save the fine-tuned model
from transformers import AutoTokenizer, AutoModelForCausalLM
from google.colab import drive
drive.mount('/content/drive')

In [21]:
# Clear the memory footprint
del model, trainer
torch.cuda.empty_cache()

In [None]:
model_path = "/content/drive/MyDrive/My models/llama-2-7b-custom"  # change to your preferred path
base_model_id = "NousResearch/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Load the base weights in half precision: the default float32 load needs
# roughly twice the memory for a 7B model. low_cpu_mem_usage avoids building
# a second full copy of the weights while loading.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

In [28]:
# Merge and save the fine-tuned model

model_path = "/content/drive/MyDrive/My models/llama-2-7b-custom"  # change to your preferred path
base_model_id = "NousResearch/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Load the base weights in half precision: the default float32 load needs
# roughly twice the memory for a 7B model. low_cpu_mem_usage avoids building
# a second full copy of the weights while loading.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# Attach the trained adapter stored under `new_model`, then fold it into the
# base weights so the result can be loaded as a plain model.
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Apply the same padding setup that was used for training before saving.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Save the merged model and its tokenizer side by side.
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


('/content/drive/MyDrive/My models/llama-2-7b-custom/tokenizer_config.json',
 '/content/drive/MyDrive/My models/llama-2-7b-custom/special_tokens_map.json',
 '/content/drive/MyDrive/My models/llama-2-7b-custom/tokenizer.json')

# Load a fine-tuned model from Drive and run inference

In [None]:
# Google Drive must already be mounted at /content/drive for this path to
# resolve (see the drive.mount cell).
from transformers import AutoModelForCausalLM, AutoTokenizer

# Directory holding the merged model and tokenizer files;
# change to the path where your model is saved.
model_path = "/content/drive/MyDrive/My models/llama-2-7b-custom/"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

In [None]:
from transformers import pipeline

# Text sent to the model as-is; change to your desired prompt.
prompt = "What should I do when my mom hits me?"

# Wrap the loaded model and tokenizer in a text-generation pipeline, run the
# prompt through it and print the text of the first returned candidate.
gen = pipeline('text-generation', model=model, tokenizer=tokenizer)
result = gen(prompt)
first_candidate = result[0]
print(first_candidate['generated_text'])