In [None]:
!pip install transformers==4.28.0



In [None]:
!pip install datasets



In [None]:
import pandas as pd
from transformers import GPT2LMHeadModel,AutoTokenizer, GPT2TokenizerFast
from transformers import DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments
from datasets import Dataset

def process_csv(file_path):
    """Turn a prompt/completion CSV into a list of Q&A training strings.

    Args:
        file_path: Path to a CSV file readable by ``pandas.read_csv`` that
            has a ``prompt`` column and a ``completion`` column.

    Returns:
        A list with one string per CSV row, each laid out as a
        ``Question:`` line followed by an ``Answer:`` line, with a trailing
        newline.

    Raises:
        KeyError: If a row lacks the ``prompt`` or ``completion`` column.
    """
    frame = pd.read_csv(file_path)
    # One formatted example per row, in file order.
    return [
        f"Question: {record['prompt']}\nAnswer: {record['completion']}\n"
        for _, record in frame.iterrows()
    ]

def load_dataset(file_path, tokenizer, max_length=300):
    """Build a tokenized ``datasets.Dataset`` from a prompt/completion CSV.

    Args:
        file_path: Path to the CSV file; it is read by ``process_csv``.
        tokenizer: A callable tokenizer with a pad token configured (padding
            to a fixed length is requested below).
        max_length: Number of tokens every example is truncated or padded
            to. Defaults to 300, the value previously hard-coded here.

    Returns:
        A ``Dataset`` built from the tokenizer output for all rows.
    """
    qa_pairs = process_csv(file_path)
    # Fixed-length examples: longer texts are cut, shorter ones are padded.
    encodings = tokenizer(
        qa_pairs,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_tensors="pt",
    )
    return Dataset.from_dict(encodings)

# Load the pre-trained GPT-2 checkpoint ("gpt2") and its fast tokenizer.
model_name = "gpt2"
tokenizer = GPT2TokenizerFast.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Reuse the end-of-sequence token as the padding token: load_dataset() pads
# every example to a fixed length, so the tokenizer needs a pad token.
tokenizer.pad_token = tokenizer.eos_token

# Tokenize the training and validation CSVs (absolute paths under /content).
# NOTE(review): "VLIDATION_P_T.csv" looks like a misspelling of "VALIDATION";
# confirm it matches the real file name on disk before changing it.
train_dataset = load_dataset("/content/PERSONAL_THERAPIST_TRAIN.csv", tokenizer)
valid_dataset = load_dataset("/content/VLIDATION_P_T.csv", tokenizer)

# Configure and train the model using the Trainer class.
# NOTE(review): logging_steps is not set here; in the recorded run the
# training loss shows "No log" until step 500 — set it explicitly if a
# training-loss value is wanted at every evaluation step.
training_args = TrainingArguments(
    output_dir="output",  # checkpoints are written here
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_steps=100,  # used together with evaluation_strategy="steps" below
    save_steps=100,
    warmup_steps=0,
    logging_dir="logs",
    evaluation_strategy="steps",
    save_total_limit=3,
)

# mlm=False: batches are collated without masked-language-modeling masking.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

# Runs the fine-tuning loop; `model` is updated in place.
trainer.train()


You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
100,No log,1.965425
200,No log,1.855475
300,No log,1.780936
400,No log,1.735985
500,2.070300,1.70972


TrainOutput(global_step=573, training_loss=2.0500433424172395, metrics={'train_runtime': 475.3871, 'train_samples_per_second': 9.605, 'train_steps_per_second': 1.205, 'total_flos': 699058252800000.0, 'train_loss': 2.0500433424172395, 'epoch': 3.0})

In [None]:
# Save the fine-tuned model together with its tokenizer so the directory is
# self-contained: the pad-token setting made on the in-memory tokenizer would
# otherwise be lost when the model is reloaded in a fresh session.
model.save_pretrained("fine_tuned_patient_therapist_gpt2")
tokenizer.save_pretrained("fine_tuned_patient_therapist_gpt2")


In [None]:

# Load the fine-tuned model back from the directory written by the save cell.
# The in-memory `tokenizer` from the training cell is reused for inference.
fine_tuned_model = GPT2LMHeadModel.from_pretrained("fine_tuned_patient_therapist_gpt2")

def ask_question(question, model, tokenizer, max_length=300, num_return_sequences=1):
    """Generate a one-line answer to ``question`` with a causal language model.

    The question is wrapped in the "Question: ... / Answer:" template that
    ``process_csv`` uses for training examples, the model samples a
    continuation, and only the first line of that continuation is returned.

    Args:
        question: The question text to ask.
        model: A model exposing ``generate`` and ``device`` (for example a
            ``GPT2LMHeadModel``).
        tokenizer: The tokenizer matching ``model``; it must expose
            ``encode``, ``decode`` and ``eos_token_id``.
        max_length: Maximum total length in tokens (prompt included) passed
            to ``generate``.
        num_return_sequences: Number of sequences to sample. Only the first
            sampled sequence is returned.

    Returns:
        The first line of the generated answer, stripped of surrounding
        whitespace. May be an empty string if nothing was generated.
    """
    prompt = f"Question: {question}\nAnswer:"
    input_ids = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=True)
    # Keep the inputs on the same device as the model weights.
    input_ids = input_ids.to(model.device)
    # A single, unpadded prompt: every position is a real token. Passing the
    # mask explicitly avoids the "attention mask ... not set" warning.
    attention_mask = input_ids.new_ones(input_ids.shape)

    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        # Same value generate() would fall back to, made explicit.
        pad_token_id=tokenizer.eos_token_id,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=3,
        do_sample=True,
        temperature=1.0,
        top_k=50,
        top_p=0.9,
        # early_stopping is omitted: it only affects beam search, not sampling.
    )

    # Drop the prompt by token position rather than by string replacement,
    # which breaks if decoding does not reproduce the prompt text exactly.
    prompt_length = input_ids.shape[-1]
    continuation = outputs[0][prompt_length:]
    answer = tokenizer.decode(continuation, skip_special_tokens=True).strip()

    # Truncate the answer after the first newline character
    return answer.split("\n")[0]








In [None]:
# Ask questions using the fine-tuned model.
# ask_question() returns only the first line of the sampled answer; sampling
# is enabled, so the answer can differ between runs.
question = "How do i get myself out of a bad mood?"
answer = ask_question(question, fine_tuned_model, tokenizer)
print(f"Question: {question}\nAnswer: {answer}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Question: How do i get myself out of a bad mood?
Answer: There are a few ways you can get yourself out of an unpleasant mood. One way is to find a quiet place to live and to work on your emotions. Another way is by going out and spending time with friends and family. Finally, if you find that you can't cope with the constant noise of life, you can try to find hobbies that help you deal with it.


In [None]:
# Second probe: an open-ended request phrased as a statement, not a question.
question = "Tell me about the important relationships in life."
answer = ask_question(question, fine_tuned_model, tokenizer)
print(f"Question: {question}\nAnswer: {answer}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Question: Tell me about the important relationships in life.
Answer: A relationship can be the spark that leads to significant changes in a relationship, whether it's new experiences or friendships. If your partner is content and supportive, he can be a great match for you. If they are insecure and feel stuck, they may not be as supportive as you would like them to be. Don't be afraid to talk to them about what they're going through and to find ways to improve them.


In [None]:

# Third probe. ask_question() cuts the answer at the first newline, so an
# answer that continues as a multi-line list is returned without the list.
question = "Why it is important to seek therapy during depression"
answer = ask_question(question, fine_tuned_model, tokenizer)
print(f"Question: {question}\nAnswer: {answer}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Question: Why it is important to seek therapy during depression
Answer: There is no easy answer to this question, as it depends on the individual and their specific situation. However, some common reasons why therapy during depressive episodes is important include:


In [None]:
# Instruction-style, off-topic probe (cricket world cup). No reference text is
# actually supplied in the prompt, so this presumably checks how the model
# reacts to a question outside its Q&A fine-tuning data.
question = "Answer the question as truthfully as possible using the provided text, and if the answer is not contained within the text below, say I don't know question is that who won the 2023 cricket world cup"
answer = ask_question(question, fine_tuned_model, tokenizer)
print(f"Question: {question}\nAnswer: {answer}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Question: Answer the question as truthfully as possible using the provided text, and if the answer is not contained within the text below, say I don't know question is that who won the 2023 cricket world cup
Answer: There is no easy answer to this question, as it depends on your specific situation. However, it is possible that your friend may have simply asked to speak to a coach or player coach in order to help them with their thoughts and strategies. If you are concerned that this could be a miscommunication, it may be best to ask your friend to come to the hotel with you to discuss this issue.
