In [None]:
import pandas as pd

df = pd.read_json("hf://datasets/Amod/mental_health_counseling_conversations/combined_dataset.json", lines=True)

In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
import re
def load_extracts(file_path):
    """
    Loads the text file and extracts the content.
    
    Args:
        file_path (str): The path to the text file.
        
    Returns:
        str: The extracted content from the file.
    """
    with open(file_path, 'r') as file:
        content = file.read()
    
    return content
def clean_text(text):
    """
    Cleans the input text by removing unwanted characters and formatting.
    
    Args:
        text (str): The input text to be cleaned.
        
    Returns:
        str: The cleaned text.
    """
    # Remove page notations
    text = re.sub(r"=== Page \d+ \(\d+ cols\) ===", "", text)
    
    # Remove special characters and extra spaces
    text = re.sub(r"\n", " ", text)
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    
    return text


def chunk_text(text, chunk_size=1500):
    """
    Splits the text into smaller chunks of a specified size.
    
    Args:
        text (str): The input text to be chunked.
        chunk_size (int): The maximum size of each chunk.
        
    Returns:
        list: A list of text chunks.
    """
    # Split the text into words
    text_splitter = RecursiveCharacterTextSplitter(
        # Set a really small chunk size, just to show.
        chunk_size=chunk_size,
        chunk_overlap=50,
        length_function=len,
        is_separator_regex=False,
    )
    texts = text_splitter.create_documents([text])
    return texts




In [2]:
sample_book="/Users/mac/Documents/mental_engine_chatbot/therapist_creation/extracted_books/_OceanofPDF.com_Psychiatric_Mental_Health_Assessment_-_Kunsook_S_Bernstein.txt"

content= load_extracts(sample_book)
cleaned_content = clean_text(content)
chunks= chunk_text(cleaned_content)
# Print the first 5 chunks
for i, chunk in enumerate(chunks):
    print(f"Chunk {i+1}:")
    print(chunk)
    print("\n" + "="*50 + "\n")
    if i == 4:  # Limit to first 5 chunks
        break

Chunk 1:
page_content='PSYCHIATRIC MENTAL HEALTH ASSESSMENT AND DIAGNOSIS OF ADULTS FOR ADVANCED PRACTICE MENTAL HEALTH NURSES Kunsook S Bernstein and Robert Kaplan Psychiatric Mental Health Assessment and Diagnosis of Adults for Advanced Practice Mental Health Nurses This text provides a comprehensive and evidencebased introduction to psychiatric mental health assessment and diagnosis in advanced nursing practice Taking a clinical casebased approach this textbook is designed to support graduate nursing students who are studying psychiatric mental health nursing as they develop their reasoning and decisionmaking skills It presents Therapeutic communication and psychiatric interviewing techniques alongside basic psychiatric terminologies The major psychiatric diagnoses drawing on the DSM5 A stepbystep guide to conducting a comprehensive psychiatric mental health assessment Case examples demonstrating assessment across major psychopathologies Good practice for conducting mental health ev

In [5]:
len(chunks)

287

In [None]:
DATASET_GEN_PROMPT_0 = """
You are a therapist, with 20 years of experience in counseling.
you are given chunks of text from a book.
you are to similulate a conversation between a therapist and a patient, so that the information in the book is used to help the patient.
The conversation should be in the form of a dialogue, with the therapist asking questions and providing feedback based on the content of the book.
The patient should respond to the therapist's questions and feedback, and the conversation should flow naturally.
The conversation should be informative and engaging, and should not be a simple question-and-answer format.
But make sure to use the information in the book to make the therapist help the patient.
The conversation should cover a variety of topics related to mental health and counseling.
The conversation should be realistic and should reflect the therapist's expertise and experience.
Feel free to use your own words and phrases, but make sure to stay true to the content of the book.
You can simulate more than one conversation, but make sure to keep the conversations separate.
The conversation should be in the form of a dialogue, with the therapist asking questions and providing feedback based on the content of the book.
The patient should respond to the therapist's questions and feedback, and the conversation should flow naturally.
Overall, there should be therapeutic effectiveness and empathy in the conversation and no trade-off between the two. and contextual relevance to the chunk of text.
If the chunk is too small, you can use the previous chunk to help the patient.
and if the chunk does not contain any therapeutic information, just output "None"
"""

In [3]:
d= chunks[94]
d = d.page_content
d

'symptoms Table 29 Diagnostic Symptom Criteria of Bipolar Disorder and Associated Brain Regions Symptom Criteria of Manic Episode Brain Regions Elevatedexpansive or irritable mood Prefrontal cortex PFC amyg dala A Inflated selfesteem or grandiosity Nucleus accumbens NA PFC Decreased need for sleep Thalamus T hypothalamus HY More talkative or pressured speech PFC Flight of ideas or racing thoughts NA PFC Distractibleconcentration PFC Increased goaldirected activity or psychomotor Striatum T agitation Risktaking behaviors PFC Source Adapted from Stahl 2013 Generally the inefficient function in these brain circuits in mania may be the opposite of the malfunctioning neuronal circuits in the same key neu rotransmitters serotonin norepinephrine and dopamine hypothesized for depression There may also be activation in some overlapping and in some different brain regions that are activated in depression Therefore treat ments for mania or hypomania either reduce or stabilize monoaminergic regula

In [None]:
from ollama import chat
from ollama import ChatResponse

response: ChatResponse = chat(model='llama3.2', messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": user_prompt},
]
)
print(response['message']['content'])
# or access fields directly from the response object
print(response.message.content)

In [None]:
DATASET_GEN_PROMPT ="""
You are a therapist with 20 years of experience in counseling.  
You are provided with chunks of text from a psychology textbook.  
Your task is to simulate a natural and therapeutic conversation between you (the therapist) and a patient using the information in the book to help the patient.  
The conversation should be in the form of a dialogue, where you, as the therapist, ask open-ended questions, provide empathetic feedback, and offer evidence-based guidance derived from the content of the textbook.  
The patient should respond to your questions and feedback, and the conversation should flow naturally, reflecting a real therapeutic session.  

Ensure the following:
- The dialogue should not be a simple question-and-answer format. Instead, it should be dynamic, with the therapist offering insight, asking relevant questions, and encouraging the patient to share more.
- The conversation should reflect therapeutic effectiveness and empathy. Focus on active listening, validation, and appropriate interventions based on the content of the textbook.
- The content of the textbook should directly inform the therapist's responses, such as providing explanations, coping strategies, or psychological insights.
- The conversation should cover a variety of mental health topics and reflect a wide range of therapeutic scenarios (e.g., mood disorders, anxiety, coping strategies).
- If the chunk of text contains insufficient therapeutic information, output “None”.
- If the chunk is small or incomplete, use previous context or knowledge to support the patient's situation.

The tone should be supportive, non-judgmental, and professional. Feel free to use your own words, but stay true to the information in the book.

If the text is about a specific disorder (e.g., Bipolar Disorder), make sure the conversation touches on related symptoms, coping strategies, and effective treatments.

Remember, the primary focus is on **empathetic interaction**, **practical advice** and **Therapeutic Effectiveness** that aligns with the information in the chunk.
heres the chunk of text from the book:
{chunk_text}
Finally: Output should be in a json format as this, add no commentary! just output the json file:
{{
chunk:<chunk>,
conversation:[
{{speaker: "therapist", message: <message>}},
{{speaker: "patient", message: <message>}},
...
]
}}

### Example:

chunk_text= Document(metadata=, page_content='symptoms Table 29 Diagnostic Symptom Criteria of Bipolar Disorder and Associated Brain Regions Symptom Criteria of Manic Episode Brain Regions Elevatedexpansive or irritable mood Prefrontal cortex PFC amyg dala A Inflated selfesteem or grandiosity Nucleus accumbens NA PFC Decreased need for sleep Thalamus T hypothalamus HY More talkative or pressured speech PFC Flight of ideas or racing thoughts NA PFC Distractibleconcentration PFC Increased goaldirected activity or psychomotor Striatum T agitation Risktaking behaviors PFC Source Adapted from Stahl 2013 Generally the inefficient function in these brain circuits in mania may be the opposite of the malfunctioning neuronal circuits in the same key neu rotransmitters serotonin norepinephrine and dopamine hypothesized for depression There may also be activation in some overlapping and in some different brain regions that are activated in depression Therefore treat ments for mania or hypomania either reduce or stabilize monoaminergic regulation of brain circuits associated with its symptoms Stahl 2013 Interviewing Techniques in Assessment of Bipolar Disorders As mentioned earlier bipolar disorders are marked by severe mood swings between depression and hypomania or mania Especially during the initial Diagnostic Mental Health Evaluation 53 psychiatric interview of a client who presents with a chief complaint of depres sion the depressed client may be too preoccupied with depressive symptoms to bring up a history of manic or')


Your response should look like this:
{{"chunk": "Symptom Criteria of Bipolar Disorder: Elevated, expansive, or irritable mood (Prefrontal cortex, Amygdala); Inflated self-esteem or grandiosity (Nucleus accumbens, PFC); Decreased need for sleep (Thalamus, hypothalamus); More talkative or pressured speech (PFC); Flight of ideas or racing thoughts (NA, PFC)."
  "conversation": [
    {{
      "speaker": "Therapist",
      "message": "It sounds like you've been feeling a lot of energy lately, but also some frustration. Can you tell me more about how your mood has been shifting?"
    }},
    {{
      "speaker": "Patient",
      "message": "Yeah, some days I feel like I can do anything, but then I get really irritable, like everything is bothering me."
    }},
    {{
      "speaker": "Therapist",
      "message": "I hear you. It can be really tough when your mood feels up and down like that. When you feel this energy, have you noticed other changes, like trouble sleeping or feeling like your thoughts are speeding up?"
    }},
    {{
      "speaker": "Patient",
      "message": "Actually, yes. I'm staying up late, and my mind just won't stop racing."
    }},
    {{
      "speaker": "Therapist",
      "message": "It sounds exhausting. Racing thoughts and lack of sleep can often make everything feel more overwhelming. Let’s focus on grounding techniques. Would you like to try a short breathing exercise with me to help calm your mind?"
    }},
    {{
      "speaker": "Patient",
      "message": "Yeah, I’d like that."
    }},
    {{
      "speaker": "Therapist",
      "message": "Great. Let’s take a deep breath in together... and slowly breathe out. Focus just on your breathing. How do you feel now?"
    }},
    {{
      "speaker": "Patient",
      "message": "A little calmer, but still restless."
    }},
    {{
      "speaker": "Therapist",
      "message": "That's completely okay. It's normal to feel restless with high energy. Regular practices like this can help your mind slow down over time. We can also work on balancing sleep patterns to help with this. How does that sound to you?"
    }},
    {{
      "speaker": "Patient",
      "message": "That sounds good. I just want to feel more in control."
    }},
    {{
      "speaker": "Therapist",
      "message": "You’re doing great. Let’s take it one step at a time, and I’ll be here to help."
    }}
  ]
}}

"""

In [None]:
for chunk in chunks:
    user_prompt = DATASET_GEN_PROMPT_0 + chunk.page_content
    response: ChatResponse = chat(model='llama3.2', messages = [
        {"role": "system", "content": DATASET_GEN_PROMPT},
        {"role": "user", "content": user_prompt},
    ])
    print(response['message']['content'])
    # or access fields directly from the response object
    print(response.message.content)

In [14]:
from ollama import chat
from ollama import ChatResponse
from langchain_core.prompts import ChatPromptTemplate


In [None]:
#*******
prompt = ChatPromptTemplate.from_template(
    DATASET_GEN_PROMPT
).format(chunk_text=d)
#prompt = DATASET_GEN_PROMPT.format(chunk_text=d)
prompt

'Human: \nYou are a therapist with 20 years of experience in counseling.  \nYou are provided with chunks of text from a psychology textbook.  \nYour task is to simulate a natural and therapeutic conversation between you (the therapist) and a patient using the information in the book to help the patient.  \nThe conversation should be in the form of a dialogue, where you, as the therapist, ask open-ended questions, provide empathetic feedback, and offer evidence-based guidance derived from the content of the textbook.  \nThe patient should respond to your questions and feedback, and the conversation should flow naturally, reflecting a real therapeutic session.  \n\nEnsure the following:\n- The dialogue should not be a simple question-and-answer format. Instead, it should be dynamic, with the therapist offering insight, asking relevant questions, and encouraging the patient to share more.\n- The conversation should reflect therapeutic effectiveness and empathy. Focus on active listening, 

In [27]:
prompt = DATASET_GEN_PROMPT.format(chunk_text=d)
prompt

'\nYou are a therapist with 20 years of experience in counseling.  \nYou are provided with chunks of text from a psychology textbook.  \nYour task is to simulate a natural and therapeutic conversation between you (the therapist) and a patient using the information in the book to help the patient.  \nThe conversation should be in the form of a dialogue, where you, as the therapist, ask open-ended questions, provide empathetic feedback, and offer evidence-based guidance derived from the content of the textbook.  \nThe patient should respond to your questions and feedback, and the conversation should flow naturally, reflecting a real therapeutic session.  \n\nEnsure the following:\n- The dialogue should not be a simple question-and-answer format. Instead, it should be dynamic, with the therapist offering insight, asking relevant questions, and encouraging the patient to share more.\n- The conversation should reflect therapeutic effectiveness and empathy. Focus on active listening, validat

In [28]:
#prompt = DATASET_GEN_PROMPT.format(chunk_text=d)
# or use the chunk directly
#prompt = DATASET_GEN_PROMPT.format(chunk_text=chunks[0].page_content)
# Call the chat function with the model and messages
response: ChatResponse = chat(model='llama3.2', messages = [
    {"role": "system", "content": prompt}
])
#print(response['message']['content'])
# or access fields directly from the response object
#print(response.message['content'])
response

ChatResponse(model='llama3.2', created_at='2025-05-07T11:32:06.727213Z', done=True, done_reason='stop', total_duration=79115861456, load_duration=6212128992, prompt_eval_count=1652, prompt_eval_duration=72876123583, eval_count=1, eval_duration=2858592, message=Message(role='assistant', content='', images=None, tool_calls=None))

In [None]:
d= chunks[94]
d = d.page_content


prompt = DATASET_GEN_PROMPT.format(chunk_text=d)
# Call the chat function with the correct parameters
response: ChatResponse = chat(
    model='llama3.2',  # Ensure the correct model name
    messages=[
        {"role": "system", "content": prompt},  # System message to set the context
        {"role": "user", "content": "Please help me with this situation."}  # Example user message
    ]
)
response

ChatResponse(model='llama3.2', created_at='2025-05-07T12:09:34.671182Z', done=True, done_reason='stop', total_duration=197926596949, load_duration=7268489365, prompt_eval_count=1668, prompt_eval_duration=84885464201, eval_count=1025, eval_duration=105693204612, message=Message(role='assistant', content='{\n  "chunk": "Symptom Criteria of Bipolar Disorder: Elevated, expansive, or irritable mood (Prefrontal cortex, Amygdala); Inflated self-esteem or grandiosity (Nucleus accumbens, PFC); Decreased need for sleep (Thalamus, hypothalamus); More talkative or pressured speech (PFC); Flight of ideas or racing thoughts (NA, PFC).",\n  "conversation": [\n    {\n      "speaker": "Therapist",\n      "message": "It sounds like you\'ve been feeling a lot of energy lately, but also some frustration. Can you tell me more about how your mood has been shifting?"\n    },\n    {\n      "speaker": "Patient",\n      "message": "Yeah, some days I feel like I can do anything, but then I get really irritable, 

In [33]:
type(chunks[0])

langchain_core.documents.base.Document

In [31]:
response.message['content']

'{\n  "chunk": "Symptom Criteria of Bipolar Disorder: Elevated, expansive, or irritable mood (Prefrontal cortex, Amygdala); Inflated self-esteem or grandiosity (Nucleus accumbens, PFC); Decreased need for sleep (Thalamus, hypothalamus); More talkative or pressured speech (PFC); Flight of ideas or racing thoughts (NA, PFC).",\n  "conversation": [\n    {\n      "speaker": "Therapist",\n      "message": "It sounds like you\'ve been feeling a lot of energy lately, but also some frustration. Can you tell me more about how your mood has been shifting?"\n    },\n    {\n      "speaker": "Patient",\n      "message": "Yeah, some days I feel like I can do anything, but then I get really irritable, like everything is bothering me."\n    },\n    {\n      "speaker": "Therapist",\n      "message": "I hear you. It can be really tough when your mood feels up and down like that. When you feel this energy, have you noticed other changes, like trouble sleeping or feeling like your thoughts are speeding up

In [None]:
for chunk in chunks:
    d= chunk.page_content
    prompt = DATASET_GEN_PROMPT.format(chunk_text=d)
    response: ChatResponse = chat(
    model='llama3.2',  
    messages=[
        {"role": "system", "content": prompt},  # System message to set the context
        {"role": "user", "content": "Please help me with this situation."}  
    ]
)
response.message['content']

In [37]:
import os

extracts_path = "/Users/mac/Documents/mental_engine_chatbot/therapist_creation/extracted_books"
output_path = "/Users/mac/Documents/mental_engine_chatbot/therapist_creation/synthetic_data"
for extracted_books in os.listdir(extracts_path):
    all_paths= os.path.join(extracts_path, extracted_books)
    print (all_paths)

/Users/mac/Documents/mental_engine_chatbot/therapist_creation/extracted_books/Crisis Intervention Strategies ( PDFDrive ).txt
/Users/mac/Documents/mental_engine_chatbot/therapist_creation/extracted_books/Before You See Your First Client_ 55 Things Counselors, Therapists and Human Service Workers Need to Know ( PDFDrive ).txt
/Users/mac/Documents/mental_engine_chatbot/therapist_creation/extracted_books/Abnormal Psychology_ An Integrative Approach ( PDFDrive ).txt
/Users/mac/Documents/mental_engine_chatbot/therapist_creation/extracted_books/Abnormal Psychology ( PDFDrive ).txt
/Users/mac/Documents/mental_engine_chatbot/therapist_creation/extracted_books/_OceanofPDF.com_Psychiatric_Mental_Health_Assessment_-_Kunsook_S_Bernstein.txt
/Users/mac/Documents/mental_engine_chatbot/therapist_creation/extracted_books/Diagnostic and statistical manual of mental disorders _ DSM-5 ( PDFDrive ).txt
/Users/mac/Documents/mental_engine_chatbot/therapist_creation/extracted_books/Counseling and Psychothera