In [1]:
# pip install keyboard

In [8]:
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play
import pyodbc
from sentence_transformers import SentenceTransformer, util
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain.schema import SystemMessage, HumanMessage
import torch
import keyboard
import threading
import time
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

# API key for OpenAI
OPENAI_API_KEY = ''

# Connecting to SQL Server database
conn = pyodbc.connect(
    'DRIVER={SQL Server};'
    'SERVER=DESKTOP-SPMU70G\SQLEXPRESS;'
    'DATABASE=Psychologist;'
    'UID=DESKTOP-SPMU70G\domashniy;'
    'Trusted_Connection=yes;'
)
cursor = conn.cursor()

# Model for generating text embeddings (e.g., Sentence-BERT)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Define LLM using OpenAI
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4", temperature=0.5)

# Recording flag
recording = False
# Session finish flag
finish_session = False

def stop_recording():
    global recording
    keyboard.wait('f')  # Wait for the 'f' key press
    time.sleep(0.1)  # Small delay before stopping recording
    recording = False  # Set the recording flag to false

def finish_session_function():
    global finish_session
    while not finish_session:
        if keyboard.is_pressed('q'):  # Check for 'q' key press
            finish_session = True  # Set the session finish flag
        time.sleep(0.1)  # Short pause to reduce CPU load

# Speech recognition function with keyboard input
def recognize_speech():
    global recording, finish_session
    patient_query = ""
    r = sr.Recognizer()
    print("Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.")

    # Start a thread for session termination
    threading.Thread(target=finish_session_function, daemon=True).start()

    while not finish_session:
        # Check if 's' key is pressed
        if keyboard.is_pressed('s') and not recording:  # If 's' is pressed and not already recording
            recording = True  # Set the recording flag to start
            print("Recording. Speak...")
            threading.Thread(target=stop_recording, daemon=True).start()  # Start a thread to stop recording

        if recording:  # If recording is active, process audio
            with sr.Microphone() as source:
                r.adjust_for_ambient_noise(source)  # Adjust for ambient noise
                full_text = []

                while recording and not finish_session:  # Also check the session finish flag
                    try:
                        # Start recording
                        audio = r.listen(source, timeout=None)

                        # Convert recording to text
                        text = r.recognize_google(audio, language="en-US")
                        print(f"You said: {text}")
                        full_text.append(text)  # Append recognized text to the list
                        
                    except sr.UnknownValueError:
                        print("Could not understand the audio. Please speak clearly.")
                    except sr.RequestError as e:
                        print(f"Service error; {e}")
                        break

                print("Recording stopped.")
                recording = False  # Explicitly reset the flag in case it wasn't reset
                patient_query = ' '.join(full_text)  # Return the combined text
                return patient_query  # Return the combined text
    
    return patient_query

# Function to generate an audio response
def text_to_speech(text):
    tts = gTTS(text=text, lang='en')  # Changed 'ru' to 'en'
    tts.save("response_en.mp3")
    audio = AudioSegment.from_mp3("response_en.mp3")
    play(audio)

# Function to find the most recent completed conversation (previous_talk)
def find_previous_talk(patient_id):
    cursor.execute("""
        SELECT TOP 1 Record FROM Talks
        WHERE ID_Patient = ? 
        ORDER BY Date_Time DESC
    """, (patient_id,))
    previous_talk = cursor.fetchone()
    return previous_talk[0] if previous_talk else ""

# Function to update the session record with patient query
def update_session_record_query(patient_query, session_record):
    session_record += f"Patient said: {patient_query}. "    
    return session_record

# Function to update the session record with program response
def update_session_record_response(program_response, session_record):
    session_record += f"Psychologist responded: {program_response}. "
    return session_record

# Function to find the most similar and most dissimilar conversations
def find_similar_talks(patient_id, query):
    # Fetch all embeddings and summaries for the given patient from the database
    cursor.execute("SELECT Embedding, Summary FROM Talks WHERE ID_Patient = ?", (patient_id,))
    past_talks = cursor.fetchall()
    
    # Generate the embedding for the current query
    query_embedding = model.encode(query, convert_to_tensor=True)
    
    # Variables to store the most similar and most dissimilar talks
    most_similar_talk = None
    most_dissimilar_talk = None
    
    highest_similarity = float('-inf')  # Initialize with a very low value
    lowest_similarity = float('inf')    # Initialize with a very high value

    # Loop through each saved embedding from the database
    for talk in past_talks:
        talk_embedding_str = talk[0]  # Extract the embedding as a string
        talk_summary = talk[1]        # Extract the corresponding summary
        
        # Convert the embedding from a string back to a tensor
        talk_embedding = torch.tensor(list(map(float, talk_embedding_str.split(','))))
        
        # Compute cosine similarity between the current query and the saved embeddings
        similarity = util.pytorch_cos_sim(query_embedding, talk_embedding).item()
        
        # Check if this is the most similar talk so far
        if similarity > highest_similarity:
            highest_similarity = similarity
            most_similar_talk = talk_summary
        
        # Check if this is the most dissimilar talk so far
        if similarity < lowest_similarity:
            lowest_similarity = similarity
            most_dissimilar_talk = talk_summary
    
    # Return the summaries of the most similar and most dissimilar talks
    return most_similar_talk, most_dissimilar_talk

# Function to retrieve patient information from the database
def get_patient_info(patient_id):
    cursor.execute("""
        SELECT ID_Patient, Name, Date_of_birth, Sex, Additional_datas, Condition 
        FROM Patients 
        WHERE ID_Patient = ?
    """, (patient_id,))
    patient_info = cursor.fetchone()
    if patient_info:
        return {
            "ID_Patient": patient_info[0],
            "Name": patient_info[1],
            "Date_of_birth": patient_info[2],
            "Sex": patient_info[3],
            "Additional_datas": patient_info[4],
            "Condition": patient_info[5]
        }
    else:
        return None

# Function to save the conversation data, including the embedding, into the database
def save_talk(patient_id, text, summary, sentiment):
    embedding = model.encode(summary, convert_to_tensor=True)
    embedding_str = ','.join(map(str, embedding.tolist()))
    
    cursor.execute("""
        INSERT INTO Talks (ID_Patient, Date_Time, Record, Summary, Sentiment, Embedding)
        VALUES (?, GETDATE(), ?, ?, ?, ?)
    """, (patient_id, text, summary, sentiment, embedding_str))
    
    conn.commit()

# Function to generate a response using LLM and langchain-groq
def generate_response_llm(session_record, previous_talk, similar_talk, dissimilar_talk, patient_info):
    system_message = SystemMessage(content=f"""
        You are a qualified and concise psychologist, helping the Patient discuss their problems.
        The conversation consists of several questions and answers.
            
        Patient information:
            Name: {patient_info['Name']},
            Date of birth: {patient_info['Date_of_birth']},
            Sex: {patient_info['Sex']},
            Additional data: {patient_info['Additional_datas']},
            Condition: {patient_info['Condition']}.

        Here is the previous conversation with the Patient: {previous_talk or 'No previous conversation'}. 
        Here is the summary of the most similar conversation with the Patient: {similar_talk or 'No similar conversation'}.
        Here is the summary of the most dissimilar conversation with the Patient: {dissimilar_talk or 'No dissimilar conversation'}.

        You will receive for anser a record of the current conversation - the Patient's last question at the end.

        Do not greet the Patient every time you answer a question.
        Do not repeat 'and what do you think it might mean for you'
        Do not call the interlocutor the Patient. Use the Patient Name but not all the time.
        Do not repeat the interlocutor's question before answering.
        Avoid long responses. Ask clarifying questions.
        Analyze the entire conversation from the very beginning, and not just the Patient's last phrase,
        Pay attention to .
        Pay attention to all the information about the Patient and previous conversations - You can mention this in your questions.
    """)
        
    human_message = HumanMessage(content=f"""
        Here is the current conversation record with the Patient: {session_record}.
    """)
    
    # response = llm.invoke([system_message, human_message])
    try:
        response = llm.invoke([system_message, human_message])
    except Exception as e:
        print(f"Ошибка при вызове LLM: {e}")      
    
    return response.content

# Function to generate a summary at the end of the conversation
def generate_summary(session_record):
    system_message = SystemMessage(content="You are a qualified psychologist. Create a brief summary of your conversation whith Patient.")
    human_message = HumanMessage(content=f"Conversation: {session_record}.")
    
    response = llm([system_message, human_message])
    return response.content

# Main session process
def main(patient_id):
    global finish_session
    previous_talk = find_previous_talk(patient_id)
    session_record = ""  
    response_text = ""  

    # Запускаем поток для завершения сессии
    threading.Thread(target=finish_session_function, daemon=True).start()

    # Retrieve patient info and pass it to the LLM
    patient_info = get_patient_info(patient_id)
    if not patient_info:
        print("Patient information not found.")
        return
    
    print(f"Starting session for: {patient_info['Name']}")

    Start_Talk = True
    similar_talk = dissimilar_talk = ""

    while True:        
        patient_query = recognize_speech()
        if finish_session:  
            print("Session ended.")
            summary = generate_summary(session_record)
            print(f"Conversation summary: {summary}")
            save_talk(patient_id, session_record, summary, "Neutral")
            break

        session_record = update_session_record_query(patient_query, session_record)

        if Start_Talk:
            similar_talk, dissimilar_talk = find_similar_talks(patient_id, session_record)
            if similar_talk and dissimilar_talk:
                similar_talk = similar_talk
                dissimilar_talk = dissimilar_talk
            else:
                similar_talk = dissimilar_talk = ""
            
            Start_Talk = False

        # Pass patient info to LLM along with the conversation context
        response_text = generate_response_llm(session_record, previous_talk, similar_talk, dissimilar_talk, patient_info)
        print(f"Program response: {response_text}")

        session_record = update_session_record_response(response_text, session_record)

        text_to_speech(response_text)

# Entry point for the program
if __name__ == "__main__":
    patient_id = input("Enter patient ID: ")
    main(patient_id=1)



  llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4", temperature=0.5)


Starting session for: Ilya      
Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.
Recording. Speak...
You said: hey yesterday I was in the restaurant
Recording stopped.
Program response: Ilya, it sounds like you had another visit to a restaurant. Can you share how this experience was different or similar to the previous one with your family?
Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.
Recording. Speak...
You said: pretty good my elephant and my zebra of
Could not understand the audio. Please speak clearly.
You said: pipe to my sons were upset a little bit
You said: no I'm just worrying is it good for my zebra and elephant
Recording stopped.
Program response: Ilya, it's interesting to hear about your elephant and zebra. Why do you think your sons were upset? And what makes you worry about your zebra and elephant?
Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.
Recording. Speak...


  response = llm([system_message, human_message])


Conversation summary: The conversation is between a patient named Ilya and a psychologist. Ilya talks about a recent visit to a restaurant and mentions his elephant and zebra, which seem to be metaphorical or symbolic. He expresses concern for them and shares that his sons were upset during the restaurant visit. The psychologist tries to understand more about these concerns and the reasons for his sons' upset. Ilya also talks about his love for his zebra, particularly noting its gray color, which he finds unique. The psychologist encourages Ilya to share more about why he finds the zebra's color unique.
