In [9]:
# pip install keyboard

In [None]:
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play
import pyodbc
from sentence_transformers import SentenceTransformer, util
from langchain.schema import SystemMessage, HumanMessage
import torch
import keyboard
import threading
import time
from langchain.chat_models import ChatOpenAI
import time
import functools
import librosa
import numpy as np

def import_llm_models():
    """Create the chat LLM client and the sentence-embedding model.

    Returns:
        tuple: ``(llm, model)`` -- a ``ChatOpenAI`` client configured for the
        fine-tuned model id below, and the ``all-MiniLM-L6-v2``
        ``SentenceTransformer`` used for text embeddings.
    """
    # Credentials for the OpenAI API.
    openai_api_key = '...'

    # Chat model served by OpenAI (fine-tuned gpt-4o-2024-08-06).
    chat_llm = ChatOpenAI(
        api_key=openai_api_key,
        model="ft:gpt-4o-2024-08-06:personal:psychologist-1:APyJnbej",
        temperature=0.5,
    )

    # Compact Sentence-BERT model that turns text into embeddings.
    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    return chat_llm, embedder

def log_execution_time(func):
    """Decorator that prints how long each call to ``func`` took.

    The elapsed time is measured with ``time.perf_counter()``, a monotonic
    high-resolution clock, so the reading cannot be distorted by system clock
    adjustments the way a ``time.time()`` difference can.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same metadata as ``func`` (via ``functools.wraps``)
        that forwards all arguments, prints the elapsed seconds and returns
        the wrapped call's result. If ``func`` raises, the exception
        propagates and nothing is printed.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        started = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - started
        print(f"Execution time for '{func.__name__}': {elapsed:.4f}  seconds.")
        return result
    return wrapper

def stop_recording():
    """Wait for the 'f' key, then clear the module-level ``recording`` flag.

    Intended to run on its own thread: ``keyboard.wait`` blocks until the key
    is pressed.
    """
    global recording
    settle_seconds = 0.1

    keyboard.wait('f')          # blocks until 'f' is pressed
    time.sleep(settle_seconds)  # brief pause before flipping the flag
    recording = False

def finish_session_function():
    """Poll the keyboard and raise the ``finish_session`` flag when 'q' is pressed.

    Returns as soon as the module-level ``finish_session`` flag is true,
    whether it was set here or elsewhere.
    """
    global finish_session
    poll_seconds = 0.1

    while True:
        if finish_session:
            return
        if keyboard.is_pressed('q'):
            finish_session = True
        # Sleep between polls so the loop does not hog the CPU.
        time.sleep(poll_seconds)

# Function to analyze emotion based on audio features
@log_execution_time
def analyze_emotion(audio_data, sr):
    """Classify the emotion of a speech fragment from its energy and pitch.

    This is a rule-of-thumb classifier for demonstration purposes only.

    Args:
        audio_data: Audio samples (the caller passes an int16 array).
        sr: Sample rate of ``audio_data`` in Hz, forwarded to
            ``librosa.piptrack``. Note that inside this function the name
            shadows the module-level ``speech_recognition`` alias.

    Returns:
        str: One of ``"excited"``, ``"calm"``, ``"scared"`` or ``"neutral"``.
        Empty input yields ``"neutral"``.
    """
    # Convert to float BEFORE taking the absolute value: np.abs on int16
    # wraps -32768 back to -32768, which would corrupt the peak.
    samples = np.asarray(audio_data, dtype=np.float32)
    if samples.size == 0:
        return "neutral"

    # Peak-normalize; skip the division for pure silence so it cannot
    # produce NaN/inf samples.
    peak = np.max(np.abs(samples))
    if peak > 0:
        samples = samples / peak

    # Extracting features
    energy = np.mean(librosa.feature.rms(y=samples))
    pitch, _ = librosa.piptrack(y=samples, sr=sr)
    voiced = pitch[pitch > 0]
    # With no positive pitch values the mean is undefined; NaN makes every
    # pitch comparison below false, so only the energy rule can fire.
    pitch_mean = np.mean(voiced) if voiced.size else float("nan")

    # Basic emotion classification (for demonstration purposes)
    if energy > 0.1 and pitch_mean > 150:
        return "excited"
    if energy < 0.05:
        return "calm"
    if pitch_mean < 120:
        return "scared"
    return "neutral"

# Speech recognition function with emotion analysis
def recognize_speech():
    """Record speech between the 's' and 'f' keys and return text plus emotion.

    Waits until 's' is pressed, then listens on the microphone phrase by
    phrase until the ``recording`` flag is cleared (by ``stop_recording``) or
    the ``finish_session`` flag is set. Each recognised phrase is transcribed
    with Google speech recognition (``ru-RU``) and passed to
    ``analyze_emotion``. The flags are only re-checked between calls to
    ``r.listen``.

    Returns:
        tuple[str, str]: The recognised phrases joined by spaces, and the
        emotion of the last phrase that was analysed. If nothing was
        analysed, or the session ended before any recording started, the
        emotion is ``"No emotion detected"``.
    """
    global recording, finish_session
    patient_query = ""
    # Default so the return below is always defined, even when every phrase
    # failed recognition or the recognition service errored out.
    emotion = "No emotion detected"
    r = sr.Recognizer()
    print("Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.")

    while not finish_session:
        if keyboard.is_pressed('s') and not recording:  # Start recording on 's' key press
            recording = True
            print("Recording. Speak...")
            threading.Thread(target=stop_recording, daemon=True).start()  # Thread to stop recording

        if not recording:
            # Idle: pause briefly instead of spinning at full CPU speed.
            time.sleep(0.05)
            continue

        with sr.Microphone() as source:
            r.adjust_for_ambient_noise(source)
            full_text = []

            while recording and not finish_session:
                try:
                    audio = r.listen(source, timeout=None)
                    text = r.recognize_google(audio, language="ru-RU")
                    print(f"You said: {text}")
                    full_text.append(text)

                    # Convert to audio array for emotion analysis
                    audio_data = np.frombuffer(audio.get_raw_data(), np.int16)
                    emotion = analyze_emotion(audio_data, source.SAMPLE_RATE)
                    print(f"Emotion: {emotion}")

                except sr.UnknownValueError:
                    print("Could not understand the audio. Please speak clearly.")
                except sr.RequestError as e:
                    print(f"Service error; {e}")
                    break

            print("Recording stopped.")
            recording = False
            patient_query = ' '.join(full_text)
            return patient_query, emotion  # Return recognized text with emotion

    return patient_query, "No emotion detected"  # Default return in case of session end

# Function to generate an audio response
@log_execution_time
def text_to_speech(text):
    """Speak ``text`` aloud using gTTS with the Russian (``'ru'``) voice.

    The synthesized audio is saved to ``response.mp3`` in the current working
    directory, then loaded back and played.
    """
    mp3_path = "response.mp3"

    # Synthesize and store the speech.
    gTTS(text=text, lang='ru').save(mp3_path)

    # Load the saved file and play it.
    play(AudioSegment.from_mp3(mp3_path))


# Function to find the most recent completed conversation (previous_talk)
@log_execution_time
def find_previous_talk(patient_id, cursor):
    """Return the ``Record`` of the patient's most recent talk.

    Args:
        patient_id: Value matched against ``Talks.ID_Patient``.
        cursor: Database cursor used to run the query.

    Returns:
        The ``Record`` column of the newest row by ``Date_Time``, or ``""``
        when the patient has no rows in ``Talks``.
    """
    latest_talk_sql = """
        SELECT TOP 1 Record FROM Talks
        WHERE ID_Patient = ? 
        ORDER BY Date_Time DESC
    """
    cursor.execute(latest_talk_sql, (patient_id,))
    row = cursor.fetchone()

    if not row:
        return ""
    return row[0]

# Function to update the session record with patient query
def update_session_record_query(patient_query, session_record):
    """Return ``session_record`` with the patient's utterance appended.

    The entry is formatted as ``"Patient said: <query>. "``.
    """
    patient_entry = f"Patient said: {patient_query}. "
    return session_record + patient_entry

# Function to update the session record with program response
def update_session_record_response(program_response, session_record):
    """Return ``session_record`` with the program's reply appended.

    The entry is formatted as ``"Psychologist responded: <response>. "``.
    """
    reply_entry = f"Psychologist responded: {program_response}. "
    return session_record + reply_entry

# Function to find the most similar and most dissimilar conversations (measured: ~2.2 seconds per call)
@log_execution_time
def find_similar_talks(llm, model, patient_id, query, cursor):
    """Find the stored talks most and least similar to the current query.

    The query is first summarised with ``generate_summary``; the summary is
    embedded with ``model`` and compared by cosine similarity against the
    comma-separated embeddings stored in ``Talks.Embedding``.

    Args:
        llm: Chat model passed through to ``generate_summary``.
        model: Embedding model exposing ``encode(..., convert_to_tensor=True)``.
        patient_id: Value matched against ``Talks.ID_Patient``.
        query: Text of the current conversation.
        cursor: Database cursor used to read the stored talks.

    Returns:
        tuple: ``(most_similar_summary, most_dissimilar_summary)``. Both are
        ``None`` when the patient has no stored talk with a usable embedding.
        With exactly one stored talk, both entries are that talk's summary.
    """
    # Fetch all embeddings and summaries for the given patient from the database
    cursor.execute("SELECT Embedding, Summary FROM Talks WHERE ID_Patient = ?", (patient_id,))
    past_talks = cursor.fetchall()

    # Nothing to compare against: skip the LLM summary and the embedding
    # work entirely.
    if not past_talks:
        return None, None

    # Generate the embedding for the current query
    query_s = generate_summary(llm, query)
    query_embedding = model.encode(query_s, convert_to_tensor=True)

    # Variables to store the most similar and most dissimilar talks
    most_similar_talk = None
    most_dissimilar_talk = None

    highest_similarity = float('-inf')  # Initialize with a very low value
    lowest_similarity = float('inf')    # Initialize with a very high value

    # Loop through each saved embedding from the database
    for talk in past_talks:
        talk_embedding_str = talk[0]  # Extract the embedding as a string
        talk_summary = talk[1]        # Extract the corresponding summary

        # Rows saved without an embedding (NULL or empty) cannot be compared.
        if not talk_embedding_str:
            continue

        # Convert the embedding from a string back to a tensor, placed on the
        # same device as the query embedding so the two can be compared.
        talk_embedding = torch.tensor(
            [float(value) for value in talk_embedding_str.split(',')],
            device=query_embedding.device,
        )

        # Compute cosine similarity between the current query and the saved embeddings
        similarity = util.pytorch_cos_sim(query_embedding, talk_embedding).item()

        # Check if this is the most similar talk so far
        if similarity > highest_similarity:
            highest_similarity = similarity
            most_similar_talk = talk_summary

        # Check if this is the most dissimilar talk so far
        if similarity < lowest_similarity:
            lowest_similarity = similarity
            most_dissimilar_talk = talk_summary

    # Return the summaries of the most similar and most dissimilar talks
    return most_similar_talk, most_dissimilar_talk

# Function to retrieve patient information from the database
@log_execution_time
def get_patient_info(patient_id, cursor):
    """Load one patient's row from ``Patients`` as a dictionary.

    Args:
        patient_id: Value matched against ``Patients.ID_Patient``.
        cursor: Database cursor used to run the query.

    Returns:
        dict | None: The selected columns keyed by column name, or ``None``
        when no row matches.
    """
    columns = (
        "ID_Patient",
        "Name",
        "Date_of_birth",
        "Sex",
        "Additional_datas",
        "Condition",
    )

    cursor.execute("""
        SELECT ID_Patient, Name, Date_of_birth, Sex, Additional_datas, Condition 
        FROM Patients 
        WHERE ID_Patient = ?
    """, (patient_id,))
    row = cursor.fetchone()

    if not row:
        return None

    # Pair each selected column name with the value at the same position.
    return {column: row[position] for position, column in enumerate(columns)}

# Function to generate a response using an LLM via LangChain. The OpenAI model was fine-tuned as a psychologist
@log_execution_time
def generate_response_llm(llm, session_record, previous_talk, similar_talk, dissimilar_talk, patient_info, emotion):
    """Ask the LLM for the next reply in the conversation.

    The system prompt carries the patient's details, the previous talk, the
    most similar and most dissimilar talk summaries and the detected emotion;
    the human message carries the current session record.

    Args:
        llm: Chat model exposing ``invoke``.
        session_record: Transcript of the current session so far.
        previous_talk: Record of the previous talk (falsy if none).
        similar_talk: Summary of the most similar past talk (falsy if none).
        dissimilar_talk: Summary of the most dissimilar past talk (falsy if none).
        patient_info: Dict with the keys ``Name``, ``Date_of_birth``, ``Sex``,
            ``Additional_datas`` and ``Condition``.
        emotion: Emotion label for the latest utterance (falsy if none).

    Returns:
        The ``content`` of the model's reply.

    Raises:
        Exception: Whatever ``llm.invoke`` raised, re-raised after logging.
            Previously the failure surfaced as an ``UnboundLocalError`` on
            ``response``, hiding the real cause.
    """
    system_message = SystemMessage(content=f"""        
        Speak russian.
        
        Patient information:
            Name: {patient_info['Name']},
            Date of birth: {patient_info['Date_of_birth']},
            Sex: {patient_info['Sex']},
            Additional data: {patient_info['Additional_datas']},
            Condition: {patient_info['Condition']}.

        Previous conversation with the Patient: {previous_talk or 'No previous conversation'}. 
        Summary of the most similar conversation with the Patient: {similar_talk or 'No similar conversation'}.
        Summary of the most dissimilar conversation with the Patient: {dissimilar_talk or 'No dissimilar conversation'}. 
        Pay attention to the emotional analysis of speech: {emotion or 'No emotion detected'}      
    """)
    human_message = HumanMessage(content=f"""
        Here is the current conversation record with the Patient: {session_record}.
    """)

    try:
        response = llm.invoke([system_message, human_message])
    except Exception as e:
        print(f"Error calling LLM: {e}")
        # There is no reply to return; let the original error propagate.
        raise

    return response.content

# Function to generate a summary at the end of the conversation
@log_execution_time
def generate_summary(llm, session_record):
    """Ask the LLM for a brief summary of a conversation.

    Args:
        llm: Chat model exposing ``invoke``.
        session_record: Transcript of the conversation to summarise.

    Returns:
        The ``content`` of the model's reply.
    """
    system_message = SystemMessage(content="You are a qualified psychologist. Create a brief summary of your conversation whith Patient.")
    human_message = HumanMessage(content=f"Conversation: {session_record}.")

    # Use invoke(), as generate_response_llm does, rather than calling the
    # model object directly (the direct-call form is deprecated in LangChain).
    response = llm.invoke([system_message, human_message])
    return response.content

# Function extracts facts about the patient from his conversation and updates the collected data about him
@log_execution_time
def update_patient_info(llm, session_record, patient_info):
    """Refresh ``patient_info['Additional_datas']`` from the session transcript.

    The LLM receives the current ``Additional_datas`` text and the
    transcript, and its reply (stripped of surrounding whitespace) replaces
    the field.

    Args:
        llm: Chat model exposing ``invoke``.
        session_record: Transcript of the current session.
        patient_info: Dict containing an ``Additional_datas`` entry. It is
            modified in place.

    Returns:
        The same ``patient_info`` dict, after the update.
    """
    system_message = SystemMessage(content=f"""        
        Review the current conversation transcript and update the 'Additional_datas' field with new facts about the patient, if any: {patient_info['Additional_datas']}.        
    """)
    human_message = HumanMessage(content=f"Here is the current conversation record with the Patient: {session_record}")

    # Call the model to update additional_data. invoke() matches
    # generate_response_llm; calling the model object directly is deprecated
    # in LangChain.
    updated_additional_datas = llm.invoke([system_message, human_message])

    # Update additional_datas field using model response
    patient_info['Additional_datas'] = updated_additional_datas.content.strip()

    return patient_info


# Function to save the conversation data (including the embedding) and write the updated Additional_datas to the database
@log_execution_time
def save_talk(model, patient_id, text, summary, sentiment, patient_info, cursor):
    """Store a finished talk and write back the patient's additional data.

    Inserts one row into ``Talks`` (the embedding of ``summary`` is stored as
    a comma-separated string), then updates ``Patients.Additional_datas``
    from ``patient_info``. No commit is issued here.

    Args:
        model: Embedding model exposing ``encode(..., convert_to_tensor=True)``.
        patient_id: Patient the talk belongs to.
        text: Full conversation record.
        summary: Summary of the conversation; this is the text that is embedded.
        sentiment: Sentiment label stored with the talk.
        patient_info: Dict containing an ``Additional_datas`` entry.
        cursor: Database cursor used for both statements.
    """
    # Serialise the summary embedding as "v1,v2,...".
    vector = model.encode(summary, convert_to_tensor=True).tolist()
    embedding_str = ','.join(str(component) for component in vector)

    insert_talk_sql = """
        INSERT INTO Talks (ID_Patient, Date_Time, Record, Summary, Sentiment, Embedding)
        VALUES (?, GETDATE(), ?, ?, ?, ?)
    """
    update_patient_sql = """
        UPDATE Patients
        SET Additional_datas = ?
        WHERE ID_Patient = ?
    """

    cursor.execute(insert_talk_sql, (patient_id, text, summary, sentiment, embedding_str))
    cursor.execute(update_patient_sql, (patient_info['Additional_datas'], patient_id))
    

# Function to register a new patient
def register_patient(cursor):
    """Interactively register a new patient and return the new ID.

    Prompts on stdin for name, date of birth, sex and additional
    information, inserts the row into ``Patients`` and reads the generated
    ``ID_Patient`` back through the ``OUTPUT`` clause.

    Args:
        cursor: Database cursor used for the insert.

    Returns:
        The ``ID_Patient`` value of the inserted row.
    """
    print("Patient not found. Please register.")

    # Answers are collected in column order, so the tuple can be bound
    # directly to the INSERT parameters.
    answers = (
        input("Enter name: "),
        input("Enter date of birth (YYYY-MM-DD): "),
        input("Enter sex: "),
        input("Enter additional information if your want: "),
    )

    # Insert the new patient and get the generated ID back.
    cursor.execute("""
        INSERT INTO Patients (Name, Date_of_birth, Sex, Additional_datas) 
        OUTPUT INSERTED.ID_Patient  -- Adjust this if your ID field has a different name
        VALUES (?, ?, ?, ?)
    """, answers)

    # First column of the first returned row is the new ID.
    new_patient_id = cursor.fetchone()[0]

    print(f"Patient registered with ID: {new_patient_id}")
    return new_patient_id

# Connecting to SQL Server database
class DatabaseConnection:
    """Context manager that opens the SQL Server connection for a session.

    Usage::

        with DatabaseConnection() as (conn, cursor):
            ...

    On a clean exit the transaction is committed. If the ``with`` body
    raised, the transaction is rolled back so partial writes are not
    persisted. The connection is closed in both cases, and exceptions from
    the body are never suppressed.
    """

    def __enter__(self):
        """Open the connection and return ``(connection, cursor)``."""
        self.conn = pyodbc.connect(
            'DRIVER={SQL Server};'
            'SERVER=DESKTOP-SPMU70G\\SQLEXPRESS;'
            'DATABASE=Psychologist;'
            'UID=DESKTOP-SPMU70G\\domashniy;'
            'Trusted_Connection=yes;'
        )
        self.cursor = self.conn.cursor()
        return self.conn, self.cursor

    def __exit__(self, exc_type, exc_value, traceback):
        """Commit on success, roll back on error, and always close."""
        try:
            if exc_type is None:
                self.conn.commit()
            else:
                self.conn.rollback()
        finally:
            # Close even if commit/rollback itself fails.
            self.conn.close()

def main(patient_id):
    """Run one interactive session with a patient.

    Loads the models, opens the database, looks up (or registers) the
    patient, then alternates between recording the patient's speech and
    speaking the LLM's reply until the ``finish_session`` flag is set. At the
    end a non-empty conversation is summarised, the patient's additional
    data is refreshed and the talk is saved.

    Args:
        patient_id: ID of the patient, as entered by the user. If no such
            patient exists a new one is registered and its ID is used
            instead.
    """
    global recording, finish_session, llm, model
    recording = False
    finish_session = False
    llm, model = import_llm_models()

    session_record = ""
    response_text = ""

    # Using DatabaseConnection as a context manager
    with DatabaseConnection() as (conn, cursor):
        previous_talk = find_previous_talk(patient_id, cursor)

        # Start a thread to monitor session termination
        threading.Thread(target=finish_session_function, daemon=True).start()

        patient_info = get_patient_info(patient_id, cursor)
        if not patient_info:
            patient_id = register_patient(cursor)
            # Persist the registration right away so a failure later in the
            # session cannot lose the new patient.
            conn.commit()
            patient_info = get_patient_info(patient_id, cursor)

        print(f"Starting session for: {patient_info['Name']}")

        first_turn = True
        similar_talk = dissimilar_talk = ""

        while True:
            patient_query, emotion = recognize_speech()

            # Record whatever was recognised, including an utterance that was
            # captured just before the session was ended.
            if patient_query:
                session_record = update_session_record_query(patient_query, session_record)

            if finish_session:
                print("Session ended.")
                if session_record:
                    summary = generate_summary(llm, session_record)
                    print(f"Conversation summary: {summary}")
                    update_patient_info(llm, session_record, patient_info)
                    save_talk(model, patient_id, session_record, summary, "Neutral", patient_info, cursor)
                else:
                    # Nothing was said: do not ask the LLM to summarise an
                    # empty transcript or store an empty talk.
                    print("No conversation recorded; nothing to save.")
                break

            if not patient_query:
                # Recording produced no text; there is nothing to answer.
                print("No speech recognized. Please try again.")
                continue

            # Similar/dissimilar talks are looked up once, on the first turn.
            if first_turn:
                similar_talk, dissimilar_talk = find_similar_talks(
                    llm, model, patient_id, session_record, cursor
                )
                first_turn = False

            response_text = generate_response_llm(
                llm, session_record, previous_talk, similar_talk, dissimilar_talk, patient_info, emotion
            )
            print(f"Program response: {response_text}")

            session_record = update_session_record_response(response_text, session_record)

            text_to_speech(response_text)

# Run the main function
if __name__ == "__main__":
    # Prompt for the patient ID; it is passed on as the raw string from input().
    patient_id = input("Enter patient ID: ")
    # Start the interactive session for that patient.
    main(patient_id)
   


Execution time for 'find_previous_talk': 0.0021  seconds.
Execution time for 'get_patient_info': 0.0010  seconds.
Starting session for: Ilya      
Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.
Recording. Speak...
You said: жить хорошо
Execution time for 'analyze_emotion': 0.0120  seconds.
Emotion: calm
Recording stopped.
Execution time for 'generate_summary': 1.9009  seconds.
Execution time for 'find_similar_talks': 1.9430  seconds.
Execution time for 'generate_response_llm': 0.7305  seconds.
Program response: Что, по вашему мнению, делает жизнь хорошей?
Execution time for 'text_to_speech': 5.3543  seconds.
Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.
Session ended.
Execution time for 'generate_summary': 1.2484  seconds.
Conversation summary: Summary: The patient expressed a positive sentiment about life, stating "жить хорошо" (life is good). The psychologist prompted further reflection by asking what specifical