In [3]:
# pip install keyboard

In [None]:
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play
import pyodbc
from sentence_transformers import SentenceTransformer, util
from langchain.schema import SystemMessage, HumanMessage
import torch
import keyboard
import threading
import time
from langchain.chat_models import ChatOpenAI
import time
import functools
import librosa
import numpy as np

def import_llm_models():
    """Create the chat LLM client and the sentence-embedding model.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable instead of being hard-coded, so the secret never lives in the
    source file or in saved notebook cells.

    Returns:
        tuple: ``(llm, model)`` where ``llm`` is the ``ChatOpenAI`` client for
        the fine-tuned model and ``model`` is the ``SentenceTransformer`` used
        to embed text.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or blank.
    """
    import os  # local import keeps this block self-contained

    api_key = os.environ.get("OPENAI_API_KEY", "").strip()
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY environment variable is not set; "
            "export it before starting the session."
        )

    # Define LLM using OpenAI (gpt-4o-2024-08-06 with fine tuning)
    llm = ChatOpenAI(
        api_key=api_key,
        model="ft:gpt-4o-2024-08-06:personal:psychologist-1:APyJnbej",
        temperature=0.5,
    )

    # Model for generating text embeddings (e.g., Sentence-BERT)
    model = SentenceTransformer('all-MiniLM-L6-v2')
    return llm, model

def log_execution_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same signature and metadata as ``func`` that
        returns ``func``'s result unchanged after printing the elapsed time.
        If ``func`` raises, the exception propagates and nothing is printed.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump when the system clock is adjusted, giving bogus durations.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Execution time for '{func.__name__}': {execution_time:.4f}  seconds.")
        return result
    return wrapper

def stop_recording():
    """Block until the 'f' key is pressed, then clear the ``recording`` flag.

    Started in a background thread by the speech-recognition loop; it only
    writes the module-level ``recording`` global.
    """
    global recording
    settle_delay = 0.1  # seconds to wait after the key press before stopping
    keyboard.wait('f')
    time.sleep(settle_delay)
    recording = False

def finish_session_function():
    """Poll the keyboard and raise the ``finish_session`` flag on 'q'.

    Runs until the module-level ``finish_session`` global is true, checking
    the key roughly ten times per second.
    """
    global finish_session
    poll_interval = 0.1  # seconds; short pause to reduce CPU load
    while True:
        if finish_session:
            break
        if keyboard.is_pressed('q'):
            finish_session = True
        time.sleep(poll_interval)

# Function to analyze emotion based on audio features
@log_execution_time
def analyze_emotion(audio_data, sr):
    """Classify the emotion of an utterance from simple audio features.

    The classification is a demonstration-grade heuristic based on the mean
    RMS energy and the mean of the positive pitch estimates.

    Args:
        audio_data: 1-D array of audio samples (the caller passes int16 PCM).
        sr: Sample rate in Hz. Inside this function the name shadows the
            module-level ``sr`` alias; that is harmless because the alias is
            not used here.

    Returns:
        str: One of ``"excited"``, ``"calm"``, ``"scared"`` or ``"neutral"``.
        Empty or completely silent audio has zero energy and is reported as
        ``"calm"`` without running feature extraction.
    """
    # Convert to float32 *before* taking the absolute value: np.abs on int16
    # overflows for -32768 and would corrupt the peak.
    samples = np.asarray(audio_data).astype(np.float32)
    peak = float(np.max(np.abs(samples))) if samples.size else 0.0
    if peak == 0.0:
        # Nothing to normalise (dividing would yield NaN/inf); zero energy
        # falls in the "calm" branch of the rules below.
        return "calm"
    samples = samples / peak

    # Extracting features
    energy = float(np.mean(librosa.feature.rms(y=samples)))
    pitch, _ = librosa.piptrack(y=samples, sr=sr)
    voiced = pitch[pitch > 0]
    # NaN keeps every pitch comparison below false when no pitch was found,
    # without triggering numpy's "mean of empty slice" warning.
    pitch_mean = float(np.mean(voiced)) if voiced.size else float("nan")

    # Basic emotion classification (for demonstration purposes)
    if energy > 0.1 and pitch_mean > 150:
        emotion = "excited"
    elif energy < 0.05:
        emotion = "calm"
    elif pitch_mean < 120:
        emotion = "scared"
    else:
        emotion = "neutral"

    return emotion

# Speech recognition function with emotion analysis
def recognize_speech():
    """Record speech on demand and return the recognised text with an emotion.

    Waits for 's' to start recording, then listens phrase by phrase until the
    ``recording`` flag is cleared (by the 'f' key thread) or the
    ``finish_session`` flag is raised. Each recognised phrase is printed,
    collected, and passed to ``analyze_emotion``.

    Returns:
        tuple[str, str]: The recognised phrases joined with spaces, and the
        emotion of the last successfully recognised phrase. If nothing was
        recognised, or the session ended before recording, the emotion is
        ``"No emotion detected"``.
    """
    global recording, finish_session
    patient_query = ""
    # Bound up front so the return below cannot hit an unbound local when no
    # phrase was recognised before recording stopped.
    emotion = "No emotion detected"
    r = sr.Recognizer()
    print("Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.")

    while not finish_session:
        if keyboard.is_pressed('s') and not recording:  # Start recording on 's' key press
            recording = True
            print("Recording. Speak...")
            threading.Thread(target=stop_recording, daemon=True).start()  # Thread to stop recording

        if not recording:
            # Idle: sleep briefly instead of spinning at full CPU.
            time.sleep(0.05)
            continue

        with sr.Microphone() as source:
            r.adjust_for_ambient_noise(source)
            full_text = []

            while recording and not finish_session:
                try:
                    audio = r.listen(source, timeout=None)
                    text = r.recognize_google(audio, language="ru-RU")
                except sr.UnknownValueError:
                    print("Could not understand the audio. Please speak clearly.")
                    continue
                except sr.RequestError as e:
                    print(f"Service error; {e}")
                    break

                print(f"You said: {text}")
                full_text.append(text)

                # Convert to audio array for emotion analysis
                audio_data = np.frombuffer(audio.get_raw_data(), np.int16)
                emotion = analyze_emotion(audio_data, source.SAMPLE_RATE)
                print(f"Emotion: {emotion}")

            print("Recording stopped.")
            recording = False
            patient_query = ' '.join(full_text)
            return patient_query, emotion  # Return recognized text with emotion

    return patient_query, emotion  # Default return in case of session end

# Function to generate an audio response
def text_to_speech(text):
    """Speak ``text`` aloud in Russian.

    The speech is synthesised with gTTS, saved to ``response.mp3`` in the
    current working directory (overwritten on every call), and played back.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only text is
            ignored, because the synthesiser rejects empty input.

    Returns:
        None.
    """
    if not text or not text.strip():
        return  # nothing to say

    tts = gTTS(text=text, lang='ru')  # responses are spoken in Russian
    tts.save("response.mp3")
    audio = AudioSegment.from_mp3("response.mp3")
    play(audio)

# Function to open the DB for reading and writing; do not forget to close the connection - 0.6867  seconds
def open_db():
    """Open a new connection to the ``Psychologist`` SQL Server database.

    Returns:
        A ``pyodbc`` connection. The caller owns it and must close it.
    """
    # Raw strings: '\S' and '\d' are not valid escape sequences, so plain
    # literals trigger an invalid-escape warning (an error in future Python
    # versions). The resulting connection string is byte-identical.
    conn = pyodbc.connect(
        r'DRIVER={SQL Server};'
        r'SERVER=DESKTOP-SPMU70G\SQLEXPRESS;'
        r'DATABASE=Psychologist;'
        r'UID=DESKTOP-SPMU70G\domashniy;'
        r'Trusted_Connection=yes;'
    )
    return conn

# Function to find the most recent completed conversation (previous_talk)
@log_execution_time
def find_previous_talk(patient_id):
    """Return the record of the patient's most recent stored conversation.

    Args:
        patient_id: Value matched against ``Talks.ID_Patient``.

    Returns:
        The ``Record`` column of the newest row (by ``Date_Time``) for the
        patient, or ``""`` when the patient has no stored conversations.
    """
    conn = open_db()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT TOP 1 Record FROM Talks
            WHERE ID_Patient = ?
            ORDER BY Date_Time DESC
        """, (patient_id,))
        previous_talk = cursor.fetchone()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    return previous_talk[0] if previous_talk else ""

# Function to update the session record with patient query
def update_session_record_query(patient_query, session_record):
    """Return ``session_record`` extended with the patient's latest utterance.

    Args:
        patient_query: Text the patient said.
        session_record: Conversation transcript accumulated so far.

    Returns:
        str: The transcript with ``"Patient said: <query>. "`` appended.
    """
    patient_entry = f"Patient said: {patient_query}. "
    return session_record + patient_entry

# Function to update the session record with program response
def update_session_record_response(program_response, session_record):
    """Return ``session_record`` extended with the program's latest reply.

    Args:
        program_response: Text the program answered with.
        session_record: Conversation transcript accumulated so far.

    Returns:
        str: The transcript with ``"Psychologist responded: <reply>. "``
        appended.
    """
    response_entry = f"Psychologist responded: {program_response}. "
    return session_record + response_entry

# Function to find the most similar and most dissimilar conversations 2.2015  seconds
@log_execution_time
def find_similar_talks(llm, model, patient_id, query):
    """Find the stored talks most and least similar to the current query.

    The query is summarised with the LLM, the summary is embedded, and the
    embedding is compared by cosine similarity against the embeddings stored
    for the patient's earlier talks.

    Args:
        llm: Chat model passed to ``generate_summary``.
        model: Embedding model exposing ``encode(..., convert_to_tensor=True)``.
        patient_id: Value matched against ``Talks.ID_Patient``.
        query: Text of the current conversation.

    Returns:
        tuple: ``(most_similar_summary, most_dissimilar_summary)``. Both are
        ``None`` when the patient has no stored talks with an embedding; with
        a single stored talk both are that talk's summary.
    """
    conn = open_db()
    try:
        cursor = conn.cursor()
        # Fetch all embeddings and summaries for the given patient
        cursor.execute("SELECT Embedding, Summary FROM Talks WHERE ID_Patient = ?", (patient_id,))
        past_talks = cursor.fetchall()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    if not past_talks:
        # Nothing to compare against: skip the LLM round trip and embedding.
        return None, None

    # Generate the embedding for the current query
    query_s = generate_summary(llm, query)
    query_embedding = model.encode(query_s, convert_to_tensor=True)

    most_similar_talk = None
    most_dissimilar_talk = None
    highest_similarity = float('-inf')
    lowest_similarity = float('inf')

    for talk in past_talks:
        talk_embedding_str = talk[0]  # embedding stored as comma-separated floats
        talk_summary = talk[1]
        if not talk_embedding_str:
            continue  # row saved without an embedding; nothing to compare

        # Build the tensor on the same device as the query embedding so the
        # similarity computation does not fail on a CPU/GPU mismatch.
        talk_embedding = torch.tensor(
            [float(value) for value in talk_embedding_str.split(',')],
            device=query_embedding.device,
        )

        similarity = util.pytorch_cos_sim(query_embedding, talk_embedding).item()

        if similarity > highest_similarity:
            highest_similarity = similarity
            most_similar_talk = talk_summary

        if similarity < lowest_similarity:
            lowest_similarity = similarity
            most_dissimilar_talk = talk_summary

    return most_similar_talk, most_dissimilar_talk

# Function to retrieve patient information from the database
def get_patient_info(patient_id):
    """Load one patient's row from the ``Patients`` table.

    Args:
        patient_id: Value matched against ``Patients.ID_Patient``.

    Returns:
        dict | None: A dict with the keys ``ID_Patient``, ``Name``,
        ``Date_of_birth``, ``Sex``, ``Additional_datas`` and ``Condition``,
        or ``None`` when no such patient exists.
    """
    conn = open_db()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT ID_Patient, Name, Date_of_birth, Sex, Additional_datas, Condition
            FROM Patients
            WHERE ID_Patient = ?
        """, (patient_id,))
        patient_info = cursor.fetchone()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    if not patient_info:
        return None

    # Keys follow the column order of the SELECT above.
    columns = ("ID_Patient", "Name", "Date_of_birth", "Sex", "Additional_datas", "Condition")
    return {column: patient_info[index] for index, column in enumerate(columns)}

# Function to save the conversation data, including the embedding, into the database
def save_talk(model, patient_id, text, summary, sentiment):
    """Store a finished conversation and the embedding of its summary.

    Args:
        model: Embedding model exposing ``encode(..., convert_to_tensor=True)``.
        patient_id: Patient the conversation belongs to.
        text: Full conversation record.
        summary: Summary of the conversation; this is what gets embedded.
        sentiment: Sentiment label stored with the conversation.
    """
    # The embedding is persisted as comma-separated floats.
    embedding = model.encode(summary, convert_to_tensor=True)
    embedding_str = ','.join(map(str, embedding.tolist()))

    conn = open_db()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            INSERT INTO Talks (ID_Patient, Date_Time, Record, Summary, Sentiment, Embedding)
            VALUES (?, GETDATE(), ?, ?, ?, ?)
        """, (patient_id, text, summary, sentiment, embedding_str))
        conn.commit()
    finally:
        # Always release the connection; closing without a commit discards
        # the failed insert.
        conn.close()

#You are psychologist, helping the Patient discuss their problems.
# You are a Socratic questioner guiding someone through their thoughts and beliefs on a specific topic. Use open-ended questions to encourage deep thinking and self-reflection. Avoid giving direct answers or expressing opinions. Instead, focus on asking thoughtful questions that help clarify, probe assumptions, examine consequences, explore alternatives, and challenge ideas in a constructive way.
# Start by asking the person to explain their thoughts on the topic and, through a series of follow-up questions, encourage them to delve deeper, analyze their reasoning, and consider different perspectives. For example, ask questions like:
# "Why do you think this is true or important?"
# "What assumptions are you making here?"
# "Can you think of another perspective on this?"
# "What would happen if things were different?"
# "How would you respond to someone with a contrary view?"
# Your goal is to stimulate critical thinking and insight, rather than to direct or persuade.

# Function to generate a response using the LLM via LangChain (ChatOpenAI)
# The OpenAI model was fine-tuned to act as a Psychologist
def generate_response_llm(llm, session_record, previous_talk, similar_talk, dissimilar_talk, patient_info):
    """Ask the LLM for the next reply in the conversation.

    Args:
        llm: Chat model exposing ``invoke(messages)``.
        session_record: Transcript of the current session so far.
        previous_talk: Record of the previous conversation, or a falsy value.
        similar_talk: Summary of the most similar past talk, or a falsy value.
        dissimilar_talk: Summary of the most dissimilar past talk, or a falsy
            value.
        patient_info: Dict with the keys ``Name``, ``Date_of_birth``, ``Sex``,
            ``Additional_datas`` and ``Condition``.

    Returns:
        str: The model's reply. If the LLM call fails, the error is printed
        and a short Russian apology asking the patient to repeat is returned,
        so the session can continue.
    """
    system_message = SystemMessage(content=f"""        
        Speak russian.
        
        Patient information:
            Name: {patient_info['Name']},
            Date of birth: {patient_info['Date_of_birth']},
            Sex: {patient_info['Sex']},
            Additional data: {patient_info['Additional_datas']},
            Condition: {patient_info['Condition']}.

        Previous conversation with the Patient: {previous_talk or 'No previous conversation'}. 
        Summary of the most similar conversation with the Patient: {similar_talk or 'No similar conversation'}.
        Summary of the most dissimilar conversation with the Patient: {dissimilar_talk or 'No dissimilar conversation'}.       
    """)
    human_message = HumanMessage(content=f"""
        Here is the current conversation record with the Patient: {session_record}.
    """)

    try:
        response = llm.invoke([system_message, human_message])
    except Exception as e:
        # Best-effort: report the failure and keep the session alive. The
        # fallback is spoken to the patient, hence Russian.
        print(f"Error calling LLM: {e}")
        return "Извините, у меня возникла техническая проблема. Не могли бы вы повторить?"

    return response.content

# Function to generate a summary at the end of the conversation
def generate_summary(llm, session_record):
    """Ask the LLM for a brief summary of a conversation.

    Args:
        llm: Chat model exposing ``invoke(messages)``.
        session_record: Transcript of the conversation to summarise.

    Returns:
        str: The summary text returned by the model.
    """
    system_message = SystemMessage(content="You are a qualified psychologist. Create a brief summary of your conversation whith Patient.")
    human_message = HumanMessage(content=f"Conversation: {session_record}.")

    # Use invoke(), as generate_response_llm does; calling the model object
    # directly is the legacy interface.
    response = llm.invoke([system_message, human_message])
    return response.content

# Function to register a new patient
def register_patient():
    """Interactively register a new patient and return the new ID.

    Prompts on the console for name, date of birth, sex and optional
    additional information, then inserts a row into ``Patients``.

    Returns:
        The ``ID_Patient`` value generated for the inserted row.
    """
    print("Patient not found. Please register.")
    name = input("Enter name: ")
    date_of_birth = input("Enter date of birth (YYYY-MM-DD): ")
    sex = input("Enter sex: ")
    additional_data = input("Enter additional information if your want: ")

    conn = open_db()
    try:
        cursor = conn.cursor()
        # Insert new patient data into the Patients table and get the new ID
        cursor.execute("""
            INSERT INTO Patients (Name, Date_of_birth, Sex, Additional_datas) 
            OUTPUT INSERTED.ID_Patient  -- Adjust this if your ID field has a different name
            VALUES (?, ?, ?, ?)
        """, (name, date_of_birth, sex, additional_data))

        # The OUTPUT clause returns the new ID as a one-row result set.
        patient_id = cursor.fetchone()[0]

        conn.commit()
    finally:
        # Always release the connection; closing without a commit discards
        # a failed insert.
        conn.close()

    print(f"Patient registered with ID: {patient_id}")
    return patient_id


# Main session process
def main(patient_id):
    """Run one interactive voice session for a patient.

    Loads the models, looks the patient up (registering a new one when the ID
    is unknown), then loops: record speech, ask the LLM for a reply, speak it.
    When 'q' ends the session, a summary is generated and the conversation is
    saved.

    Args:
        patient_id: ID entered by the user; replaced by the newly generated
            ID when the patient has to be registered.
    """
    # Recording flag
    global recording
    recording = False
    # Session finish flag
    global finish_session
    finish_session = False

    global llm, model
    llm, model = import_llm_models()

    # Resolve the patient first so every later lookup uses the final ID.
    patient_info = get_patient_info(patient_id)
    if not patient_info:
        patient_id = register_patient()  # Register new patient
        patient_info = get_patient_info(patient_id)  # Retrieve patient info again

    previous_talk = find_previous_talk(patient_id)

    session_record = ""

    # Start the 'q' watcher only after registration: the registration prompts
    # read typed text, and a 'q' in a name must not end the session.
    threading.Thread(target=finish_session_function, daemon=True).start()

    print(f"Starting session for: {patient_info['Name']}")

    first_turn = True
    similar_talk = dissimilar_talk = ""

    while True:
        patient_query, _ = recognize_speech()

        if finish_session:
            print("Session ended.")
            # Keep whatever the patient said right before ending the session.
            if patient_query:
                session_record = update_session_record_query(patient_query, session_record)
            # Do not summarise or store a session in which nothing was said.
            if session_record:
                summary = generate_summary(llm, session_record)
                print(f"Conversation summary: {summary}")
                save_talk(model, patient_id, session_record, summary, "Neutral")
            break

        if not patient_query:
            # Nothing was recognised; wait for the next recording instead of
            # sending an empty utterance to the LLM.
            continue

        session_record = update_session_record_query(patient_query, session_record)

        if first_turn:
            # Similar/dissimilar talks are looked up once, from the first utterance.
            similar_talk, dissimilar_talk = find_similar_talks(llm, model, patient_id, session_record)
            first_turn = False

        # Pass patient info to LLM along with the conversation context
        response_text = generate_response_llm(llm, session_record, previous_talk, similar_talk, dissimilar_talk, patient_info)
        print(f"Program response: {response_text}")

        session_record = update_session_record_response(response_text, session_record)

        text_to_speech(response_text)

# Entry point for the program
if __name__ == "__main__":
    # Ask for the patient ID on the console; input() returns it as a string
    # and it is forwarded unchanged to main().
    patient_id = input("Enter patient ID: ")
    main(patient_id)
    


Execution time for 'find_previous_talk': 0.0264  seconds.
Starting session for: Ilya      
Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.
Recording. Speak...
You said: Я вчера где-то оставил ухо не могу весь день сегодня его найти
Execution time for 'analyze_emotion': 0.0345  seconds.
Emotion: neutral
Recording stopped.
Execution time for 'find_similar_talks': 2.2015  seconds.
Program response: Что вы имеете в виду, когда говорите, что "оставили ухо"?
Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.
Recording. Speak...
You said: Куда ты его положил
Execution time for 'analyze_emotion': 0.0093  seconds.
Emotion: excited
You said: я положу
Execution time for 'analyze_emotion': 0.0070  seconds.
Emotion: neutral
Recording stopped.
Program response: Как вы думаете, что может символизировать "оставленное ухо"?
Press 's' to start recording, 'f' to stop recording. Press 'q' to end the session.
Recording. Speak...
You said: у