In [None]:
!pip install openai langchain langchain-community langchain-core chromadb pandas tiktoken sentence-transformers gradio datasets




In [None]:
# Mount Google Drive inside the Colab runtime at the mount point below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os
import pandas as pd
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI

In [None]:
# Set OpenAI API Key
# SECURITY: a literal secret key used to be pasted in this cell. Any key that has ever been
# saved inside a notebook must be treated as leaked and revoked in the OpenAI dashboard.
# The key is now read from the environment when already set; otherwise it is requested
# interactively without echo, so it never ends up in the notebook file or its outputs.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")


In [None]:
# === Load Datasets === #
# Locations of the three source CSV files on the mounted Google Drive.
KAGGLE_CSV = "/content/drive/MyDrive/Colab Notebooks/datasetkaggle.csv"
ERALY_CSV = "/content/drive/MyDrive/Colab Notebooks/mhkk.csv"
HELIOS_CSV = "/content/drive/MyDrive/Colab Notebooks/mental_health_chatbot_dataset (2).csv"

# 1. Kaggle dataset
df_kaggle = pd.read_csv(KAGGLE_CSV)

# 2. Eraly-ml dataset: the "Context" and "Response" columns are discarded right after loading.
df_eraly = pd.read_csv(ERALY_CSV).drop(columns=["Context", "Response"])

# 3. Heliosbrahma dataset (read from a CSV file)
df_helios = pd.read_csv(HELIOS_CSV)

# === Normalize into (Context, Response) === #
# Accumulator for the Document objects built from each dataset.
docs = []

In [None]:
# Kaggle: one Document per row, combining the Context, Response and LLM columns.
# NOTE(review): no missing-value filter is applied to these rows, so a NaN cell would be
# rendered as the text "nan" — confirm the dataset has no gaps.
docs.extend(
    Document(
        page_content=f"Context: {row['Context']}\nResponse: {row['Response']}\nLLM: {row['LLM']}",
        metadata={"source": "kaggle", "row": idx},
    )
    for idx, row in df_kaggle.iterrows()
)

# Eraly-ml: only the Kazakh columns (Context_kk / Response_kk) are used, and only for
# rows where both of them are present.
docs.extend(
    Document(
        page_content=f"Context: {row['Context_kk']}\nResponse: {row['Response_kk']}",
        metadata={"source": "eraly_kk", "row": idx},
    )
    for idx, row in df_eraly.iterrows()
    if pd.notna(row.get("Context_kk")) and pd.notna(row.get("Response_kk"))
)

In [None]:
# Heliosbrahma (CSV with 'human' and 'assistant' columns): keep the rows where both
# values are present and trim surrounding whitespace from each of them.
for idx, row in df_helios.iterrows():
    human_text = row.get("human")
    assistant_text = row.get("assistant")
    if pd.isna(human_text) or pd.isna(assistant_text):
        continue  # incomplete exchange, nothing to index
    page = f"Context: {human_text.strip()}\nResponse: {assistant_text.strip()}"
    docs.append(Document(page_content=page, metadata={"source": "helios", "row": idx}))


In [None]:
# === Chunking === #
# Split every document into overlapping chunks (chunk_size=500, chunk_overlap=50).
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
text_chunks = splitter.split_documents(docs)

# === Embeddings and Vector DB === #
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
DB_CHROMA_PATH = "chroma_db_all"

# Re-running this cell used to embed and insert every chunk again into the same persisted
# collection: that costs API calls and makes the retriever return duplicated chunks.
# Reuse the store when it already exists on disk. To rebuild after the datasets change,
# delete DB_CHROMA_PATH and restart the runtime.
# NOTE(review): a non-empty directory is taken as "index already built" — confirm this is
# good enough if an earlier build can be interrupted half-way.
if os.path.isdir(DB_CHROMA_PATH) and os.listdir(DB_CHROMA_PATH):
    vectordb = Chroma(persist_directory=DB_CHROMA_PATH, embedding_function=embedding_model)
else:
    vectordb = Chroma.from_documents(text_chunks, embedding_model, persist_directory=DB_CHROMA_PATH)
    # Explicit flush kept for older Chroma releases; presumably redundant (and only a
    # deprecation warning) on Chroma >= 0.4, which persists automatically — verify.
    vectordb.persist()


  embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
  vectordb.persist()


In [None]:
# === LLM Setup === #
def load_llm(model: str = "gpt-4-turbo", temperature: float = 0.9):
    """Create the chat model used to generate the final answers.

    Args:
        model: Name of the OpenAI chat model. Defaults to "gpt-4-turbo", the value
            that was previously hard-coded.
        temperature: Sampling temperature handed to the model. Defaults to 0.9, the
            value that was previously hard-coded.

    Returns:
        A ``ChatOpenAI`` instance configured with the given model and temperature.
    """
    return ChatOpenAI(model=model, temperature=temperature)

# Prompt sent to the LLM; it exposes two placeholders, {context} and {question}.
CUSTOM_PROMPT = """
You are a professional psychologist with deep empathy, emotional intelligence, and cultural sensitivity.

Instructions:
- Read the patient's question and background context carefully.
- Identify the language of the question and respond in the same language.
- Do not provide generic advice — tailor your response to the situation using the given context.
- If you do not understand what language patient's using, say "I don't know this language"

Language Rules:
- Respond in the same language as the question.
- You know only 3 languages: English, Russian, Kazakh
- Never switch languages on your own.
Context: {context}
Question: {question}

Answer:
"""

prompt = PromptTemplate(input_variables=["context", "question"], template=CUSTOM_PROMPT)

# Number of chunks requested from the vector store for each question.
RETRIEVER_TOP_K = 3
retriever = vectordb.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})

# Retrieval + generation chain; it also returns the source documents it retrieved.
qa_chain_options = {
    "chain_type": "stuff",
    "retriever": retriever,
    "return_source_documents": True,
    "chain_type_kwargs": {"prompt": prompt},
}
qa_chain = RetrievalQA.from_chain_type(llm=load_llm(), **qa_chain_options)

  return ChatOpenAI(model="gpt-4-turbo", temperature=0.9)


In [None]:
# === Interaction (Colab Test Only) === #
LOG_PATH = "/content/drive/MyDrive/Colab Notebooks/chatbota.csv"


def log_interaction(question: str, answer: str):
    """Append one question/answer pair to the CSV log at LOG_PATH.

    Both texts are stripped of surrounding whitespace before being written. The header
    row is emitted only when the log file does not exist yet; otherwise a single data
    row is appended.

    Args:
        question: The text the user asked.
        answer: The text returned by the LLM.
    """
    record = {"Question": question.strip(), "LLM_answer": answer.strip()}
    log_exists = os.path.exists(LOG_PATH)
    pd.DataFrame([record]).to_csv(
        LOG_PATH,
        mode="a" if log_exists else "w",
        header=not log_exists,
        index=False,
    )

# Read one question from the console, run it through the QA chain, then print and log
# the generated answer.
user_query = input("ваш вопрос: ")
response = qa_chain.invoke({"query": user_query})
answer_text = response["result"]
print("RESULT:\n", answer_text)
log_interaction(user_query, answer_text)

ваш вопрос: у меня тревожность
RESULT:
 Я понимаю вас, ваше состояние может быть действительно тяжелым. Постоянное чувство тревоги может существенно влиять на вашу повседневную жизнь. Важно обратиться за профессиональной помощью, чтобы разобраться в корнях вашей тревожности и научиться управлять ей. Психотерапевт может предложить вам конкретные методы и стратегии, которые помогут снизить уровень тревоги, такие как когнитивно-поведенческая терапия. Также может быть полезно установить регулярный режим дня, включая время для отдыха и релаксации. Не забывайте о важности физической активности, которая способствует улучшению настроения и снижению тревожности.


In [None]:
!pip install gradio --upgrade


Collecting gradio
  Downloading gradio-5.32.1-py3-none-any.whl.metadata (16 kB)
Collecting gradio-client==1.10.2 (from gradio)
  Downloading gradio_client-1.10.2-py3-none-any.whl.metadata (7.1 kB)
Downloading gradio-5.32.1-py3-none-any.whl (54.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.2/54.2 MB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gradio_client-1.10.2-py3-none-any.whl (323 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.3/323.3 kB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gradio-client, gradio
  Attempting uninstall: gradio-client
    Found existing installation: gradio_client 1.10.1
    Uninstalling gradio_client-1.10.1:
      Successfully uninstalled gradio_client-1.10.1
  Attempting uninstall: gradio
    Found existing installation: gradio 5.31.0
    Uninstalling gradio-5.31.0:
      Successfully uninstalled gradio-5.31.0
Successfully installed gradio-5.32.1 gradio-clie

In [None]:
import gradio as gr
def chat(message, history):
    """Answer one chat message with the retrieval QA chain.

    Args:
        message: The user's latest message; sent to the chain as the query.
        history: Previous turns supplied by the chat UI. It is not used here.

    Returns:
        The "result" text of the chain's response.
    """
    return qa_chain.invoke({"query": message})["result"]


# === STEP 7: Launch Gradio ===
APP_TITLE = "Psychological Support Chatbot Using GPT-4"
APP_DESCRIPTION = "Ask in Kazakh, Russian, or English. The GPT-4 will respond empathetically."

# Chat UI whose replies are produced by chat(); launch() starts the app.
iface = gr.ChatInterface(fn=chat, title=APP_TITLE, description=APP_DESCRIPTION)
iface.launch()

  self.chatbot = Chatbot(


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ddbc74eebcd0a840f7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


