In [115]:
import time
import openai
from sentence_transformers import SentenceTransformer, util
import torch
from dotenv import load_dotenv
import os
import pandas as pd
import pandas.io.formats.style
from transformers import AutoModel, AutoTokenizer
import chromadb
import tiktoken
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from bs4 import BeautifulSoup
import requests
import re
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel
from typing import List
from langchain.prompts import ChatPromptTemplate
# from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI
import torch.nn.functional as F
import sqlite3


In [116]:
# Location of the persistent Chroma store, relative to this notebook.
PATH_CHROMA = "../../chroma_db"
client = chromadb.PersistentClient(path=PATH_CHROMA)
collection = client.get_or_create_collection(name="interview_data")

embedding_model = "Snowflake/snowflake-arctic-embed-l-v2.0"
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook still runs on machines without CUDA instead of failing in this cell.
sentence_model = SentenceTransformer(embedding_model).to(
    torch.device("cuda" if torch.cuda.is_available() else "cpu")
)

# The OpenAI key is read from the environment (optionally populated from a .env file).
load_dotenv()
OPEN_AI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPEN_AI_API_KEY


In [13]:
# query = "append is a method used to add elements to the list in python. It adds element on the end of the list"
# query_wrong = "append is a method which adds element at the front of the list"

# results = collection.query(
#     query_embeddings=[sentence_model.encode(query, convert_to_numpy=True)],
#     n_results=3
# )

# query = "append is a method used to add elements to the list in python."
# query_wrong = "append which is used to print elements to the console"

# query_emb = sentence_model.encode(query, convert_to_tensor=True)
# query_wrong_emb = sentence_model.encode(query_wrong, convert_to_tensor=True)
# doc_embs = sentence_model.encode(results["documents"][0], convert_to_tensor=True)

# similarities = util.cos_sim(query_emb, doc_embs)
# similarities_wrong = util.cos_sim(query_wrong_emb, doc_embs)

# for i, (score, score_wrong) in enumerate(zip(similarities[0], similarities_wrong[0])):
#     print(f"Dokument {i+1}: Similarity = {score.item():.4f}, Similarity wrong = {score_wrong.item():.4f}")

In [175]:
# Metadata attached to an interview question. All three fields are declared as
# strings; get_ideal_answer builds one of these from its arguments.
class MetaDatas(BaseModel):
    difficulty: str  # e.g. "Easy" (the value passed by the demo cells)
    type_question: str  # e.g. "llm"
    question: str  # question text; used as the `where` filter of the Chroma query

In [176]:
def get_ideal_answer(difficulty, type_question, question, user_answer):
    """Fetch the stored reference answer for ``question``.

    The Chroma collection is queried with the embedding of ``user_answer`` and
    filtered on the ``question`` metadata field; the first document returned
    by the query is used as the ideal answer.

    Args:
        difficulty: Difficulty label of the question (only validated, not used
            in the query).
        type_question: Question category (only validated, not used in the query).
        question: Exact question text used as the metadata filter.
        user_answer: Candidate's answer; its embedding is the query vector.

    Returns:
        tuple: ``(ideal_answer, similarity)``. ``similarity`` is currently the
        constant placeholder ``torch.tensor([1.0])``, not a measured value.

    Raises:
        IndexError: If no stored document matches ``question``.
    """
    metadatas = MetaDatas(difficulty=difficulty, type_question=type_question, question=question)

    results = collection.query(
        query_embeddings=[sentence_model.encode(user_answer, convert_to_numpy=True)],
        n_results=3,
        where={
            "question": metadatas.question
        },
        include=["embeddings", "documents", "metadatas", "distances"]
    )

    # Fail with a readable message instead of a bare "list index out of range"
    # when the metadata filter matches nothing.
    documents = results["documents"]
    if not documents or not documents[0]:
        raise IndexError(f"No stored answer found for question: {metadatas.question!r}")

    # TODO: the similarity is a placeholder. The disabled implementation re-ranked
    # results["embeddings"][0] against the answer embedding with util.cos_sim and
    # returned the best match together with its score.
    return documents[0][0], torch.tensor([1.0])


In [177]:
def get_data_about_answer(ideal_answer, cosine, question, answer, result):
    """Print a plain-text report for one graded answer.

    Args:
        ideal_answer: Reference answer text.
        cosine: Object exposing ``.item()``; its value is printed as the cosine.
        question: Question text.
        answer: Candidate's answer text.
        result: Object exposing ``grade``, ``explanation_of_grade`` and
            ``follow_up_question``.
    """
    def show(label, text):
        # String fields are concatenated, so a non-string value raises TypeError.
        print(label + text)

    show("question: ", question)
    show("answer: ", answer)
    show("ideal_answer: ", ideal_answer)
    print(f"cosine: {cosine.item()}")
    print(f"grade: {result.grade}")
    show("explanation: ", result.explanation_of_grade)
    show("follow up: ", result.follow_up_question)

In [178]:
# Quick check of the retrieval step. The answer below talks about overfitting
# while the question is about catastrophic forgetting, so it looks like a
# deliberately off-topic answer (NOTE(review): confirm intent).
question = "What is catastrophic forgetting in fine-tuning LLMs?"
user_answer = "overfitting happens if model fits to well to training data"
ideal_answer, cosine_similarity = get_ideal_answer("Easy", "llm", question, user_answer)

In [179]:
# Show the retrieved reference answer and the similarity value returned with it.
print(ideal_answer, cosine_similarity)

It is a phenomenon where a model loses the ability to perform well on previously learned tasks after being fine-tuned on new tasks. tensor([1.])


In [180]:
# One chat turn kept in the conversation history.
class MessageHistory(BaseModel):
    role: str  # chat role; the notebook uses "user" and "assistant"
    content: str  # message text

class RedisMock:
    """Conversation history kept in a plain Python list.

    Judging by the name, this stands in for a Redis-backed store; nothing here
    talks to Redis.
    """

    def __init__(self):
        # Messages in insertion order, oldest first.
        self.messages = []

    def add_message(self, role, content):
        """Append one message with the given ``role`` and ``content``."""
        self.messages.append(MessageHistory(role=role, content=content))

    def get_recent_messages(self, limit=5):
        """Return up to ``limit`` of the newest messages, oldest first.

        A non-positive ``limit`` yields an empty list.
        """
        # Guard needed because ``lst[-0:]`` is the whole list, not an empty one.
        if limit <= 0:
            return []
        return self.messages[-limit:]

    def __str__(self):
        """Render the full history, one ``role: content`` line per message."""
        return "".join(
            f"{message.role}: {message.content} \n" for message in self.messages
        )
    


In [206]:
# Shared conversation history for the interview cells; re-running this cell
# replaces it with a fresh, empty history.
redisMock = RedisMock()

In [182]:
def build_prompt_with_instruction(history_obj: RedisMock, instruction: str):
    """Build a chat prompt from recent history plus a trailing system instruction.

    The prompt consists of a fixed system message, up to the 10 most recent
    messages from ``history_obj``, and ``instruction`` as a final system
    message.

    Args:
        history_obj: Conversation history to read from.
        instruction: Text of the closing system message. It is used as a
            template as-is, so literal braces in it must already be doubled.

    Returns:
        The ``ChatPromptTemplate`` built from these messages.
    """
    chat_messages = [
        ("system", "You are an interviewer who checks person's knowledge in llm.")
    ]

    for msg in history_obj.get_recent_messages(limit=10):
        # History entries are literal text, but from_messages treats every
        # string as a template. Double the braces so a "{" or "}" in a user
        # answer or LLM reply is not parsed as a template variable.
        literal_content = msg.content.replace("{", "{{").replace("}", "}}")
        chat_messages.append((msg.role, literal_content))

    chat_messages.append(("system", instruction))

    return ChatPromptTemplate.from_messages(chat_messages)


### 1. Start Interview

### 1.1 Introduction

In [207]:
# Schema of the LLM's opening message: a single text field.
class StartInterviewModel(BaseModel):
    introduction: str

# Parser that turns the model's reply into a StartInterviewModel.
parser = PydanticOutputParser(pydantic_object=StartInterviewModel)
# Braces are doubled because the instruction text is later passed to
# ChatPromptTemplate.from_messages (see build_prompt_with_instruction).
format_instructions_start_interview = parser.get_format_instructions().replace("{", "{{").replace("}", "}}")

START_INTERVIEW_INSTRUCTION = f"Return the result strictly in this JSON format: \n{format_instructions_start_interview}"

In [208]:

# Opening request sent as the first "user" turn of the interview.
prompt_human = f"""
        Start interview by introducing yourself that you are interviewer in Interview Better company.
     """

# NOTE: this cell appends to the shared history, so running it again adds a
# second request/introduction pair unless redisMock is re-created first.
redisMock.add_message("user", prompt_human)
prompt = build_prompt_with_instruction(redisMock, START_INTERVIEW_INSTRUCTION)
start_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1, api_key=OPEN_AI_API_KEY) 
# The prompt has no template variables, hence the empty input dict.
chain = prompt | start_llm | parser
result = chain.invoke({})


# Store only the parsed introduction text as the assistant's turn.
redisMock.add_message("assistant", result.introduction)

In [209]:
# Dump the history so far (RedisMock.__str__ renders one "role: content" entry per message).
print(redisMock)

user: 
        Start interview by introducing yourself that you are interviewer in Interview Better company.
      
assistant: Hello, I am the interviewer from Interview Better company. Today, I will be assessing your knowledge in large language models. 



### 1.2 Finding questions to ask in the interview

In [186]:
# Default location of the SQLite database holding the `documents` table.
PATH_DB = "../../documents.db"

def get_random_questions_by_type(search_question_type, limit=10, db_path=None):
    """Return randomly chosen questions whose type contains a given substring.

    Args:
        search_question_type: Substring matched against ``type_question``
            with ``LIKE '%...%'``.
        limit: Maximum number of rows to return (converted with ``int``).
        db_path: SQLite database file to read; defaults to ``PATH_DB``.

    Returns:
        list[tuple]: One 1-tuple ``(question,)`` per selected row, in random
        order.
    """
    conn = sqlite3.connect(PATH_DB if db_path is None else db_path)
    try:
        # Both the pattern and the limit are bound as parameters, so no caller
        # value is ever spliced into the SQL text.
        cursor = conn.execute(
            """
            SELECT question FROM documents
            WHERE type_question LIKE ?
            ORDER BY RANDOM()
            LIMIT ?;
            """,
            (f"%{search_question_type}%", int(limit)),
        )
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

In [187]:
# Draw a random sample (default limit of 10) of questions whose type contains "llm".
questions = get_random_questions_by_type("llm")

In [188]:
# Each row is a 1-tuple holding the question text.
print(questions)

[('How does contrastive learning improve LLM representations?',), ('How does task-specific fine-tuning differ from zero-shot learning in LLMs?',), ('How does Adaptive Softmax speed up large language models?',), ('How does Mixture of Experts (MoE) improve the efficiency of LLMs?',), ('What is Chain-of-Thought (CoT) prompting, and how does it improve complex reasoning in LLMs?',), ('How does knowledge graph integration enhance LLMs?',), ('What is the purpose of positional encoding in Transformer models?',), ('What is the role of contextual embeddings in LLMs, and how do they differ from static embeddings?',), ('How does multi-task learning benefit LLMs?',), ('How does scaling law analysis help optimize the design of LLMs?',)]


### 2. Continue Interview

In [210]:
# Structured verdict the grading LLM must return for one answer.
class GradedAnswer(BaseModel):
    grade: int  # score; the grading prompt asks for a value from 1 to 10
    explanation_of_grade: str  # justification for the score
    follow_up_question: str  # next question, or "DONE" per the grading prompt


parser_graded = PydanticOutputParser(pydantic_object=GradedAnswer)
# Take the format instructions from parser_graded so the prompt describes the
# GradedAnswer schema rather than the introduction schema. Braces are doubled
# because the text ends up inside a ChatPromptTemplate.
format_instructions_graded = parser_graded.get_format_instructions().replace("{", "{{").replace("}", "}}")

In [211]:
# System instruction appended to every grading prompt. This is an f-string, so
# format_instructions_graded is substituted when this cell runs; re-run the
# cell after changing those instructions.
FINAL_INSTRUCTION = f"""
Return the result strictly in this JSON format:

{format_instructions_graded}

Make sure the JSON object includes exactly these fields:
- "grade" (integer)
- "explanation_of_grade" (string)
- "follow_up_question" (string)"""


In [212]:
# Check the history contents before the grading step appends to it.
print(redisMock)

user: 
        Start interview by introducing yourself that you are interviewer in Interview Better company.
      
assistant: Hello, I am the interviewer from Interview Better company. Today, I will be assessing your knowledge in large language models. 



In [214]:

def add_grade_information(graded_result: GradedAnswer):
    """Format a graded result as the indented text block stored in the history.

    Returns a string with a leading newline, one indented line each for the
    grade, its explanation and the follow-up question, and a trailing
    indentation-only line.
    """
    block_lines = (
        "",
        f"        grade: {graded_result.grade}",
        f"        explanation of this grade: {graded_result.explanation_of_grade}",
        f"        follow up question: {graded_result.follow_up_question}",
        "    ",
    )
    return "\n".join(block_lines)

def conversate_llm(question, user_answer, ideal_answer, emotion):
    """Grade one answer with the LLM and record the exchange in ``redisMock``.

    Args:
        question: Interview question that was asked.
        user_answer: Candidate's answer text.
        ideal_answer: Reference answer to compare against.
        emotion: Emotion label interpolated into the prompt.

    Returns:
        The parsed output of ``parser_graded`` (a ``GradedAnswer``).
    """
    # NOTE(review): the prompt mentions a cosine similarity, but no similarity
    # value is interpolated into it.
    prompt_human = f"""
        Evaluate user's answer on the question from 1 to 10, based on the user answer, ideal answer and cosine similarity calculated between both.
        If user's answer does not contain whole information about ideal answer, provide follow-up question to suggest what is missing in the answer.
        If user's answer contain whole information provide in follow_up_question field: "DONE"
        Grade user better if his voice emotion is positive.
        Remember to keep the JSON format.
    
        Question: {question}
        User answer: {user_answer}
        Ideal answer: {ideal_answer}
        User's emotion: {emotion}
    """

    # The request is added to the shared history before the LLM call, so it
    # stays there even if the chain below raises.
    redisMock.add_message("user", prompt_human)
    prompt = build_prompt_with_instruction(redisMock, FINAL_INSTRUCTION)
    # Debug output: prints every message template that will be sent.
    print(prompt.messages)

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1, api_key=OPEN_AI_API_KEY)
    # The prompt has no template variables, hence the empty input dict.
    chain = prompt | llm | parser_graded
    graded_result = chain.invoke({})

    # Store the verdict as the assistant's turn so later prompts can see it.
    redisMock.add_message("assistant", add_grade_information(graded_result))

    return graded_result


In [217]:
# End-to-end run of retrieval plus grading for one question/answer pair.
question = "Do you need a vector store for all text-based LLM use cases?"
user_answer = "For chatbots or conversational agents that rely on predefined rule-based responses, a vector store might not be necessary. It is useful if we want our llm to provide answers based on some specific documents. As an alternative for vector databases we can use decision trees. It is very useful for retrieving information from a large text corpus because we can semantically find best answer for our prompts instead of using basic text NLP algorithms"
ideal_answer, cosine_similarity = get_ideal_answer("Easy", "llm", question, user_answer)
# The emotion label is hard-coded for this demo.
emotion = "happy"

graded_result = conversate_llm(question, user_answer, ideal_answer, emotion)

# result = chain.invoke({
#     "question": question,
#     "user_answer": user_answer,
#     "ideal_answer": ideal_answer,
#     "emotion": "happy",
    
# })

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="You are an interviewer who checks person's knowledge in llm."), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='\n        Start interview by introducing yourself that you are interviewer in Interview Better company.\n     '), additional_kwargs={}), AIMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='Hello, I am the interviewer from Interview Better company. Today, I will be assessing your knowledge in large language models.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='\n        Evaluate user\'s answer on the question from 1 to 10, based on the user answer, ideal answer and cosine similarity calculated between both.\n     

In [218]:
# Print the question, both answers, the similarity value and the LLM's verdict.
get_data_about_answer(ideal_answer, cosine_similarity, question, user_answer, graded_result)

question: Do you need a vector store for all text-based LLM use cases?
answer: For chatbots or conversational agents that rely on predefined rule-based responses, a vector store might not be necessary. It is useful if we want our llm to provide answers based on some specific documents. As an alternative for vector databases we can use decision trees. It is very useful for retrieving information from a large text corpus because we can semantically find best answer for our prompts instead of using basic text NLP algorithms
ideal_answer: For chatbots or conversational agents that rely on predefined rule-based responses, a vector store might not be necessary. Instead, these systems often use decision trees or intent recognition to guide responses. Nonetheless, if the chatbot needs to retrieve information from a large text corpus dynamically, integrating a vector store could improve its capabilities.
cosine: 1.0
grade: 8
explanation: The user's answer contains most of the key points from th

In [219]:
# Dump the history again to confirm the grading request and verdict were appended.
print(redisMock)

user: 
        Start interview by introducing yourself that you are interviewer in Interview Better company.
      
assistant: Hello, I am the interviewer from Interview Better company. Today, I will be assessing your knowledge in large language models. 
user: 
        Evaluate user's answer on the question from 1 to 10, based on the user answer, ideal answer and cosine similarity calculated between both.
        If user's answer does not contain whole information about ideal answer, provide follow-up question to suggest what is missing in the answer.
        If user's answer contain whole information provide in follow_up_question field: "DONE"
        Grade user better if his voice emotion is positive.
        Remember to keep the JSON format.
    
        Question: Do you need a vector store for all text-based LLM use cases?
        User answer: For chatbots or conversational agents that rely on predefined rule-based responses, a vector store might not be necessary. It is useful if w

### 3. Finalize Interview