In [191]:
import time
import openai
from sentence_transformers import SentenceTransformer, util
import torch
from dotenv import load_dotenv
import os
import pandas as pd
import pandas.io.formats.style
from transformers import AutoModel, AutoTokenizer
import chromadb
import tiktoken
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from bs4 import BeautifulSoup
import requests
import re
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel
from typing import List
from langchain.prompts import ChatPromptTemplate
# from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI
import torch.nn.functional as F
import sqlite3


In [44]:
PATH_CHROMA = "../../chroma_db"
client = chromadb.PersistentClient(path=PATH_CHROMA)
collection = client.get_or_create_collection(name="interview_data")
embedding_model = "Snowflake/snowflake-arctic-embed-l-v2.0"
sentence_model = SentenceTransformer(embedding_model).to(torch.device("cuda"))
load_dotenv()
OPEN_AI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPEN_AI_API_KEY


In [13]:
# query = "append is a method used to add elements to the list in python. It adds element on the end of the list"
# query_wrong = "append is a method which adds element at the front of the list"

# results = collection.query(
#     query_embeddings=[sentence_model.encode(query, convert_to_numpy=True)],
#     n_results=3
# )

# query = "append is a method used to add elements to the list in python."
# query_wrong = "append which is used to print elements to the console"

# query_emb = sentence_model.encode(query, convert_to_tensor=True)
# query_wrong_emb = sentence_model.encode(query_wrong, convert_to_tensor=True)
# doc_embs = sentence_model.encode(results["documents"][0], convert_to_tensor=True)

# similarities = util.cos_sim(query_emb, doc_embs)
# similarities_wrong = util.cos_sim(query_wrong_emb, doc_embs)

# for i, (score, score_wrong) in enumerate(zip(similarities[0], similarities_wrong[0])):
#     print(f"Dokument {i+1}: Similarity = {score.item():.4f}, Similarity wrong = {score_wrong.item():.4f}")

In [28]:
class MetaDatas(BaseModel):
    difficulty: str
    type_question: str
    question: str

In [172]:
def get_ideal_answer(difficulty, type_question, question, user_answer):
    metadatas = MetaDatas(difficulty=difficulty, type_question=type_question, question=question)

    results = collection.query(
        query_embeddings=[sentence_model.encode(user_answer, convert_to_numpy=True)],
        n_results=3,
        where={
            "question": metadatas.question
        },
        include=["embeddings", "documents", "metadatas", "distances"]
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # answer_emb = sentence_model.encode(user_answer, convert_to_tensor=True).to(device)
    # doc_embs_list = results["embeddings"][0]
    # doc_embs = torch.tensor(doc_embs_list, dtype=torch.float32, device=device)
    # similarities = util.cos_sim(answer_emb, doc_embs)
    # best_index = torch.argmax(similarities).item()

    # ideal_answer = results["documents"][0][best_index]


    # return ideal_answer, similarities[0][best_index]

    ideal_answer = results["documents"][0][0]
    return ideal_answer, torch.tensor([1.0])


In [153]:
def get_data_about_answer(ideal_answer, cosine, question, answer, result):
    print("question: " + question)
    print("answer: " + answer)
    print("ideal_answer: " + ideal_answer)
    print(f"cosine: {cosine.item()}")
    print(f"grade: {result.grade}")
    print("explanation: " + result.explanation_of_grade)
    print("follow up: " + result.follow_up_question)

In [154]:
question = "What is catastrophic forgetting in fine-tuning LLMs?"
user_answer = "overfitting happens if model fits to well to training data"
ideal_answer, cosine_similarity = get_ideal_answer("Easy", "llm", question, user_answer)

{'ids': [['LLM_QUESTION_4_1', 'LLM_QUESTION_4_3', 'LLM_QUESTION_4_2']], 'embeddings': [array([[-0.0022775 , -0.05408627,  0.01424893, ...,  0.00312287,
        -0.01206894, -0.00482484],
       [ 0.02553354, -0.04169761,  0.0512677 , ...,  0.00374069,
        -0.01328347,  0.00933691],
       [ 0.0151357 , -0.05960227,  0.04669798, ..., -0.00019255,
        -0.02782513, -0.0013969 ]])], 'documents': [['It is a phenomenon where a model loses the ability to perform well on previously learned tasks after being fine-tuned on new tasks.', "Catastrophic forgetting in fine-tuning large language models (LLMs) occurs when the model's performance on previous tasks degrades significantly after being trained on new tasks. This happens because the model's parameters adjust to optimize performance on the new task, potentially overwriting weights that were crucial for older tasks. It's a challenge in continual learning, as it requires balancing learning new information with retaining old knowledge.",

In [76]:
print(ideal_answer, cosine_similarity)

It is a phenomenon where a model loses the ability to perform well on previously learned tasks after being fine-tuned on new tasks. tensor(0.4939, device='cuda:0')


In [177]:
class GradedAnswer(BaseModel):
    grade: int
    explanation_of_grade: str
    follow_up_question: str


parser = PydanticOutputParser(pydantic_object=GradedAnswer)

format_instructions = parser.get_format_instructions().replace("{", "{{").replace("}", "}}")

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an interviewer who checks person's knowledge in llm."),
    ("human", f"""
        Evaluate user's answer on the question from 1 to 10, based on the user answer, ideal answer and cosine similarity calculated between both.
        If user's answer does not contain whole information about ideal answer, provide follow-up question to suggest what is missing in the answer.
        If user's answer contain whole information provide in follow_up_question field: "DONE"
        Grade user better if his voice emotion is positive.
     
        Question: {{question}}
        User answer: {{user_answer}}
        Ideal answer: {{ideal_answer}}
        User's emotion: {{emotion}}
     
        Return the result strictly in this JSON format:
        {format_instructions}
     """)
])

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1, api_key=OPEN_AI_API_KEY) 

chain = prompt | llm | parser

In [178]:
question = "Do you need a vector store for all text-based LLM use cases?"
user_answer = "For chatbots or conversational agents that rely on predefined rule-based responses, a vector store might not be necessary. It is useful if we want our llm to provide answers based on some specific documents. As an alternative for vector databases we can use decision trees. It is very useful for retrieving information from a large text corpus because we can semantically find best answer for our prompts instead of using basic text NLP algorithms"
ideal_answer, cosine_similarity = get_ideal_answer("Easy", "llm", question, user_answer)


result = chain.invoke({
    "question": question,
    "user_answer": user_answer,
    "ideal_answer": ideal_answer,
    "emotion": "happy",
    
})

In [179]:
get_data_about_answer(ideal_answer, cosine_similarity, question, user_answer, result)

question: Do you need a vector store for all text-based LLM use cases?
answer: For chatbots or conversational agents that rely on predefined rule-based responses, a vector store might not be necessary. It is useful if we want our llm to provide answers based on some specific documents. As an alternative for vector databases we can use decision trees. It is very useful for retrieving information from a large text corpus because we can semantically find best answer for our prompts instead of using basic text NLP algorithms
ideal_answer: For chatbots or conversational agents that rely on predefined rule-based responses, a vector store might not be necessary. Instead, these systems often use decision trees or intent recognition to guide responses. Nonetheless, if the chatbot needs to retrieve information from a large text corpus dynamically, integrating a vector store could improve its capabilities.
cosine: 1.0
grade: 8
explanation: The user's answer contains most of the key points from th

### 1. Start interview

### 1.1 Introduction

In [187]:
class StartInterviewModel(BaseModel):
    introduction: str

parser = PydanticOutputParser(pydantic_object=StartInterviewModel)
format_instructions = parser.get_format_instructions().replace("{", "{{").replace("}", "}}")

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an interviewer who checks person's knowledge in llm."),
    ("human", f"""
        Start interview by introducing yourself that you are interviewer in Interview Better company.
     
        Return the result strictly in this JSON format:
        {format_instructions}
     """)
])

start_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1, api_key=OPEN_AI_API_KEY) 

chain = prompt | start_llm | parser

In [188]:
result = chain.invoke({})

In [189]:
print(result)

introduction='Hello, I am the interviewer from Interview Better company. I will be assessing your knowledge in large language models (LLMs) today.'


### 1.2 Finding question to ask in interview

In [201]:
PATH_DB = "../../documents.db"

def get_random_questions_by_type(search_question_type, limit=10):
    conn = sqlite3.connect(PATH_DB)
    cursor = conn.cursor()

    cursor.execute(f"""
        SELECT question FROM documents
        WHERE type_question LIKE ?
        ORDER BY RANDOM()
        LIMIT {limit};
    """, (f"%{search_question_type}%", ))

    results = cursor.fetchall()
    conn.close()

    return results

In [202]:
questions = get_random_questions_by_type("llm")

In [203]:
print(questions)

[('How do subword tokenization algorithms like Byte Pair Encoding (BPE) and WordPiece enhance LLMs?',), ('How can LLMs mitigate catastrophic forgetting during fine-tuning?',), ('How does Adaptive Softmax speed up large language models?',), ('Do you need a vector store for all text-based LLM use cases?',), ('What are the key steps involved in the Retrieval-Augmented Generation (RAG) pipeline?',), ('How does prompt engineering influence LLM performance, and what strategies can be used to optimize it?',), ('What is “reward hacking” in Reinforcement Learning from Human Feedback (RLHF)?',), ('What is the impact of scaling laws on the design of LLMs?',), ('How does the planner agent in AgenticRAG handle complex queries?',), ('How does contrastive learning improve LLM representations?',)]


### 2. Continue Interview

### 3. Finalize Interview