In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers.utils import is_flash_attn_2_available
from peft import LoraConfig, get_peft_config, PeftModel
from sklearn.model_selection import train_test_split 
from peft import LoraConfig, get_peft_model

In [2]:
# Checkpoint to load. A local fine-tuned directory ("./Fine-tunning/nairsV1") used to be
# assigned to MODEL_ID first and was then overwritten a few lines later, so the base chat
# model below is what was actually loaded. Point MODEL_ID at that directory to switch.
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Use FlashAttention-2 only when a CUDA device is present, the helper reports it as
# available, and the device's major compute capability is at least 8; otherwise use SDPA.
# The explicit CUDA check keeps get_device_capability(0) from being reached on CPU-only hosts.
ATTN_IMPLEMENTATION = (
    "flash_attention_2"
    if (
        torch.cuda.is_available()
        and is_flash_attn_2_available()
        and torch.cuda.get_device_capability(0)[0] >= 8
    )
    else "sdpa"
)

In [5]:
# Switch for 4-bit loading. The config object is always constructed; the flag decides
# whether it is handed to the model loader.
use_quantization_config = False
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

In [6]:
# Load the tokenizer and reuse its EOS token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token

print(f"[INFO] Using attention implementation: {ATTN_IMPLEMENTATION}")
print(f"[INFO] Using model_id: {MODEL_ID}")

# Load the causal LM in fp16 and let device_map="auto" decide weight placement.
# The quantization config is forwarded only when the toggle is enabled.
Model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    quantization_config=(None if not use_quantization_config else quantization_config),
    attn_implementation=ATTN_IMPLEMENTATION,
    device_map="auto",
)



[INFO] Using attention implementation: flash_attention_2
[INFO] Using model_id: meta-llama/Llama-2-7b-chat-hf


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


In [7]:
# Put the model in evaluation mode. eval() is the last expression of the cell, so the
# module it returns is echoed as the cell output.
Model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaFlashAttention2(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head

In [None]:
# USER = "Tell me the principle of relativity"
# Prompt = f"""[SYSTEM] You are an AI Tutor. Identify the student's misconception and provide a helpful hint.
# Q: {USER}"""

In [23]:
from Retrieval_system import Retrieval
from langchain_ollama.llms import OllamaLLM
from langchain_pinecone import PineconeVectorStore
from dotenv import load_dotenv
import torch
from sentence_transformers import SentenceTransformer
from langchain_huggingface import HuggingFaceEmbeddings
from pincone import Pincone_vectorStore
import warnings
from operator import itemgetter


# Suppress FutureWarning and DeprecationWarning output for the rest of the session.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)


# `is_available` has to be *called*: the bare function object is always truthy, which
# made this expression pick "cuda" even on machines without a GPU.
Device = "cuda" if torch.cuda.is_available() else "cpu"

# Wire the retrieval stack: index handle -> sentence embeddings -> vector store -> Retrieval.
index = Pincone_vectorStore()
Embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2", model_kwargs={"device": Device})
vector_store = PineconeVectorStore(embedding=Embeddings, index=index)
retrieval_obj = Retrieval(device=Device, index=index, Embeddings=Embeddings, vector_store=vector_store)


Successfully connect to <pinecone.db_data.index.Index object at 0x7f3e540dcd90>


In [24]:
def parse_mcq(doc_text: str) -> "tuple[str | None, dict[str, str]]":
    """Extract the question and lettered options from a multiple-choice document.

    Blank lines are ignored and every remaining line is stripped. A line that
    starts with ``Question:`` supplies the question text; a line of the form
    ``A. text`` (letters A-D, at least one character after the dot) supplies
    an option. Any other line is skipped. If a label or letter appears more
    than once, the last occurrence wins.

    Args:
        doc_text: Raw text of the document.

    Returns:
        ``(question, options)`` where ``question`` is ``None`` when no
        ``Question:`` line exists and ``options`` maps option letters to
        their text in order of appearance.
    """
    question_prefix = "Question:"
    option_letters = ("A", "B", "C", "D")

    question = None
    options = {}

    for raw_line in doc_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith(question_prefix):
            # Drop only the leading label; str.replace would also delete any later
            # "Question:" that is part of the question text itself.
            question = line[len(question_prefix):].strip()
        elif len(line) > 2 and line[1] == "." and line[0] in option_letters:
            options[line[0]] = line[2:].strip()

    return question, options


In [52]:
def _build_tutor_prompt(question, options, user_answer):
    """Render the [INST]/<<SYS>> tutor prompt for one answered multiple-choice question."""
    formatted_options = "\n".join(f"{k}. {v}" for k, v in options.items())
    return (
        f"<s>[INST] <<SYS>>\n"
        f"You are an AI Intelligent Tutoring System. Analyze the error in <thinking> tags, then provide a hint.\n"
        f"<</SYS>>\n\n"
        f"Q: {question}\n{formatted_options}\n"
        f"My Answer: {user_answer} [/INST]\n"
        f"<thinking>"
    )


def evaluate_student_answer(query, user_answer=None, max_new_tokens=150):
    """Quiz the student on a retrieved multiple-choice question and return tutor feedback.

    The first document retrieved for ``query`` is parsed with ``parse_mcq``;
    the question and its options are printed, the student's choice is
    obtained, and the module-level ``Model`` generates a response that is
    forced to open with a ``<thinking>`` tag.

    Args:
        query: Free-text search string handed to the retriever.
        user_answer: Option letter chosen by the student. When ``None``
            (the default) it is read interactively with ``input()``.
        max_new_tokens: Upper bound on the number of generated tokens
            (default 150).

    Returns:
        The cleaned model response, or a short explanatory message when
        nothing was retrieved, the retrieved text could not be parsed as a
        multiple-choice question, or the answer is not a listed option.
    """
    retriever, _ = retrieval_obj.get_retrieval()
    # NOTE(review): presumably a LangChain retriever - confirm against Retrieval_system.
    # `get_relevant_documents` is deprecated there in favour of `invoke`, so use
    # `invoke` when the object offers it and fall back to the legacy method otherwise.
    if hasattr(retriever, "invoke"):
        docs = retriever.invoke(query)
    else:
        docs = retriever.get_relevant_documents(query)
    if not docs:
        return "No relevant context found."

    question, options = parse_mcq(docs[0].page_content)
    if question is None or not options:
        # Without this guard every answer would be rejected as an "invalid selection",
        # hiding the fact that the retrieved text is not a usable question.
        return "Retrieved context is not a parsable multiple-choice question."

    print(f"\nQuestion: {question}")
    for k, v in options.items():
        print(f"{k}. {v}")

    if user_answer is None:
        user_answer = input("\nEnter your answer (A/B/C/D): ")
    user_answer = user_answer.strip().upper()
    if user_answer not in options:
        return "Invalid selection. Please choose A, B, C, or D."

    prompt_text = _build_tutor_prompt(question, options, user_answer)
    # The prompt already spells out the leading "<s>"; the tokenizer would otherwise
    # prepend its own BOS token and the model would see two of them.
    inputs = tokenizer(prompt_text, return_tensors="pt", add_special_tokens=False).to(Model.device)
    with torch.no_grad():
        output_tokens = Model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.1,
            repetition_penalty=1.2,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens that follow the prompt.
    input_length = inputs.input_ids.shape[1]
    generated_text = tokenizer.decode(output_tokens[0][input_length:], skip_special_tokens=True)

    # Re-attach the tag the prompt ended with, then cut the text at the first marker
    # that would start a new question or instruction turn.
    full_response = "<thinking>" + generated_text
    for marker in ("Q:", "Question:", "[INST]"):
        full_response = full_response.split(marker)[0]
    return full_response.strip()

In [53]:
# Demo run: prints the retrieved question and options, reads the answer from stdin,
# then prints the string returned by evaluate_student_answer.
print(evaluate_student_answer("WHAT IS Thermodynaics"))


Question: Which process violates the Second Law of Thermodynamics?
A. Heat flowing from hot to cold
B. Spontaneous decrease in entropy
C. Energy conservation in a closed system
D. Isothermal expansion of a gas
<thinking>Ah, an interesting question! Let me analyze your answer...</thinking>

Great job recognizing that option B is the correct answer! The Second Law of Thermodynamics states that "in any spontaneous process, the total entropy of a closed system will always increase over time." Option B correctly identifies that a spontaneous decrease in entropy would violate this law.

Here's a hint for future questions: When analyzing thermodynamic processes, it can be helpful to break them down into smaller components and consider their individual properties before making a decision. In this case, the fact that the process involves a spontaneous decrease in entropy was crucial in determining the correct answer. Keep
<thinking>Ah, an interesting question! Let me analyze your answer...</th

In [None]:

# Scratch prompt in a plain "[SYSTEM] ... / Q: ..." layout built from a fixed user question.
USER = "Tell me the principle of relativity"
Prompt = (
    "[SYSTEM] You are an AI Tutor. Identify the student's misconception and provide a helpful hint.\n"
    f"Q: {USER}"
)