# Data Generation

In [None]:
%%capture
!pip install --upgrade llama-index
!pip install llama-index-llms-ollama
!pip install llama-index-finetuning
!pip install llama-index-llms-groq
!pip install llama-index-llms-gemini
!pip install unsloth vllm
!pip install triton==3.1.0
!pip install -U pynvml
!pip install transformers accelerate torch huggingface_hub 
!pip install -U bitsandbytes


In [None]:
%%capture
# Clone the project repository next to this notebook. The commented-out cells
# further down run its scripts from /kaggle/working/Intellihack_SurgicalMasks_03/.
# Output is captured, so a failed clone (e.g. directory already exists) is silent.
!git clone https://github.com/AkeshSamuditha/Intellihack_SurgicalMasks_03.git

# Generate Synthetic Data

In [None]:
# Disabled by default. Uncomment both lines to update the cloned repository and
# run its "Data Generation.py" script on the /kaggle/input/task03 folder.
# Replace "API KEY" with a real key before running, and do not save the key
# into a shared copy of this notebook.
# !git -C Intellihack_SurgicalMasks_03/ pull
# !python "/kaggle/working/Intellihack_SurgicalMasks_03/Data Generation.py" --folder "/kaggle/input/task03" --api_key "API KEY"

# Embedding Model Fine-Tuning

In [None]:
# Disabled by default. Uncomment both lines to update the cloned repository and
# run its embedding_ft.py script.
# !git -C Intellihack_SurgicalMasks_03/ pull
# !python "/kaggle/working/Intellihack_SurgicalMasks_03/embedding_ft.py" 

# Qwen Training 

In [None]:
# Disabled by default. Uncomment both lines to update the cloned repository and
# run its custom_train.py script.
# !git -C Intellihack_SurgicalMasks_03/ pull
# !python "/kaggle/working/Intellihack_SurgicalMasks_03/custom_train.py"

## Inference

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Identifiers handed to from_pretrained for the two models used below
# (local directory or Hub id — confirm which applies in this environment).
embedding_model_name = "SurgicalMasks_bge-large-en-v1.5"
qa_model_name = "SurgicalMasks_Qwen2.5-3B-Instruct-4bit"

# Retrieval stage: tokenizer and encoder used to embed questions and documents.
embedding_tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
embedding_model = AutoModel.from_pretrained(embedding_model_name)

# Answering stage: tokenizer and model used to pick an answer span from a context.
# NOTE(review): the checkpoint is loaded with a question-answering head; if
# custom_train.py fine-tuned a causal language model, that head's weights would
# not be in the checkpoint — confirm this is the intended model class.
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)

def get_embedding(text):
    """Embed text with the embedding model using attention-masked mean pooling.

    Args:
        text: A string, or a list of strings, to embed. Inputs are truncated
            to 512 tokens and padded to the longest item in the batch.

    Returns:
        A ``torch.Tensor`` with one row per input text and one column per
        hidden dimension; a single string yields a single row.
    """
    inputs = embedding_tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    # Keep the inputs on the same device as the model so the forward pass
    # also works when the model has been moved off the CPU.
    inputs = inputs.to(embedding_model.device)
    with torch.no_grad():
        hidden = embedding_model(**inputs).last_hidden_state
    # Average over real tokens only. For a single text the mask is all ones and
    # this equals a plain mean; for a padded batch it keeps padding positions
    # from diluting the shorter texts' embeddings.
    mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
    embedding = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
    # NOTE(review): the BGE model card describes CLS pooling with L2
    # normalisation — confirm mean pooling matches how this checkpoint was tuned.
    return embedding

def find_best_context(question_embedding, corpus_embeddings, corpus):
    """Return the corpus entry whose embedding is most similar to the question.

    Args:
        question_embedding: Tensor holding one question embedding, shaped
            ``(hidden,)`` or ``(1, hidden)``.
        corpus_embeddings: Tensor with one embedding per corpus entry. Any
            layout that flattens to ``(len(corpus), hidden)`` is accepted, so
            both ``(N, hidden)`` and stacked ``(N, 1, hidden)`` inputs work.
        corpus: Sequence of documents, index-aligned with ``corpus_embeddings``.

    Returns:
        The element of ``corpus`` with the highest cosine similarity to the
        question (the first one on ties).

    Raises:
        ValueError: If ``corpus`` is empty.
    """
    if len(corpus) == 0:
        raise ValueError("corpus must contain at least one document")
    # Normalise both operands to 2-D on the CPU; a stacked (N, 1, hidden)
    # tensor would otherwise be rejected by the similarity computation.
    query = question_embedding.detach().cpu().float().reshape(1, -1)
    docs = corpus_embeddings.detach().cpu().float().reshape(len(corpus), -1)
    # The single query row broadcasts against every document row -> (N,) scores.
    scores = torch.nn.functional.cosine_similarity(query, docs, dim=1)
    best_idx = int(torch.argmax(scores))
    return corpus[best_idx]

def get_answer(question, context):
    """Extract an answer span from ``context`` for ``question`` with the QA model.

    The question and context are tokenized together as a pair (truncated to
    512 tokens). The most likely start and end positions are taken
    independently from the model's logits, and the input tokens between them
    (inclusive) are decoded back to text.

    Args:
        question: The question text.
        context: The passage the answer is taken from.

    Returns:
        The decoded token span. If the predicted end precedes the predicted
        start, the selected span is empty.
    """
    encoded = qa_tokenizer(
        question,
        context,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )
    with torch.no_grad():
        prediction = qa_model(**encoded)
    # Inclusive end position -> exclusive slice bound.
    span_start = prediction.start_logits.argmax()
    span_stop = prediction.end_logits.argmax() + 1
    token_ids = encoded['input_ids'][0]
    return qa_tokenizer.decode(token_ids[span_start:span_stop])

def inference(question, corpus):
    """Answer ``question`` from the most similar document in ``corpus``.

    Every document is embedded, the one closest to the question embedding is
    selected, and the QA model extracts the answer from that document.

    Args:
        question: The question text.
        corpus: Sequence of document strings to search.

    Returns:
        The answer string produced by ``get_answer`` for the best document.

    Raises:
        ValueError: If ``corpus`` is empty.
    """
    if len(corpus) == 0:
        raise ValueError("corpus must contain at least one document")
    question_embedding = get_embedding(question)
    # get_embedding yields one (1, hidden) row per document. Concatenating on
    # dim 0 builds the 2-D (num_docs, hidden) matrix the similarity search
    # expects; torch.stack would insert an extra axis -> (num_docs, 1, hidden).
    corpus_embeddings = torch.cat([get_embedding(doc) for doc in corpus], dim=0)
    best_context = find_best_context(question_embedding, corpus_embeddings, corpus)
    answer = get_answer(question, best_context)
    return answer

### Example Usage

In [None]:
import json

question = "What is deepseek"
# Read the file as UTF-8 explicitly rather than relying on the platform's
# default text encoding.
with open('train_dataset.json', 'r', encoding='utf-8') as f:
    corpus_data = json.load(f)

# NOTE(review): inference() embeds each element of the corpus as text and
# indexes it by integer position, so corpus_data is assumed to be a list of
# strings — confirm against the actual structure of train_dataset.json.
answer = inference(question, corpus_data)
print(answer)
