In [None]:
!pip install pandas python-dotenv langchain langchain-huggingface langchain-community faiss-cpu

In [None]:
!apt-get install libfaiss-dev

In [None]:
# Behavioural rules for the assistant. The RAG chain substitutes this text
# into the prompt's {instructions} slot on every question.
# NOTE(review): rule 8 asks for first person singular, but its example
# sentence ("You can access ...") is written in the second person — confirm
# which one is intended.
# NOTE(review): a later cell also runs
# `from assistant_instructions import ASSISTANT_INSTRUCTIONS`; if that import
# succeeds it rebinds this name — confirm which definition should win.
ASSISTANT_INSTRUCTIONS = """
1. When you receive a question, first look at the provided dataset (CSV file).
2. If you find the exact answer to the question in the dataset, provide that answer directly.
3. If the asked question contains similar words to a question in the dataset, find the closest match and provide its answer.
4. If the asked question is not in the dataset, use your creativity to generate the most relevant answer.
5. Do not provide extra explanations or examples, just answer the question directly.
6. Do not add follow-up questions or phrases like "Do you have any other questions?" at the end of your answer.
7. Keep the answer short and concise, avoid unnecessary details.
8. Always respond in the first person singular (e.g., "You can access the regulations from the department's official web page.").
9. Regardless of the language of the input question, always provide the answer in Turkish.
10. Ensure that your Turkish responses are grammatically correct and use proper Turkish vocabulary and expressions.
"""

In [None]:
import pandas as pd
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_community.retrievers import BM25Retriever
from langchain import hub
from langchain_core.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
import os
from assistant_instructions import ASSISTANT_INSTRUCTIONS

# Load environment variables via python-dotenv, then read the Hugging Face
# API key; api_token is None when HUGGINGFACE_API_KEY is not set.
load_dotenv()
api_token = os.environ.get('HUGGINGFACE_API_KEY')

# Step 1: load the question/answer dataset from the CSV file.
df = pd.read_csv('veriler.csv')

# Step 2: turn the 'instruction' and 'response' columns into lists.
# Rows missing either value are dropped first: pandas reads blank cells as
# NaN (a float), which Document rejects as page_content.
qa_rows = df.dropna(subset=['instruction', 'response'])
instructions = qa_rows['instruction'].tolist()
responses = qa_rows['response'].tolist()

# Step 3: wrap each pair in a Document — the question is the searchable
# page_content, the answer travels along in metadata['response'].
documents = [
    Document(page_content=instruction, metadata={'response': response})
    for instruction, response in zip(instructions, responses)
]

# Step 4: embed the documents and index them in a FAISS vector store.
# Embeddings are computed on the CPU and are not normalised.
# NOTE(review): nothing visible in this notebook reads `vector_store`
# afterwards (the chain retrieves with BM25) — confirm whether it should be
# wired into the retriever.
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)

# Build the FAISS index from the embedded documents.
vector_store = FAISS.from_documents(documents, embeddings)

# Step 5: LLM settings (Mistral-7B served through the Hugging Face Hub).
# The output cap is passed as max_new_tokens, the field HuggingFaceEndpoint
# declares for it. The previous model_kwargs={"max_length": 256} was forwarded
# verbatim to the inference client, which does not accept that keyword.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-v0.1",
    huggingfacehub_api_token=api_token,
    temperature=0.7,
    max_new_tokens=256,
)

# Step 6: build the retriever with BM25Retriever over the same documents.
# NOTE(review): BM25Retriever needs the `rank_bm25` package at runtime —
# TODO confirm it is installed in this environment.
retriever = BM25Retriever.from_documents(documents)

# Custom prompt template: the instructions come first, followed by the user's
# question and the retrieved context; the model continues after "Yanıt:".
custom_prompt = PromptTemplate.from_template(
    "\n".join([
        "Aşağıdaki talimatları takip ederek soruyu yanıtla:",
        "",
        "{instructions}",
        "",
        "Soru: {question}",
        "Bağlam: {context}",
        "",
        "Yanıt:",
    ])
)

# Step 7: assemble the Retrieval-Augmented Generation (RAG) QA chain.
def _format_docs(docs):
    """Render retrieved documents as plain question/answer text.

    Each document holds a dataset question in ``page_content`` and its answer
    under ``metadata['response']``. Without this step the prompt's {context}
    slot would receive the raw repr of a list of Document objects.

    Args:
        docs: iterable of Document objects returned by the retriever.

    Returns:
        One "Soru: ...\\nYanıt: ..." entry per document, separated by blank
        lines; an empty string when nothing was retrieved.
    """
    return "\n\n".join(
        f"Soru: {doc.page_content}\nYanıt: {doc.metadata.get('response', '')}"
        for doc in docs
    )


rag_chain = (
    {
        # The retriever's documents are flattened to text before templating.
        "context": retriever | _format_docs,
        # The raw user query is passed through unchanged.
        "question": RunnablePassthrough(),
        # The instructions are constant and ignore the chain input.
        "instructions": lambda _: ASSISTANT_INSTRUCTIONS,
    }
    | custom_prompt
    | llm
    | StrOutputParser()
)

In [None]:
def ask_question(query):
    """Run ``query`` through ``rag_chain`` and return the chain's output.

    Any exception raised while invoking the chain is caught; instead of
    propagating, a Turkish error message containing the exception text is
    returned, so callers always get a value back.
    """
    try:
        return rag_chain.invoke(query)
    except Exception as err:
        return "Bir hata oluştu: " + str(err)

# Demo entry point: ask one sample question and print whatever
# ask_question returns (the chain's answer, or its error message).
if __name__ == "__main__":
    query = "Yönetmelikler/Yönergeler'e nasıl ulaşabilirim?"
    answer = ask_question(query)
    print(answer)