## PDF Yüklemesinin Gerçekleştirilmesi

In [25]:
import os
from dotenv import load_dotenv

load_dotenv()

# Enable LangSmith tracing and choose the project that runs are recorded under.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "gemini-rag-project",
    }
)

# Echo the effective settings; for the API key only its presence is reported, never its value.
print("‚úÖ LangSmith Tracing:", os.environ.get("LANGCHAIN_TRACING_V2"))
print("‚úÖ LangSmith Project:", os.environ.get("LANGCHAIN_PROJECT"))
print(
    "‚úÖ LangSmith API Key:",
    "‚ùå Missing" if not os.environ.get("LANGCHAIN_API_KEY") else "Configured",
)

‚úÖ LangSmith Tracing: true
‚úÖ LangSmith Project: gemini-rag-project
‚úÖ LangSmith API Key: Configured


## Manuel Zaman Ölçümü İçin Decorator

In [27]:
import time
from functools import wraps

def measure_time(operation_name):
    """Build a decorator that prints how long each call to a function takes.

    Args:
        operation_name: Label printed in front of the measured duration.

    Returns:
        A decorator. The wrapped function keeps its metadata (through
        ``functools.wraps``), receives its arguments unchanged, and its
        return value is passed straight through to the caller.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # perf_counter() is a monotonic, high-resolution clock, so the
            # measured duration is not skewed by wall-clock adjustments.
            start_time = time.perf_counter()
            try:
                return func(*args, **kwargs)
            finally:
                # Report the duration even when func raises; the exception
                # still propagates to the caller afterwards.
                duration = time.perf_counter() - start_time
                print(f"‚è±Ô∏è {operation_name}: {duration:.2f} seconds")
        return wrapper
    return decorator

In [28]:
from langchain_community.document_loaders import PyPDFLoader
from langsmith import traceable

@traceable(name="load_pdf", run_type="chain")
@measure_time("PDF Loading")
def load_pdf(file_path):
    """Load the PDF at ``file_path`` with ``PyPDFLoader`` and return the loaded documents."""
    return PyPDFLoader(file_path).load()

file_path = "attentionisallyouneedgemini.pdf"
data = load_pdf(file_path)
print(f"üìÑ Loaded {len(data)} pages")

‚è±Ô∏è PDF Loading: 1.10 seconds
üìÑ Loaded 15 pages


In [2]:
from langchain_community.document_loaders import PyPDFLoader

# Same PDF load as the traced load_pdf() cell, written inline without tracing or timing.
file_path = "attentionisallyouneedgemini.pdf"
loader = PyPDFLoader(file_path)
data = loader.load()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
len(data)

15

## Veriyi parçalara ayırma (Chunking işlemi)

In [29]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks (chunk_size=1000, no overlap between chunks).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(data)

In [30]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langsmith import traceable

@traceable(name="chunk_documents", run_type="chain")
@measure_time("Document Chunking")
def chunk_documents(data, chunk_size=1000, chunk_overlap=0):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        data: Documents to split (in this notebook, the output of the PDF loader).
        chunk_size: Forwarded to the splitter as ``chunk_size``. Defaults to
            1000, the value this notebook has always used.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.
            Defaults to 0 (no overlap between consecutive chunks).

    Returns:
        Whatever ``split_documents(data)`` returns for the configured splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)

docs = chunk_documents(data)
print(f"üìù Created {len(docs)} chunks")

‚è±Ô∏è Document Chunking: 0.00 seconds
üìù Created 48 chunks


In [5]:
print(f"Number of documents after chunking: {len(docs)}")

Number of documents after chunking: 48


In [6]:
docs[7]

Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attentionisallyouneedgemini.pdf', 'total_pages': 15, 'page': 1, 'page_label': '2'}, page_content='Most competitive neural sequence transduction models have an encoder-decoder structure [5, 2, 35].\nHere, the encoder maps an input sequence of symbol representations (x1, ..., xn) to a sequence\nof continuous representations z = (z1, ..., zn). Given z, the decoder then generates an output\nsequence (y1, ..., ym) of symbols one element at a time. At each step the model is auto-regressive\n[10], consuming the previously generated symbols as additional input when generating the next.\n2')

## Google Generative AI Embeddings'i Kullanarak Embedding Oluşturma İşlemi

In [7]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv



In [8]:
load_dotenv()

True

In [9]:
# Smoke-test the embedding model: embed one short string and show the first five values.
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
vector = embeddings.embed_query("hello, world!")
vector[:5]

[-0.02276923693716526,
 0.010134130716323853,
 0.011886735446751118,
 -0.09669032692909241,
 -0.0027089761570096016]

In [31]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langsmith import traceable

@traceable(name="create_embeddings", run_type="embedding")
@measure_time("Embedding Creation")
def create_embeddings():
    """Construct the Gemini embedding client for ``models/gemini-embedding-001``.

    This function only builds and returns the client object; it does not
    call any embed method itself.
    """
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
    return embedding_model

embeddings = create_embeddings()
vector = embeddings.embed_query("hello, world!")
print(f"üî¢ Embedding dimension: {len(vector)}")

‚è±Ô∏è Embedding Creation: 0.02 seconds
üî¢ Embedding dimension: 3072


## ChromaDB Üzerine Kayıt İşlemi

In [10]:
from langchain_chroma import Chroma

In [11]:
# Embed every chunk and index it in a Chroma store; no persist_directory is passed here.
vector_store = Chroma.from_documents(documents=docs, embedding = embeddings)

In [None]:
from langchain_chroma import Chroma
from langsmith import traceable

@traceable(name="create_vector_store", run_type="chain")
@measure_time("Vector Store Creation")
def create_vector_store(docs, embeddings):
    """Index ``docs`` in a Chroma collection persisted under ``./chroma_db``.

    Every chunk is given a deterministic id built from its ``source``
    metadata, its position in ``docs`` and its text. Without explicit ids,
    each run against the same persisted directory adds a fresh copy of every
    chunk, so similarity search starts returning duplicates. With stable ids,
    re-running on the same input is expected to overwrite the existing
    entries instead.
    NOTE(review): this relies on the store updating entries whose id already
    exists; confirm against the installed langchain_chroma version.

    Args:
        docs: Chunked documents exposing ``page_content`` and ``metadata``.
        embeddings: Embedding model used to vectorise the chunks.

    Returns:
        The Chroma vector store returned by ``Chroma.from_documents``.
    """
    import hashlib  # local import so the notebook's import cells stay untouched

    # The position is part of the id, so identical chunk texts within one
    # batch still receive distinct ids.
    ids = [
        hashlib.sha256(
            f"{doc.metadata.get('source', '')}|{position}|{doc.page_content}".encode("utf-8")
        ).hexdigest()
        for position, doc in enumerate(docs)
    ]
    return Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        ids=ids,
        persist_directory="./chroma_db"
    )

# Build the persisted store, then expose it as a similarity retriever with k=10 results per query.
vector_store = create_vector_store(docs, embeddings)
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k":10})
print("‚úÖ Vector store created and persisted")

In [12]:
# Similarity retriever over the vector store, configured with k=10 results per query.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k":10})

In [13]:
# Sanity-check retrieval with a sample question before wiring up the LLM.
retrieved_docs = retriever.invoke("What is encoder?")

In [14]:
len(retrieved_docs)

10

In [15]:
print(retrieved_docs[5].page_content)

Table 1: Maximum path lengths, per-layer complexity and minimum number of sequential operations
for different layer types. n is the sequence length, d is the representation dimension, k is the kernel
size of convolutions and r the size of the neighborhood in restricted self-attention.
Layer Type Complexity per Layer Sequential Maximum Path Length
Operations
Self-Attention O(n2 ¬∑ d) O(1) O(1)
Recurrent O(n ¬∑ d2) O(n) O(n)
Convolutional O(k ¬∑ n ¬∑ d2) O(1) O(logk(n))
Self-Attention (restricted) O(r ¬∑ n ¬∑ d) O(1) O(n/r)
3.5 Positional Encoding
Since our model contains no recurrence and no convolution, in order for the model to make use of the
order of the sequence, we must inject some information about the relative or absolute position of the
tokens in the sequence. To this end, we add "positional encodings" to the input embeddings at the
bottoms of the encoder and decoder stacks. The positional encodings have the same dimension dmodel


## Google Gemini API Yapısını Kullanarak LLM Tetikleme İşlemleri

`temperature` parametresinin etkisi:
- Düşük Değerler (0.1-0.4): Daha kesin ve daha tutarlı cevaplar verilir. Model daha tahmin edilebilir hale gelir.
- Orta Değerler (0.5-0.7): Hem mantıklı hem de yaratıcı cevaplar verilir.
- Yüksek Değerler (0.7-1): Daha rastgele ve yaratıcı, ancak bazen tutarsız yanıtlar verebilir.

In [16]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used by the question-answering chain: a low temperature for more
# consistent answers, with max_tokens set to 500.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-lite",
    temperature=0.3,  # Gemini 3.0+ defaults to 1.0
    max_tokens=500
)

In [17]:
# Simple LLM call used as a test: send one prompt and print the reply text.
from langchain_google_genai import ChatGoogleGenerativeAI

test_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash-lite", temperature=0.3)
test_response = test_llm.invoke("Hello, this is a LangSmith test!")
print(test_response.content)

Hello! I'm ready for your LangSmith test. How can I help you with it?

Are you looking to:

*   **Test my ability to generate specific types of output?** (e.g., code, creative text, summaries)
*   **Test my understanding of LangSmith concepts?** (e.g., tracing, evaluation, prompt management)
*   **Test my integration with LangSmith tools?** (e.g., can I generate outputs that are easily traceable?)
*   **Something else entirely?**

Please provide me with the details of your test! I'm eager to see what you have in store.


In [18]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [19]:
# System message for the question-answering chain. Adjacent string literals are
# concatenated with no separator, so every sentence carries its own trailing
# space; otherwise the model receives run-together text such as "tasksUse".
# {context} is the placeholder for the retrieved document text.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, don't try to make up an answer. "
    "Use three sentences maximum to answer."
    "\n\n"
    "{context}"
)

In [20]:
# Chat prompt: the system message (instructions plus the {context} placeholder)
# followed by the user's question in {input}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "{input}"),
    ]
)

## Soru-Cevap Zinciri Oluşturma (LLM + PROMPT)

In [21]:
# Combine the LLM and the prompt into a chain that answers from supplied documents.
question_answering_chain = create_stuff_documents_chain(llm,prompt)

## RAG Zinciri Oluşturma (RAG + LLM)

In [22]:
# Full RAG chain: built from the retriever and the question-answering chain.
rag_chain = create_retrieval_chain(retriever,question_answering_chain)

## Kullanıcı sorgusunu çalıştırma

In [23]:
# Run one question through the RAG chain; the reply text is read from response["answer"] below.
response = rag_chain.invoke({"input": "Explain the transformer architecture?"})

In [24]:
print(response["answer"])

The Transformer architecture is a novel neural network design that relies entirely on attention mechanisms, eschewing traditional recurrent or convolutional layers. It consists of an encoder and a decoder, each composed of a stack of identical layers. Each encoder layer has a multi-head self-attention mechanism and a position-wise feed-forward network, with residual connections and layer normalization applied. The decoder includes these two sub-layers plus a third multi-head attention sub-layer that attends to the encoder's output, also incorporating residual connections and layer normalization.
