## Embedding sample with Gemini

In [1]:
import os
from dotenv import load_dotenv
# Read key/value pairs from a local .env file into the process environment so
# the API key can be fetched with os.getenv in a later cell.
load_dotenv()

True

In [2]:
# The following package needs to be uninstalled from a terminal before running GoogleGenerativeAIEmbeddings from langchain_google_genai:
# pip uninstall google-generativeai

In [3]:
# Export the Gemini API key under the GOOGLE_API_KEY environment variable.
# The lower-case spelling keeps priority (as before); the upper-case name is
# accepted as a fallback so an already-exported key also works on
# case-sensitive platforms.
_google_api_key = os.getenv('google_api_key') or os.getenv("GOOGLE_API_KEY")
if not _google_api_key:
    # os.environ[...] = None would raise an opaque "str expected, not NoneType"
    # TypeError; fail early with an actionable message instead.
    raise RuntimeError(
        "Google API key not found: define google_api_key (or GOOGLE_API_KEY) "
        "in the environment or in the .env file before running this notebook."
    )
os.environ["GOOGLE_API_KEY"] = _google_api_key

In [4]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [5]:
# Embedding client for the text-embedding-004 model, configured with the
# "retrieval_document" task type.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
    task_type="retrieval_document",
)

In [6]:
# Embed a short test string and show the first five components as a sanity check.
# NOTE(review): `embeddings` was built with task_type="retrieval_document" but is
# used here through embed_query -- confirm that task type is intended for queries.
vector = embeddings.embed_query("hello, world!")
vector[0:5]

[0.006846265867352486,
 -0.02251487784087658,
 -0.05496913567185402,
 -0.020021894946694374,
 -0.010026923380792141]

In [7]:
# Rebind `embeddings` to the gemini-embedding-exp-03-07 model; cells below this
# point use this client instead of the text-embedding-004 one.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-exp-03-07",
    task_type="retrieval_document",
)

In [8]:
# Embed the same test string with the current `embeddings` client and peek at
# the first five components.
vector = embeddings.embed_query("hello, world!")
vector[0:5]

[-0.022807613015174866,
 0.010274127125740051,
 0.011706576682627201,
 -0.09699729084968567,
 -0.0027657211758196354]

In [14]:
# Dimensionality of the most recently computed query embedding.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours -- re-run the notebook top to bottom before sharing.
len(vector)

3072

## Embedding the document chunks

In [9]:
# PDF loader: read the resume from the working directory into LangChain documents.
from langchain_community.document_loaders import PyPDFLoader

resume_pdf = "Swapnil Ransing Machine Learning Resume.pdf"
loader = PyPDFLoader(resume_pdf)
text_doc = loader.load()
text_doc

[Document(metadata={'producer': 'Microsoft® Word 2016', 'creator': 'Microsoft® Word 2016', 'creationdate': '2025-04-15T09:48:47+05:30', 'author': 'Kannan D', 'keywords': 'PUBLIC -', 'moddate': '2025-04-15T09:48:47+05:30', 'source': 'Swapnil Ransing Machine Learning Resume.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content="PUBLIC \nSwapnil Ranshing           \uf028: +91- 836 974 6859 \nData Scientist , Western Union, Pune, India             E-Mail: swapnilransing001@gmail.com \nPortfolio Website:  Swapnil's Portfolio Website                                             LinkedIn Profile:  Swapnil's LinkedIn Profile  \nSummary   \no 7.5 years of expertise in machine learning, deep learning, natural language processing (NLP), GenAI, model \ndevelopment and deployment for fraud and credit risk detection and payment fraud risk strategy development \no Postgraduate degree from IIT Bombay and (Elite Silver) certificate for Data Science for Engineers from IIT Madras  \nand cert

In [10]:
# Inspect the first loaded document (metadata plus page_content).
text_doc[0]

Document(metadata={'producer': 'Microsoft® Word 2016', 'creator': 'Microsoft® Word 2016', 'creationdate': '2025-04-15T09:48:47+05:30', 'author': 'Kannan D', 'keywords': 'PUBLIC -', 'moddate': '2025-04-15T09:48:47+05:30', 'source': 'Swapnil Ransing Machine Learning Resume.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content="PUBLIC \nSwapnil Ranshing           \uf028: +91- 836 974 6859 \nData Scientist , Western Union, Pune, India             E-Mail: swapnilransing001@gmail.com \nPortfolio Website:  Swapnil's Portfolio Website                                             LinkedIn Profile:  Swapnil's LinkedIn Profile  \nSummary   \no 7.5 years of expertise in machine learning, deep learning, natural language processing (NLP), GenAI, model \ndevelopment and deployment for fraud and credit risk detection and payment fraud risk strategy development \no Postgraduate degree from IIT Bombay and (Elite Silver) certificate for Data Science for Engineers from IIT Madras  \nand certi

In [11]:
# 1. Recursive character text splitter: cut the loaded documents into
#    overlapping chunks before embedding them.
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
final_docs = text_splitter.split_documents(text_doc)

In [12]:
# Display every chunk produced by the splitter.
# NOTE(review): this dumps the full list (including personal contact details
# from the resume); consider showing len(final_docs) or a slice instead.
final_docs

[Document(metadata={'producer': 'Microsoft® Word 2016', 'creator': 'Microsoft® Word 2016', 'creationdate': '2025-04-15T09:48:47+05:30', 'author': 'Kannan D', 'keywords': 'PUBLIC -', 'moddate': '2025-04-15T09:48:47+05:30', 'source': 'Swapnil Ransing Machine Learning Resume.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content="PUBLIC \nSwapnil Ranshing           \uf028: +91- 836 974 6859 \nData Scientist , Western Union, Pune, India             E-Mail: swapnilransing001@gmail.com \nPortfolio Website:  Swapnil's Portfolio Website                                             LinkedIn Profile:  Swapnil's LinkedIn Profile  \nSummary   \no 7.5 years of expertise in machine learning, deep learning, natural language processing (NLP), GenAI, model \ndevelopment and deployment for fraud and credit risk detection and payment fraud risk strategy development \no Postgraduate degree from IIT Bombay and (Elite Silver) certificate for Data Science for Engineers from IIT Madras  \nand cert

In [13]:
# Print the first two chunks, separated by a 50-character dashed rule.
print(final_docs[0], '-'*50, final_docs[1], sep='\n')

page_content='PUBLIC 
Swapnil Ranshing           : +91- 836 974 6859 
Data Scientist , Western Union, Pune, India             E-Mail: swapnilransing001@gmail.com 
Portfolio Website:  Swapnil's Portfolio Website                                             LinkedIn Profile:  Swapnil's LinkedIn Profile  
Summary   
o 7.5 years of expertise in machine learning, deep learning, natural language processing (NLP), GenAI, model 
development and deployment for fraud and credit risk detection and payment fraud risk strategy development 
o Postgraduate degree from IIT Bombay and (Elite Silver) certificate for Data Science for Engineers from IIT Madras  
and certificate for Deep Learning specialization from DeepLearning.AI and Coursera 
Skills Summary   
o Machine Learning and Deep Learning Models:  Supervised and Unsupervised Learning, Ensemble Methods, 
Decision Trees, Random Forest, XGBoost, Deep Learning, Neural Networks, CNN, RNN, optimization algorithms' metadata={'producer': 'Microsoft® Wor

In [15]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [16]:
# (Re)create the gemini-embedding-exp-03-07 client used to embed the document
# chunks in the cells that follow.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-exp-03-07",
    task_type="retrieval_document",
)

In [None]:
# Example: embed a small batch of sentences, then report
# (number of vectors, length of the first vector).
sample_sentences = [
    "Today is Monday",
    "Today is Tuesday",
    "Today is April Fools day",
]
vectors = embeddings.embed_documents(sample_sentences)
len(vectors), len(vectors[0])

In [24]:
# Plain-text content of every chunk, without the metadata.
[chunk.page_content for chunk in final_docs]

["PUBLIC \nSwapnil Ranshing           \uf028: +91- 836 974 6859 \nData Scientist , Western Union, Pune, India             E-Mail: swapnilransing001@gmail.com \nPortfolio Website:  Swapnil's Portfolio Website                                             LinkedIn Profile:  Swapnil's LinkedIn Profile  \nSummary   \no 7.5 years of expertise in machine learning, deep learning, natural language processing (NLP), GenAI, model \ndevelopment and deployment for fraud and credit risk detection and payment fraud risk strategy development \no Postgraduate degree from IIT Bombay and (Elite Silver) certificate for Data Science for Engineers from IIT Madras  \nand certificate for Deep Learning specialization from DeepLearning.AI and Coursera \nSkills Summary   \no Machine Learning and Deep Learning Models:  Supervised and Unsupervised Learning, Ensemble Methods, \nDecision Trees, Random Forest, XGBoost, Deep Learning, Neural Networks, CNN, RNN, optimization algorithms",
 'Decision Trees, Random Forest,

In [27]:
# Embed only the first two resume chunks, then report
# (number of vectors, length of the first vector).
first_two_chunks = [chunk.page_content for chunk in final_docs[:2]]
vectors = embeddings.embed_documents(first_two_chunks)
len(vectors), len(vectors[0])

(2, 3072)