In [1]:
geography = """Geography is the study of Earth's landscapes, environments, and the relationships between people and their surroundings. 
It explores physical features like mountains, rivers, and climate, as well as human activities such as cities, cultures, and economies. 
Geography is divided into two main branches: physical geography, which focuses on natural processes and landforms, and human geography, 
which examines how humans interact with the environment. It helps us understand global issues like climate change, urbanization, 
and resource distribution. By analyzing maps, spatial data, and fieldwork, geographers gain insights that support planning, development,
disaster response, and sustainable living across regions and nations."""

mental_health = """Mental health refers to a person’s emotional, psychological, and social well-being. It affects how individuals think, feel, and 
act, as well as how they handle stress, relate to others, and make choices. Good mental health allows people to cope with life’s challenges, work 
productively, and enjoy relationships. Mental health conditions like anxiety, depression, or stress disorders are common and treatable. Promoting 
awareness, support, and healthy habits is essential for maintaining overall mental well-being."""

elon_musk = """Elon Musk is a billionaire entrepreneur, inventor, and engineer known for revolutionizing multiple industries. He founded SpaceX, 
aiming to colonize Mars, and co-founded Tesla, which leads in electric vehicles and clean energy. Musk also co-founded PayPal and Neuralink, a
nd owns X (formerly Twitter). He’s known for bold goals, like reducing climate change and advancing artificial intelligence. His work blends 
innovation, risk-taking, and futuristic vision, making him one of the most influential figures in modern technology."""

culture = """Culture is the shared beliefs, values, customs, behaviors, and traditions of a group of people, passed down through generations. 
It shapes how individuals view the world, interact with others, and express themselves through language, art, food, clothing, and rituals. 
Culture influences identity and community, providing a sense of belonging. It can vary widely across regions and evolve over time. Respecting 
and understanding diverse cultures promotes global harmony, tolerance, and meaningful cross-cultural connections in 
today’s interconnected world."""

# Convert text into embedding

In [7]:
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer("all-mpnet-base-v2")

In [8]:
embeddings = embedding_model.encode([geography, mental_health, elon_musk, culture])

In [9]:
embeddings

array([[ 0.02650855,  0.01738664, -0.01297633, ...,  0.02099759,
         0.05051146,  0.02731754],
       [-0.02322867,  0.00350186, -0.01885535, ..., -0.05385259,
         0.04044566, -0.00867071],
       [ 0.01887171,  0.07717203,  0.01331495, ..., -0.02125296,
         0.03494312, -0.02039053],
       [-0.04407756,  0.02975902, -0.03792736, ...,  0.04357577,
         0.0375245 ,  0.00934733]], dtype=float32)

In [10]:
len(embeddings[0])

768

In [14]:
from pinecone import Pinecone 
from pinecone import ServerlessSpec 

pc = Pinecone(api_key = "pcsk_7CTjfd_DSsb9RXh4yM3VCwCGUyC1dzWNmFEMVjf4Wa3mZBtRMpEzR8aCev2AkncY2duTTy") 
spec = ServerlessSpec(cloud='aws', region = 'us-east-1')

# Create Index

In [16]:
pc.create_index("my-index", dimension=768, metric="cosine", spec=spec)

{
    "name": "my-index",
    "metric": "cosine",
    "host": "my-index-49d9smx.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 768,
    "deletion_protection": "disabled",
    "tags": null
}

In [17]:
index = pc.Index('my-index')

# Before Add Data 

In [18]:
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}

# Add Data to Index 

In [19]:
index.upsert([
    {"id":"id1", "values": embeddings[0], "metadata":{"sourse":"geography"}},
    {"id":"id2", "values": embeddings[1], "metadata":{"sourse":"mental_health"}}, 
    {"id":"id3", "values": embeddings[2], "metadata":{"sourse":"elon_musk"}}, 
    {"id":"id4", "values": embeddings[3], "metadata":{"sourse":"culture"}}
])

{'upserted_count': 4}

In [20]:
index.describe_index_stats() 

{'dimension': 768,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 4}},
 'total_vector_count': 4,
 'vector_type': 'dense'}

# Similarity Search 

In [21]:
query = "technology build" 
query_embedding = embedding_model.encode([query])

In [22]:
len(query_embedding[0])

768

In [24]:
index.query(vector=query_embedding[0].tolist(), top_k=1, include_values=False, include_metadata=True)

{'matches': [{'id': 'id3',
              'metadata': {'sourse': 'elon_musk'},
              'score': 0.235266,
              'values': []}],
 'namespace': '',
 'usage': {'read_units': 1}}

# Add New Data 

In [25]:
politics = """Politics is the process of making decisions that apply to members of a society. It involves the governance of a country or 
community through laws, policies, and leadership. Politics shapes how power and resources are distributed, often through parties, 
elections, and public debate. It reflects the values, beliefs, and priorities of a society. Engaging in politics allows citizens to influence 
change, protect rights, and ensure accountability in governments and institutions that affect daily life."""

embedding_query = embedding_model.encode(politics).tolist()

In [26]:
embedding_query

[-0.009844476357102394,
 0.02832801453769207,
 -0.011571899987757206,
 -0.005694866180419922,
 -0.01527413260191679,
 -0.014208917506039143,
 -0.05250699073076248,
 0.0017571017378941178,
 0.042092856019735336,
 0.03285244107246399,
 0.06716854870319366,
 -0.05330522358417511,
 0.0033172029070556164,
 0.029278337955474854,
 0.016456084325909615,
 -0.13702477514743805,
 0.025235913693904877,
 -0.04262647032737732,
 0.04989631846547127,
 0.021971775218844414,
 -0.024878695607185364,
 -0.004092655144631863,
 0.058250464498996735,
 0.004192735999822617,
 -0.0147545849904418,
 0.012120295315980911,
 0.02359721250832081,
 0.021574759855866432,
 0.046191759407520294,
 0.033343348652124405,
 0.006763340439647436,
 -0.05369093641638756,
 0.010521129705011845,
 -0.02891390770673752,
 2.0810277874261374e-06,
 -0.011481672525405884,
 0.02227349393069744,
 -0.0018603758653625846,
 0.02705305442214012,
 -0.02989901974797249,
 0.08905930817127228,
 0.02863740734755993,
 -0.017369190230965614,
 0.0362

In [27]:
len(embedding_query)

768

### Before Add New Data 

In [28]:
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 4}},
 'total_vector_count': 4,
 'vector_type': 'dense'}

In [31]:
index.upsert([{"id":"id5", "values":embedding_query, "metadata":{"sourse":"politics"}}])

{'upserted_count': 1}

### After Add New Data 

In [32]:
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5,
 'vector_type': 'dense'}

# Read Data 

In [33]:
read = index.fetch(ids=['id1', 'id4'])

In [34]:
read 

FetchResponse(namespace='', vectors={'id4': Vector(id='id4', values=[-0.044077564, 0.0297590196, -0.0379273631, 0.0371023864, -0.0290923696, 0.0354945548, -0.0208170358, -0.0169451684, 0.0642257631, 0.0129257804, 0.000430570595, -0.0359101631, 0.0138734812, 0.0480090454, 0.0243353769, -0.0568176098, 0.012485031, -0.00110307056, -0.0318172053, 0.0228475, 0.0263190102, 0.0172759108, 0.0648049861, -0.00302002439, -0.0389010496, -0.0252263695, 0.0351753831, -0.0322054215, 0.0303614251, -0.0199075658, 0.0246792343, -0.0307602212, 0.0161665529, -0.0273652133, 2.45914453e-06, -0.00456126034, -0.0126958769, -0.0218955707, 0.0268048011, 0.0105299661, 0.0816274583, 0.0190420579, 0.00917886849, 0.00857934449, -0.00365420221, -0.0203342568, -0.0208466388, 0.0307255462, -0.0184314456, 0.0259962399, 0.00721542258, -0.0820182487, -0.0412527509, -0.00175538275, -0.0173081495, 0.0313290097, 0.0571386889, 0.0768963099, -0.015023238, -0.0391267538, -0.0382953659, 0.00209760712, -0.0345489755, 0.003100902

# Update Data 

In [35]:
sri_lanka_geography = """Sri Lanka is an island nation in the Indian Ocean with diverse geography, including coastal plains, 
central highlands, rivers, tropical forests, and a warm climate influenced by monsoons."""

embedding_query = embedding_model.encode(sri_lanka_geography).tolist() 
index.upsert([("id1", embedding_query, {"source":"sri_lanka_geography"})])

{'upserted_count': 1}

# Delete Data 

In [36]:
index.delete(ids=['id3'])

{}

In [37]:
index.fetch(ids=['id3'])

FetchResponse(namespace='', vectors={}, usage={'read_units': 1})

# Delete Index

In [38]:
pc.delete_index('my-index')