In [27]:
import os
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv(), override=True)

True

In [5]:
from pinecone import Pinecone

# Client is constructed with no explicit arguments — presumably it picks up the API key
# from the environment populated by the load_dotenv call above; TODO confirm.
pc = Pinecone()

# Bare last expression so the notebook displays the current index listing.
pc.list_indexes()

{'indexes': []}

In [7]:
pc.list_indexes().names()

[]

In [24]:
from pinecone import PodSpec

# Create the "langchain" index only when it is not already listed, so this cell can be
# re-run without attempting to create a second index under the same name.
if "langchain" not in pc.list_indexes().names():
    print("Creating Index langchain")
    pc.create_index(
        name="langchain",
        dimension=1536,
        metric="cosine",
        spec=PodSpec(environment="gcp-starter"),
    )
    print("Index Created")
else:
    print("Index langchain already exists")

Creating Index langchain
Index Created


In [19]:
index = pc.Index("langchain")
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [27]:
import random

# Five random 1536-component vectors, one for each of the ids "a" through "e".
# NOTE(review): the print below writes every component of every vector to the cell output.
ids = ["a", "b", "c", "d", "e"]
vectors = [[random.random() for _ in range(1536)] for _ in ids]
print(vectors)

index_name = "langchain"
index = pc.Index(index_name)

# zip pairs each id with its vector; the upsert result is the value the cell displays.
index.upsert(vectors=zip(ids, vectors))

[[0.3172261099888022, 0.2302750884254594, 0.2943920061797253, 0.036915439558400265, 0.5202536875197026, 0.5292761619029763, 0.24464452959092775, 0.7076287135258257, 0.8163247007489663, 0.702418013130165, 0.8505767773792942, 0.3284691653858812, 0.5095134732228073, 0.7459532185351668, 0.9088414639653281, 0.5050880555579528, 0.2734440712366125, 0.5054140748568002, 0.3964225931195713, 0.9355295156262184, 0.15587557024336318, 0.8933769339284912, 0.23671751115642525, 0.5916598725064536, 0.11010092472222954, 0.8328519669666028, 0.6008802622686779, 0.6931657973615394, 0.936727582010001, 0.6312249438835799, 0.46930953561176614, 0.032223174948948, 0.42787860263147826, 0.3116895559301094, 0.7961428323536142, 0.9979589736967932, 0.746954478683025, 0.1878600436234984, 0.40686052196718325, 0.9286280886734998, 0.46197587336163415, 0.7507843948242434, 0.11197757256963636, 0.64546972999459, 0.8291185859305708, 0.11246621739875085, 0.9493663250705897, 0.17286597471032594, 0.16216780967436562, 0.74307357

{'upserted_count': 5}

In [13]:
index.upsert(vectors=[("c", [0.5] * 1536)])

{'upserted_count': 1}

In [30]:
index.fetch(ids=["c", "d"])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'c': {'id': 'c',
                   'values': [0.543201208,
                              0.460173517,
                              0.758745492,
                              0.347906649,
                              0.923381,
                              0.438489914,
                              0.111049645,
                              0.768866122,
                              0.095231548,
                              0.227137044,
                              0.302592874,
                              0.701104224,
                              0.872320414,
                              0.513824284,
                              0.921267629,
                              0.824459195,
                              0.50730592,
                              0.1362115,
                              0.349995822,
                              0.645192623,
                              0.871438384,
                          

In [15]:
index.delete(ids=["b", "c"])

{}

In [16]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 3e-05,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}

In [23]:
# NOTE(review): this cell drops the "langchain" index, yet the cells below still call
# methods on `index`. The execution counts (In [23] here, In [21]/[22] just below) show
# the notebook was run out of order; on a top-to-bottom run the later cells would
# presumably fail — consider moving this cleanup to the end of the notebook.
pc.delete_index("langchain")

In [21]:
query_vector = [random.random() for _ in range(1536)]

In [22]:
index.query(vector=query_vector, top_k=3, include_values=False)

{'matches': [{'id': 'd', 'score': 0.755178332, 'values': []},
             {'id': 'e', 'score': 0.75496, 'values': []},
             {'id': 'b', 'score': 0.746139288, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 5}}

In [31]:
print(index.describe_index_stats())

{'dimension': 1536,
 'index_fullness': 5e-05,
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5}


In [26]:
index = pc.Index(index_name)

In [32]:
# Three random 1536-component vectors, stored under ids "x", "y" and "z"
# in the "first-namespace" namespace of the current index.
ids = ["x", "y", "z"]
vectors = [[random.random() for _ in range(1536)] for _ in ids]
index.upsert(vectors=zip(ids, vectors), namespace="first-namespace")

{'upserted_count': 3}

In [33]:
# Two random 1536-component vectors, stored under ids "q" and "p"
# in the "second-namespace" namespace of the current index.
ids = ["q", "p"]
vectors = [[random.random() for _ in range(1536)] for _ in ids]
index.upsert(vectors=zip(ids, vectors), namespace="second-namespace")

{'upserted_count': 2}

In [53]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 7e-05,
 'namespaces': {'': {'vector_count': 5},
                'second-namespace': {'vector_count': 2}},
 'total_vector_count': 7}

In [52]:
index.fetch(ids=["y"], namespace="first-namespace")

{'namespace': 'first-namespace', 'usage': {'read_units': 1}, 'vectors': {}}

In [51]:
# Delete the "y" and "z" vectors from "first-namespace". Each id must be its own list
# element: the vectors in this namespace were upserted under single-character ids, so
# the single string "yz" names a vector that was never written and removes nothing.
index.delete(ids=["y", "z"], namespace="first-namespace")

{}

In [57]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text = """I grew up in the Eastern Cape, an area of South Africa much like the one so well described to you by Thomas Hardy. It was a world of oral tradition, healing properties were herbal, an abscess would be treated with poultices, clean water was simply not available. Gastro intestinal infections, malaria, cholera were rampant. Life was brutish and short. Electricity and the horseless carriage did not exist for me. The hardness and poverty of existence was aggravated by an uncaring society. Colonies were meant to be exploited both for the mother country and for those who came to settle in our area. Fresh in the memories of the older generation of the poorest of the very poor, and spoken of in low hushed tones, what was said to be a hugely profitable business that was said to have been abolished – that was the business of slavery. My youth and young adulthood was spent with others in fighting an unjust and oppressive system.

Many African leaders today would say exactly the same thing. And when you assess the achievements and failures of Africa you must always keep this background in mind. One of the greatest mistakes which is made by serious political commentators today is to judge us on the same basis by which you judge opinion makers in the old and advanced industrial countries, forgetting that for more than three centuries our people were denied the privileges which you take for granted.

You went to the best schools in the country – well equipped with highly qualified educators; classrooms properly equipped with learning aids; where the language at school was identical to the language at home; with parents with a high level of educational accomplishment, who could help their children to grasp sophisticated concepts at an early age.

But when you consider the situation of the blacks in Africa you come across a different state of affairs. Children who go to school without any learning aids. Taught in a language which is not theirs, by teachers often not so very qualified. A child comes back from school normally to parents who have no educational background at all. Poor children eating porridge in the morning, porridge at lunch, porridge as their dinner, unable to concentrate. Large families with little room to move about. A child who shares a room with about three or four others. No table, no chairs. Doing their homework on the floor.

These are the people who live in Africa today and I hope that when you make your assessment you will bear in mind this background. The people who run the governments in Africa today are people who were never given any opportunity to train in government, as many of you are. And I have no doubt that you will bear this in mind, not only in our discussions here but when examining the whole situation in Africa.

Being a former head of state has its advantages. One of them is having the time to speak in institutions where young people must listen while their elders pose difficult questions. I trust that my honorary membership of the student unions of a number of London University colleges, including LSE, will not mean that I have to try and provide answers myself! Nor that I will be examined on what I say. One shares one’s thoughts with every confidence at a university with a proud record of solidarity with the struggles of oppressed people, and which is also renowned world-wide, as a centre of learning and enquiry.

LSE, as part of the University of London, was in the vanguard of the great army of men and women across the world who responded to the call to isolate the apartheid regime. They insisted that human rights are the rights of all people everywhere. I feel greatly honoured to have an honorary degree from the University of London. Today brings an opportunity to thank LSE in person and with all humility for the part it played in that tribute to the South African people for their achievement in turning from conflict to the peaceful pursuit of a better life for all.

For many South Africans, LSE also meant the opportunity for learning that apartheid denied them in their own country. Those who were students are now working in all sectors of our society, leaders of a nation, leading a bright and common future. We continue to draw upon you for training and knowledge in fields that are critical to the development of our country. May your practical solidarity and our partnership long continue. Your invitation to me to reflect with you on the challenges facing Africa speaks of your continuing commitment to our shared goals and I thank you most sincerely."""

# Splitter configured with a chunk size of 100 and an overlap of 20; lengths are
# measured with the built-in len, i.e. in characters rather than tokens.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100, chunk_overlap=20, length_function=len
)

In [59]:
chunks = text_splitter.create_documents([text])
print(chunks[0])

page_content='I grew up in the Eastern Cape, an area of South Africa much like the one so well described to you by'


In [63]:
def print_embedding_cost(texts):
    """Print the token count of ``texts`` and the estimated cost of embedding them.

    Every item of ``texts`` must expose a ``page_content`` string. Tokens are
    counted with the tiktoken encoding for 'text-embedding-ada-002' and priced
    at 0.0004 USD per 1000 tokens. Nothing is returned; both figures are printed.
    """
    import tiktoken

    encoder = tiktoken.encoding_for_model('text-embedding-ada-002')

    total_tokens = 0
    for document in texts:
        total_tokens += len(encoder.encode(document.page_content))

    cost_usd = total_tokens/1000*0.0004
    print(f'Total Tokens:{total_tokens}')
    print(f'Embedding Cost in USD: {cost_usd:.6f}')
    
print_embedding_cost(chunks)

Total Tokens:1066
Embedding Cost in USD: 0.000426


In [4]:
import pandas as pd
df = pd.read_csv('test.csv',sep=";")

In [5]:
def create_output_string(row):
    """Render ``row`` as text, one ``"<column>: <value>"`` line per item.

    ``row`` is anything with an ``items()`` method yielding (column, value)
    pairs, such as a DataFrame row. Every line, including the last, ends with
    a newline; an empty row gives an empty string.
    """
    return "".join(f"{column}: {value}\n" for column, value in row.items())

In [14]:
from openai import OpenAI
client = OpenAI()

def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding of ``text`` obtained through the module-level ``client``.

    Newlines in ``text`` are replaced with spaces, the result is sent as a
    one-element input list to ``client.embeddings.create`` together with
    ``model``, and the ``embedding`` of the first returned item is handed back.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding



In [18]:
# Flatten every row into one "column: value" text blob via create_output_string.
df['Output'] = df.apply(create_output_string, axis=1)

# get_embedding is invoked once per row, so this issues one embeddings request per row.
df['Embeddings'] = df['Output'].apply(lambda x: get_embedding(x, model='text-embedding-3-small'))


In [35]:
import pinecone
from pinecone import PodSpec
from pinecone import Pinecone

pc = Pinecone()
index_name = "langchain"

# Create the index only when it is missing, so re-running this cell (or running it
# after the index was already created) does not attempt a duplicate creation.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must equal the length of the vectors that will be upserted
        metric="cosine",
        spec=PodSpec(environment="gcp-starter"),
    )

# Handle used to upsert the embeddings into the Pinecone index
index = pc.Index(index_name)
# index.upsert(items=df.index, vectors=df['Embeddings'])
# index.upsert(vectors=[("c", [0.5] * 1536)])


In [39]:
# df.apply(lambda x: index.upsert(vectors=[(x['Number'], x['Embeddings'])]), axis=1)

In [44]:
#!/bin/python3

import math
import os
import random
import re
import sys

#
# Complete the 'marsExploration' function below.
#
# The function is expected to return an INTEGER.
# The function accepts STRING s as parameter.
#

def marsExploration(s):
    """Count the letters of ``s`` that differ from a repeated "SOS" signal.

    The message is compared position by position against "SOSSOSSOS...":
    the character at index ``k`` is expected to equal ``"SOS"[k % 3]``.

    Args:
        s: The received message string.

    Returns:
        The number of positions whose character differs from the expected
        one; 0 for an empty string.
    """
    expected = "SOS"
    # Indexing the pattern by position modulo 3 checks every letter against the
    # letter that belongs in its slot (including the middle 'O') and needs no
    # explicit chunking of the message into triples.
    return sum(1 for pos, ch in enumerate(s) if ch != expected[pos % 3])

# Script entry point: read one message from standard input, count its altered
# letters with marsExploration, and print the count.
if __name__ == '__main__':
    # Left over from the exercise template, which wrote the result to the file named
    # by OUTPUT_PATH; the result is printed instead.
    # fptr = open(os.environ['OUTPUT_PATH'], 'w')

    # The whole input line is passed through as the received message.
    s = input()

    result = marsExploration(s)

    print(result)


0
