In [23]:
from dotenv import load_dotenv
load_dotenv()
import os
from pinecone import Pinecone, ServerlessSpec
import google.generativeai as genai

In [21]:
pc = Pinecone(api_key= os.getenv("PINECONE_API_KEY"))
pc.create_index(
    name="rag",
    dimension=768,
    metric="cosine",
    spec=ServerlessSpec(cloud='aws', region='us-east-1')
)

In [None]:
import json
data = json.load(open('reviews.json'))
data['reviews']


In [31]:
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
processed_data=[]
for review in data['reviews']:
    response = genai.embed_content(
        content=['review'],
        model='models/text-embedding-004',
        task_type='retrievAl_document',
        title='Embedding Reviews'
    )
    embedding = response['embedding']
    processed_data.append({
        'values': embedding,
        'id':review['professor'],
        'metadata':{
            'review': review['review'],
            'subject': review['subject'],
            'stars': review['stars']
            }
    })


In [39]:
def flatten_and_convert_to_floats(nested_list):
    """Flattens a nested list and converts all elements to floats."""
    flattened_list = []
    for item in nested_list:
        if isinstance(item, list):  # Check if the item is a list
            flattened_list.extend(flatten_and_convert_to_floats(item))  # Recursively flatten
        else:
            flattened_list.append(float(item))  # Convert to float and add to list
    return flattened_list

for item in processed_data:
    if isinstance(item['values'], list):
        try:
            item['values'] = flatten_and_convert_to_floats(item['values'])
        except ValueError as e:
            raise TypeError(f"Failed to convert values to floats: {e}")
    else:
        raise TypeError("Values should be a list of floats")

processed_data[0]

{'values': [-0.021477208,
  0.012040233,
  -0.02542571,
  0.01968888,
  0.05893296,
  -0.014717235,
  0.051322743,
  0.06210496,
  -0.05337003,
  -0.009231608,
  0.018462807,
  0.008009849,
  0.054162003,
  0.029296353,
  -0.014421016,
  -0.0757814,
  0.0032037268,
  -0.0063066143,
  -0.10731738,
  0.016733589,
  0.02538023,
  -0.06846708,
  0.022288745,
  -0.023842625,
  0.017241871,
  -0.068895675,
  0.04323284,
  -0.011717539,
  -0.0190911,
  -0.039529357,
  0.031973626,
  0.053617544,
  0.03576343,
  -0.019502694,
  0.0014624217,
  0.059654858,
  0.005300163,
  0.03788554,
  0.017622773,
  -0.05720467,
  -0.052650545,
  0.036468383,
  0.008984589,
  0.05185959,
  -0.033160556,
  -0.012649032,
  0.015929809,
  0.057647582,
  -0.038126945,
  0.044819463,
  0.028459197,
  0.007573816,
  -0.08293146,
  0.055167742,
  -0.03830907,
  -0.026445346,
  -0.053564217,
  -0.06032914,
  -0.012127309,
  0.03440671,
  0.046179052,
  -0.02928808,
  -0.027519349,
  0.005690456,
  0.027271673,
  -0.

In [40]:

index = pc.Index('rag')
index.upsert(
    vectors=processed_data,
    namespace='ns1'
)

{'upserted_count': 20}

In [41]:
index.describe_index_stats()


{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}