In [3]:
from dotenv import load_dotenv
load_dotenv()
import os
import google.generativeai as genai
from pinecone import Pinecone,ServerlessSpec


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import json

In [5]:
data = json.load(open("reviews.json"))
data['reviews']

[{'professor': 'Prof. Smith',
  'review': 'Great professor, explains complex concepts clearly.',
  'subject': 'Mathematics',
  'stars': 5},
 {'professor': 'Dr. Johnson',
  'review': 'Engaging lectures, but assignments can be challenging.',
  'subject': 'Physics',
  'stars': 4},
 {'professor': 'Prof. Garcia',
  'review': 'Knowledgeable, but sometimes moves too fast through material.',
  'subject': 'Chemistry',
  'stars': 3},
 {'professor': 'Dr. Lee',
  'review': 'Excellent teacher, always available for extra help.',
  'subject': 'Biology',
  'stars': 5},
 {'professor': 'Prof. Brown',
  'review': "Tough grader, but you'll learn a lot.",
  'subject': 'History',
  'stars': 4},
 {'professor': 'Dr. Davis',
  'review': 'Passionate about the subject, makes classes interesting.',
  'subject': 'Literature',
  'stars': 5},
 {'professor': 'Prof. Wilson',
  'review': 'Fair and approachable, good at explaining difficult concepts.',
  'subject': 'Computer Science',
  'stars': 4},
 {'professor': 'Dr. 

In [6]:
pc=Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

In [7]:
path=os.getenv('REVIEWS_PATH') or 'reviews.json'

In [8]:
for m in genai.list_models():
  if 'embedContent' in m.supported_generation_methods:
    print(m.name)

models/embedding-001
models/text-embedding-004


In [9]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc.create_index(
    name="rag", dimension=768, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")
)

In [10]:
dat=[]
for review in data['reviews']:
    res=genai.embed_content(
        model='models/text-embedding-004',
        content=review['review']
    )
    embedding=res['embedding']
    dat.append({
    "values":embedding,
    "id":review['professor'],
    "metadata":{
        "review": review['review'],
        "subject": review['subject'],
        "stars":review['stars']
    }
})

In [11]:
dat[0]

{'values': [0.028052343,
  0.0037563865,
  -0.050880432,
  0.06645796,
  0.048141528,
  -0.0042578904,
  -0.016237272,
  0.0024673606,
  0.015924323,
  0.0077421903,
  0.07699675,
  0.029481938,
  0.010413967,
  -0.015864812,
  0.043546006,
  -0.06109745,
  -0.0053736246,
  0.018341236,
  -0.097192295,
  0.010504547,
  0.0300818,
  -0.03056684,
  -0.0104710525,
  -0.02565614,
  0.007926024,
  0.0014643596,
  0.0035604378,
  -0.026162358,
  0.021420429,
  -0.029609421,
  0.021856388,
  0.00996833,
  -0.0672707,
  -0.059559546,
  -0.026306503,
  0.050863534,
  -0.0021128042,
  -0.013736515,
  0.035877634,
  -0.061069094,
  -0.01226773,
  -0.040249933,
  -0.033729777,
  0.021917127,
  -0.029964035,
  -0.004540944,
  -0.018629383,
  0.107005194,
  0.020958744,
  0.09914956,
  0.0049315817,
  0.037234277,
  -0.05828532,
  0.04357877,
  0.0014341745,
  -0.0395256,
  -0.05211552,
  -0.03085921,
  0.060319073,
  0.018572303,
  -0.05067091,
  0.00804377,
  -0.026966775,
  -0.08772054,
  0.03747

In [12]:
index= pc.Index("rag")
upsert_response=index.upsert(
    vectors=dat,
    namespace="ns1",
)
print(f"Upserted count:{upsert_response['upserted_count']}")
print(index.describe_index_stats())

Upserted count:20
{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}
