In [3]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()

# Read the Pinecone API key from the environment rather than hardcoding it.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Fail fast with an actionable message instead of passing None to the
# Pinecone client in a later cell.
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# index 생성

In [5]:
!pip install pinecone

Collecting pinecone
  Downloading pinecone-7.3.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pinecone-plugin-assistant<2.0.0,>=1.6.0 (from pinecone)
  Downloading pinecone_plugin_assistant-1.8.0-py3-none-any.whl.metadata (30 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Collecting packaging<25.0,>=24.2 (from pinecone-plugin-assistant<2.0.0,>=1.6.0->pinecone)
  Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB)
Downloading pinecone-7.3.0-py3-none-any.whl (587 kB)
   ---------------------------------------- 0.0/587.6 kB ? eta -:--:--
   ---------------------------------------- 0.0/587.6 kB ? eta -:--:--
   ----------------- ---------------------- 262.1/587.6 kB ? eta -:--:--
   ----------------- ---------------------- 262.1/587.6 kB ? eta -:--:--
   ----------------- ---------------------- 262.1/587.6 kB ? eta -:--:--
   --------------------------------- ---- 524.3/587.6

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-upstage 0.7.3 requires tokenizers<0.21.0,>=0.20.0, but you have tokenizers 0.22.0 which is incompatible.


In [6]:
from pinecone import Pinecone
# Create the Pinecone client, authenticating with the API key held in PINECONE_API_KEY.
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)

In [7]:
from pinecone import ServerlessSpec

# Create the serverless index only if it does not exist yet, so that re-running
# this cell (e.g. Restart Kernel -> Run All) does not fail because the index
# was already created by a previous run.
if not pinecone_client.has_index('embedding-3d'):
    pinecone_client.create_index(
        name='embedding-3d',
        dimension=3,      # must equal the length of every vector's `values`
        metric='cosine',  # similarity metric used when querying
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),  # serverless (no dedicated pods)
    )

# Display the index description whether it was just created or already existed.
pinecone_client.describe_index('embedding-3d')

{
    "name": "embedding-3d",
    "metric": "cosine",
    "host": "embedding-3d-ou15dnm.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 3,
    "deletion_protection": "disabled",
    "tags": null
}

In [9]:
# Get a handle to the 'embedding-3d' index; later cells call its data
# operations (describe_index_stats, list, upsert, query).
index = pinecone_client.Index('embedding-3d')
index  # bare expression: shows the handle's repr as the cell output

  from .autonotebook import tqdm as notebook_tqdm


<pinecone.db_data.index.Index at 0x1dc98afe510>

In [10]:
# Show index-level statistics (dimension, metric, vector counts per namespace).
index.describe_index_stats()

{'dimension': 3,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}

In [12]:
# Walk the vector IDs stored in the namespace; each item yielded by
# index.list() is printed as-is. Nothing is printed while the namespace is empty.
id_batches = index.list(namespace='embedding-3d-ns1')
for id_batch in id_batches:
    print(id_batch)

# pinecone data 형식
### vector db의 모델링 지정
```python
[{'id': 'vec1' , 'values': [1.0, 1.5, 2.0], 'metadata': {'genre':'drama'}},
{'id': 'vec2' , 'values': [2.0, 1.5, 0.5], 'metadata': {'genre':'action'}},
{'id': 'vec3' , 'values': [0.1, 0.3, 0.5], 'metadata': {'genre':'drama'}},
{'id': 'vec4' , 'values': [0.3, 1.7, 1.4], 'metadata': {'genre':'drama'}},
{'id': 'vec5' , 'values': [1.5, 1.2, 3.4], 'metadata': {'genre':'action'}},
{'id': 'vec6' , 'values': [2.2, 0.8, 2.7], 'metadata': {'genre':'action'}},
]
```
**참고:** 인덱스를 만들 때 지정한 차원(`dimension=3`)은 각 벡터의 `values` 길이와 일치해야 한다.

In [13]:
# (id, values, genre) rows for the six sample vectors; every `values` list has
# three components.
sample_rows = [
    ('vec1', [1.0, 1.5, 2.0], 'drama'),
    ('vec2', [2.0, 1.5, 0.5], 'action'),
    ('vec3', [0.1, 0.3, 0.5], 'drama'),
    ('vec4', [0.3, 1.7, 1.4], 'drama'),
    ('vec5', [1.5, 1.2, 3.4], 'action'),
    ('vec6', [2.2, 0.8, 2.7], 'action'),
]

# Upsert = update + insert: a record is inserted when its id is new and
# updated when the id already exists.
index.upsert(
    namespace='embedding-3d-ns1',
    vectors=[
        {'id': vec_id, 'values': vec_values, 'metadata': {'genre': genre}}
        for vec_id, vec_values, genre in sample_rows
    ],
)

{'upserted_count': 6}

In [14]:
# Re-check the index statistics after the upsert to confirm the namespace's vector count.
index.describe_index_stats()

{'dimension': 3,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'embedding-3d-ns1': {'vector_count': 6}},
 'total_vector_count': 6,
 'vector_type': 'dense'}

# id list

In [16]:
# Print the vector IDs now stored in the namespace; each item yielded by
# index.list() is a list of IDs.
id_batches = index.list(namespace='embedding-3d-ns1')
for id_batch in id_batches:
    print(id_batch)

['vec1', 'vec2', 'vec3', 'vec4', 'vec5', 'vec6']


# query

In [20]:
# Similarity search: fetch the top 3 matches for the query vector, restricted
# to records whose `genre` metadata equals 'drama'.
query_vector = [0.1, 0.4, 0.7]
drama_only = {'genre': {'$eq': 'drama'}}  # metadata filter

response = index.query(
    vector=query_vector,
    filter=drama_only,
    namespace='embedding-3d-ns1',
    top_k=3,
    include_metadata=True,
    include_values=True,
)
response

{'matches': [{'id': 'vec3',
              'metadata': {'genre': 'drama'},
              'score': 1.00008953,
              'values': [0.1, 0.3, 0.5]},
             {'id': 'vec1',
              'metadata': {'genre': 'drama'},
              'score': 0.961182,
              'values': [1.0, 1.5, 2.0]},
             {'id': 'vec4',
              'metadata': {'genre': 'drama'},
              'score': 0.936294496,
              'values': [0.3, 1.7, 1.4]}],
 'namespace': 'embedding-3d-ns1',
 'usage': {'read_units': 1}}