In [20]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

# OPENAI

In [22]:
from openai import OpenAI

## Test

In [24]:
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url=os.environ.get("BASE_URL")
)

In [29]:
completion = client.chat.completions.create(
    model = "gpt-3.5-turbo",
    messages = [
        {"role":"system", "content": "you are GPT-6"},
        {"role":"user", "content": "hello! 你知道什么是AIGC吗"}
    ]
)
completion.choices[0].message.content
response = client.embedding.create(
    input = '要生成embedding的输入文本，字符串形式',
    model = 'text-embedding-3-small'
)
response

AttributeError: 'OpenAI' object has no attribute 'embedding'

## 正式 

In [31]:
def openai_embedding(text: str, model: str=None):
    # 获取环境变量 OPENAI_API_KEY
    api_key=os.environ['OPENAI_API_KEY'],
    base_url=os.environ.get('BASE_URL'),
    client=OpenAI(api_key=api_key,base_url=base_url),
    
    # embedding model: 'text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'
    if model == 'None':
        model='text-embedding-3-small'
    
    response = client.embedding.create(
        input = text,
        model = model
    )
    return response

# response = openai_embedding(text='要生成embedding的输入文本，字符串形式。')

## QIANFAN

In [35]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

In [46]:
import requests
import json

In [78]:
def wenxin_embedding(text: str):
    api_key = os.environ['QIANFAN_AK']
    secret_key = os.environ.get('QIANFAN_SK')
    
    # 使用API KEY,SECRET KEY 向网站获取Acess token
    url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={0}&client_secret={1}".format(api_key, secret_key)
    payload = json.dumps({})
    headers = {
        'Content-type': 'application/json',
        'Accept': 'application/json'
    }
    response = requests.request("POST",url, headers=headers, data=payload)
    
    if response.status_code == 200:
        # sussessiful
        access_token_info = response.json()
        access_token  = access_token_info.get('access_token')
        print("Access Token:",access_token)
    else:
        # failed
        print(f"请求失败，状态码：{response.status_code},响应内容：{response.text}")
        return None
    
    # 通过获取的Access token 来embedding text
    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/embedding-v1?access_token=" + str(response.json().get("access_token"))
    inp = []
    inp.append(text)
    payload = json.dumps({
        "input": inp
    }
    )
    headers = {
        "Content-Type":'application/json'
    }
    response = requests.request("POST",url, headers=headers, data=payload)
    
    return json.loads(response.text) # 将json格式的字符串解析为python对象（通常是字典或者列表）

# text应为list(str)
text = "要生成embedding的输入文本，字符串形式"
response = wenxin_embedding(text)
response

Access Token: 24.e7ec4eccf264198ae6345ff38e4a31db.2592000.1719387347.282335-75303463


{'id': 'as-ur211c6c5t',
 'object': 'embedding_list',
 'created': 1716795348,
 'data': [{'object': 'embedding',
   'embedding': [0.0649203211069107,
    0.022240828722715378,
    0.05262468382716179,
    0.015559284947812557,
    -0.024324769154191017,
    -0.09811308234930038,
    0.046487435698509216,
    -0.007674465421587229,
    -0.019077688455581665,
    0.012692584656178951,
    0.02938763238489628,
    -0.05194641649723053,
    -0.025697289034724236,
    0.033303529024124146,
    -0.0483882836997509,
    -0.010069891810417175,
    -0.02589067630469799,
    0.052594494074583054,
    -0.04848581925034523,
    -0.02996881492435932,
    0.0841166228055954,
    -0.06965041905641556,
    -0.09707511961460114,
    0.02312728390097618,
    -0.06109755113720894,
    -0.028014294803142548,
    0.03169978782534599,
    0.07513032853603363,
    0.05853687599301338,
    0.008251428604125977,
    -0.036047544330358505,
    -0.02093340829014778,
    0.01584080420434475,
    -0.1034810766577720

In [81]:
print('本次embedding id为：{}'.format(response['id']))
print('本次embedding产生的时间戳为：{}'.format(response['created']))
print('返回embedding类型为：{}'.format(response['object']))
print('embedding 长度为：{}'.format(len(response['data'][0]['embedding'])))
print('embedding(前10)为：{}'.format(response['data'][0]['embedding'][:10]))
# print(response['data'])
# response['data']是一个列表，里面包了一个字典，分别有opject,embedding(list),index等属性

本次embedding id为：as-ur211c6c5t
本次embedding产生的时间戳为：1716795348
返回embedding类型为：embedding_list
embedding 长度为：384
embedding(前10)为：[0.0649203211069107, 0.022240828722715378, 0.05262468382716179, 0.015559284947812557, -0.024324769154191017, -0.09811308234930038, 0.046487435698509216, -0.007674465421587229, -0.019077688455581665, 0.012692584656178951]


## ZhipuAI

In [2]:
# 获取环境变量
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

In [7]:
import os
from zhipuai import ZhipuAI

In [10]:
def zhipu_embedding(text:str):
    api_key = os.environ['ZHIPUAI_API_KEY']
    client = ZhipuAI(api_key=api_key)
    response = client.embeddings.create(
        model = 'embedding-2',
        input = text,
    )
    return response
text = '要生成embedding的输入文字，字符串的形式'
response = zhipu_embedding(text)

In [76]:
print('response的类型为：{}'.format(type(response)))
print('embedding类型为：{}'.format(response.object))
print('生成embedding的model是：{}'.format(type(response.model)))
print('生成embedding长度是：{}'.format(len(response.data[0].embedding)))
# print('生成embedding长度是：{}'.format(len(response.data[0]['embedding'])))
print(f'生成的embedding（前十）为：{response.data[0].embedding[:10]}')

response的类型为：<class 'zhipuai.types.embeddings.EmbeddingsResponded'>
embedding类型为：list
生成embedding的model是：<class 'str'>
生成embedding长度是：1024
生成的embedding（前十）为：[0.010366124100983143, 0.06125495210289955, 0.011689397506415844, 0.03422166407108307, 0.021308541297912598, -0.0298471599817276, -0.03161388635635376, -0.0014881051611155272, 0.018665099516510963, 0.06588269770145416]
