# 使用LangChain和GPT解锁结构化JSON数据：分步教程<a class="tocSkip"></a>

## 初始化

首先安装所需的包

In [None]:
pip install langchain openai

In [11]:
# Provide your OpenAI API key through the environment.
# `os` is imported here because this cell runs before the notebook's main import
# cell; without it a fresh-kernel "Run All" fails with NameError.
import os

os.environ["OPENAI_API_KEY"] = "xxx"  # replace xxx with your own key

## 导入所需的包

In [12]:
import os
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.chat_models import ChatOpenAI

from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List

## 生成数据

### 输出架构定义

In [13]:
# Pydantic model describing a generated Twitter profile. Every Field description
# ends up in the JSON schema that the output parser sends to the model as format
# instructions, so the descriptions double as per-field prompts.
#
# NOTE: there is deliberately no class docstring: pydantic would emit it as the
# schema's top-level "description" and thereby alter the prompt.
#
# Multi-line descriptions use implicit string concatenation rather than a
# backslash continuation inside the literal; the latter would embed the next
# line's indentation (a run of spaces) into the description text.

class TwitterUser(BaseModel):
    name: str = Field(description="Full name of the user.")
    handle: str = Field(description="Twitter handle of the user, without the '@'.")
    age: int = Field(description="Age of the user.")
    hobbies: List[str] = Field(description="List of hobbies of the user.")
    email: str = Field(description="Email address of the user.")
    bio: str = Field(description="Bio or short description about the user.")
    location: str = Field(description="Location or region where the user resides.")
    is_blue_badge: bool = Field(
        description="Boolean indicating if the user has a verified blue badge."
    )
    joined: str = Field(description="Date the user joined Twitter.")
    gender: str = Field(description="Gender of the user.")
    appearance: str = Field(description="Physical description of the user.")
    avatar_prompt: str = Field(
        description=(
            "Prompt for generating a photorealistic avatar image. "
            "The image should capture the essence of the user's appearance description, "
            "ideally in a setting that aligns with their interests or bio. "
            "Use professional equipment to ensure high quality and fine details."
        )
    )
    banner_prompt: str = Field(
        description=(
            "Prompt for generating a banner image. "
            "This image should represent the user's hobbies, interests, or the essence of their bio. "
            "It should be high-resolution and captivating, suitable for a Twitter profile banner."
        )
    )

### 提示词模板

In [14]:
# Output parser bound to the TwitterUser schema.
parser = PydanticOutputParser(pydantic_object=TwitterUser)

# Single human message: a fixed instruction, then the parser's format
# instructions, then the caller-supplied question.
_human_message = HumanMessagePromptTemplate.from_template(
    "answer the users question as best as possible.\n{format_instructions}\n{question}"
)

# Only "question" is supplied at format time; "format_instructions" is filled in
# once here from the parser.
prompt = ChatPromptTemplate(
    messages=[_human_message],
    input_variables=["question"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

### 试试看！

In [15]:
# Chat model used for generation; max_tokens caps the length of the completion.
chat_model = ChatOpenAI(
    model="gpt-3.5-turbo",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    max_tokens=1000,
)

# Fill the prompt template with the profile request.
user_query = (
    "Generate a detailed Twitter profile of a random realistic user with a diverse background, "
    "from any country in the world, original name, including prompts for images. Come up with "
    "real name, never use most popular placeholders like john smith and john doe."
)
_input = prompt.format_prompt(question=user_query)

output = chat_model(_input.to_messages())

# Print the raw completion BEFORE parsing: if the model returns text the parser
# rejects, parse() raises and the raw text would otherwise never be shown.
print(output.content)

parsed = parser.parse(output.content)
print(parsed)

{
  "name": "Maria Santos",
  "handle": "mariasantos",
  "age": 32,
  "hobbies": ["Photography", "Cooking", "Hiking"],
  "email": "mariasantos@example.com",
  "bio": "Passionate about capturing moments through photography. Love exploring new recipes and hiking trails.",
  "location": "São Paulo, Brazil",
  "is_blue_badge": false,
  "joined": "2015-09-17",
  "gender": "Female",
  "appearance": "Brunette with hazel eyes and a warm smile.",
  "avatar_prompt": "Create a photorealistic avatar image that showcases my love for photography. Capture me with a camera in hand, surrounded by nature.",
  "banner_prompt": "Design a captivating banner image that reflects my hobbies and interests. Incorporate elements of photography, cooking, and hiking in a beautiful landscape."
}
name='Maria Santos' handle='mariasantos' age=32 hobbies=['Photography', 'Cooking', 'Hiking'] email='mariasantos@example.com' bio='Passionate about capturing moments through photography. Love exploring new recipes and hiking

In [19]:
# Show the formatted prompt object (the messages built from the template,
# including the parser's format instructions and the question).
print(_input)

messages=[HumanMessage(content='answer the users question as best as possible.\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"description": "Full name of the user.", "title": "Name", "type": "string"}, "handle": {"description": "Twitter handle of the user, without the \'@\'.", "title": "Handle", "type": "string"}, "age": {"description": "Age of the user.", "title": "Age", "type": "integer"}, "hobbies": {"description": "List of hobbies of the user.", "items": {"type": "string"}, "title": "Hobbies", "type": "array"}, "email": {"description": "Email address o

## 从文件中提取数据

首先安装读取PDF文件的包

In [20]:
pip install pypdf

Collecting pypdf
  Downloading pypdf-3.16.2-py3-none-any.whl (276 kB)
[K     |████████████████████████████████| 276 kB 4.1 MB/s eta 0:00:01
Installing collected packages: pypdf
Successfully installed pypdf-3.16.2
You should consider upgrading via the '/usr/local/opt/python@3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [21]:
from langchain.document_loaders import PyPDFLoader

# Load the PDF; only element 0 of the loaded result is used below.
loader = PyPDFLoader("elon.pdf")
document = loader.load()

# Prefix the extracted text with the instruction for the model.
document_query = f"Create a profile based on this description: {document[0].page_content}"

# Reuse the prompt template, chat model and parser defined in earlier cells.
_input = prompt.format_prompt(question=document_query)
output = chat_model(_input.to_messages())
parsed = parser.parse(output.content)

print(parsed)

name='Elon Musk' handle='elonmusk' age=51 hobbies=['space exploration', 'electric vehicles', 'artificial intelligence', 'sustainable energy', 'tunnel construction', 'neural interfaces', 'Mars colonization', 'hyperloop transportation'] email='elonmusk@example.com' bio="Elon Musk, a 51-year-old male entrepreneur, inventor, and CEO, is best known for his ambitious goals in revolutionizing transportation and energy. Born in Pretoria, South Africa, Musk later moved to the United States to pursue higher education. He attended Queen's University in Kingston, Ontario, Canada for two years before transferring to the University of Pennsylvania. As a visionary with a normal build, short-cropped hair, and a trimmed beard, Musk often sports tailored suits or smart casual attire, giving him a confident yet approachable demeanor. Throughout his career, Musk has founded and led several successful companies, including SpaceX, Tesla, Neuralink, and The Boring Company. His interests span across various f