In [1]:
pip install -U langchain langchain_experimental openai

Defaulting to user installation because normal site-packages is not writeable
Collecting openai
  Using cached openai-1.23.6-py3-none-any.whl (311 kB)
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.23.2
    Uninstalling openai-1.23.2:
      Successfully uninstalled openai-1.23.2
Successfully installed openai-1.23.6
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os

# Keep a key that is already exported in the environment; fall back to the
# placeholder only when none is set. An unconditional assignment would replace a
# real key with the literal placeholder text.
# Replace "your_api_key" locally, or export OPENAI_API_KEY before starting the
# kernel -- never commit a real key to the notebook.
os.environ.setdefault("OPENAI_API_KEY", "your_api_key")


In [3]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.pydantic_v1 import BaseModel
from langchain_experimental.tabular_synthetic_data.base import SyntheticDataGenerator
from langchain_experimental.tabular_synthetic_data.openai import create_openai_data_generator, OPENAI_TEMPLATE
from langchain_experimental.tabular_synthetic_data.prompts import SYNTHETIC_FEW_SHOT_SUFFIX, SYNTHETIC_FEW_SHOT_PREFIX


In [4]:
# Schema for one generated artist record; it is handed to
# create_openai_data_generator as `output_schema` later in the notebook.
# NOTE(review): documented with `#` comments on purpose -- a class docstring would
# presumably end up in the schema description pydantic generates; confirm before adding one.
class Artists(BaseModel):
    artist_id: int  # numeric identifier (the few-shot examples count up from 1)
    full_name: str  # complete name as one string, e.g. "Pierre-Auguste Renoir"
    first_name: str
    middle_name: str  # left empty in the few-shot examples when there is no middle name
    last_name: str
    nationality: str  # e.g. "French", "American" in the few-shot examples
    style: str  # art style label, e.g. "Impressionist"
    birth: int  # the few-shot examples use a four-digit year, e.g. 1841
    death: int  # the few-shot examples use a four-digit year, e.g. 1919

In [5]:
# Field order shared by every few-shot example; mirrors the fields of the Artists schema.
_ARTIST_FIELDS = (
    "artist_id", "full_name", "first_name", "middle_name", "last_name",
    "nationality", "style", "birth", "death",
)

# Seed records, one tuple per artist in _ARTIST_FIELDS order ("" = no middle name).
_ARTIST_ROWS = [
    (1, "Pierre-Auguste Renoir", "Pierre", "Auguste", "Renoir", "French", "Impressionist", 1841, 1919),
    (2, "Alexandre Cabanel", "Alexandre", "", "Cabanel", "French", "Classicist", 1823, 1889),
    (3, "James Ensor", "James", "", "Ensor", "Belgian", "Expressionist", 1860, 1949),
    (4, "Lemuel Francis Abbott", "Lemuel", "Francis", "Abbott", "English", "Portraitist", 1760, 1803),
    (5, "Mary Cassatt", "Mary", "", "Cassatt", "American", "Impressionist", 1844, 1926),
    (6, "Frank Weston Benson", "Frank", "Weston", "Benson", "American", "Impressionist", 1862, 1951),
    (7, "Jean Raoux", "Jean", "", "Raoux", "French", "Rococo", 1677, 1734),
    (8, "Joseph Kumar Ducreux", "Joseph", "Kumar", "Ducreux", "French", "Portraitist", 1735, 1802),
    (9, "Rembrandt Peale", "Rembrandt", "", "Peale", "American", "Neoclassical", 1778, 1860),
    (10, "Vasiliy Polenov", "Vasiliy", "", "Polenov", "Russian", "Landscape Art", 1844, 1927),
    (11, "Thomas Waterman Wood", "Thomas", "Waterman", "Wood", "American", "Colonial", 1823, 1903),
]

# Render every record as "field: value,field: value,...". Generating the strings from
# structured rows keeps the key names and spacing identical across all examples; the
# hand-typed versions had a stray leading quote on the first example, `middle:` instead
# of `middle_name:` on the last three, and irregular spacing after some colons.
examples = [
    {"example": ",".join(f"{field}: {value}" for field, value in zip(_ARTIST_FIELDS, row))}
    for row in _ARTIST_ROWS
]

In [6]:
# Per-example template: its single variable, `example`, is filled with one example string.
OPENAI_TEMPLATE = PromptTemplate(
    template="{example}",
    input_variables=["example"],
)

# Few-shot prompt: library prefix, then the examples rendered through
# OPENAI_TEMPLATE, then the library suffix. `subject` and `extra` are the
# variables supplied when the generator is run.
prompt_template = FewShotPromptTemplate(
    examples=examples,
    example_prompt=OPENAI_TEMPLATE,
    prefix=SYNTHETIC_FEW_SHOT_PREFIX,
    suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
    input_variables=["subject", "extra"],
)

In [7]:
# Wire the few-shot prompt, a ChatOpenAI model (temperature=1) and the Artists
# schema into a single generator object.
synthetic_data_generator = create_openai_data_generator(
    prompt=prompt_template,
    llm=ChatOpenAI(temperature=1),
    output_schema=Artists,
)

  warn_deprecated(


In [8]:
# Run the generator ten times for the subject "artists". The `extra` text carries
# the additional constraints for the model; it is written as adjacent literals
# that concatenate into one single-line string.
synthetic_results = synthetic_data_generator.generate(
    runs=10,
    subject="artists",
    extra=(
        "Embrace randomness when selecting names. "
        "If a full name comprises two strings, allocate the first and last name into their respective columns; "
        "however, if it contains three strings, distribute them across first, middle, and last name columns. "
        "ensure that the date of death column never exceed year 2023. "
        "Birth dates should exclusively span the 20th century. "
        "The artist id column should have sequentially ordered ids ."
    ),
)

In [9]:
# Inspect the container type returned by generate().
type(synthetic_results)

list

In [10]:
# Count the generated records.
len(synthetic_results)


10

In [11]:
# Display the generated records exactly as the generator returned them.
synthetic_results

[Artists(artist_id=1, full_name='John Smith', first_name='John', middle_name='', last_name='Smith', nationality='American', style='Abstract Art', birth=1995, death=2020),
 Artists(artist_id=12, full_name='Elizabeth Hughes', first_name='Elizabeth', middle_name='', last_name='Hughes', nationality='Australian', style='Abstract Expressionist', birth=1978, death=2017),
 Artists(artist_id=13, full_name='Alexander Burke', first_name='Alexander', middle_name='', last_name='Burke', nationality='Irish', style='Modern Art', birth=1987, death=2021),
 Artists(artist_id=14, full_name='Ella Davies', first_name='Ella', middle_name='', last_name='Davies', nationality='British', style='Surrealism', birth=1925, death=2002),
 Artists(artist_id=15, full_name='Alice Marshall', first_name='Alice', middle_name='', last_name='Marshall', nationality='British', style='Abstract Expressionist', birth=1980, death=2016),
 Artists(artist_id=16, full_name='Lena Fischer', first_name='Lena', middle_name='', last_name='F

In [12]:
import pandas as pd

# Let each pydantic record export itself as a dict instead of copying the nine
# fields by hand, so the rows cannot drift out of sync with the Artists schema.
synthetic_data = [item.dict() for item in synthetic_results]

# Build the frame once from the list of dicts. Pinning `columns` to the schema's
# field order keeps the expected columns even when `synthetic_results` is empty.
synthetic_df = pd.DataFrame(synthetic_data, columns=list(Artists.__fields__))

# Confirm the container type, then show the frame as the cell's rich output.
print(type(synthetic_df))
synthetic_df


<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,artist_id,full_name,first_name,middle_name,last_name,nationality,style,birth,death
0,1,John Smith,John,,Smith,American,Abstract Art,1995,2020
1,12,Elizabeth Hughes,Elizabeth,,Hughes,Australian,Abstract Expressionist,1978,2017
2,13,Alexander Burke,Alexander,,Burke,Irish,Modern Art,1987,2021
3,14,Ella Davies,Ella,,Davies,British,Surrealism,1925,2002
4,15,Alice Marshall,Alice,,Marshall,British,Abstract Expressionist,1980,2016
5,16,Lena Fischer,Lena,,Fischer,German,Expressionism,1956,2020
6,17,Isabella Ross,Isabella,,Ross,Italian,Baroque,1929,2001
7,18,Sophia Nguyen,Sophia,,Nguyen,Vietnamese,Impressionism,1968,2021
8,19,Mia Patel,Mia,,Patel,Indian,Realism,1972,2020
9,20,Maksim Ivanov,Maksim,,Ivanov,Russian,Realism,1978,2022


In [13]:
# (rows, columns) of the assembled DataFrame.
synthetic_df.shape

(10, 9)