## 1. Define Your Data Model

In [67]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_experimental.tabular_synthetic_data.base import SyntheticDataGenerator
from langchain_experimental.tabular_synthetic_data.openai import (
    create_openai_data_generator,
)
from langchain_experimental.tabular_synthetic_data.prompts import (
    SYNTHETIC_FEW_SHOT_PREFIX,
    SYNTHETIC_FEW_SHOT_SUFFIX,
)
from langchain_ollama import ChatOllama

from langchain_experimental.synthetic_data import (
    DatasetGenerator,
    create_data_generation_chain,
)

## 2. Sample Data

In [60]:
from datetime import datetime


class MarketData(BaseModel):
    date: datetime  # Store date as a datetime object
    symbol: str
    open: float
    high: float
    low: float
    close: float
    volume_btc: float
    volume: float

    @property
    def get_unix_timestamp(self):
        # Convert datetime to Unix timestamp
        return int(self.date.timestamp())

In [61]:
examples = [
    {
        "date": "2019-10-17",
        "symbol": "BTCUSD",
        "open": 7993.54,
        "high": 8070,
        "low": 7937.01,
        "close": 8034.32,
        "volume_btc": 1570.08,
        "volume": 12537245.82,
    },
    {
        "date": "2019-10-16",
        "symbol": "BTCUSD",
        "open": 8162.44,
        "high": 8171.59,
        "low": 7908.86,
        "close": 7993.54,
        "volume_btc": 8123.72,
        "volume": 65132771.81,
    },
    {
        "date": "2019-10-15",
        "symbol": "BTCUSD",
        "open": 8355,
        "high": 8420,
        "low": 8085.65,
        "close": 8162.44,
        "volume_btc": 7919.56,
        "volume": 65109952.98,
    },
    {
        "date": "2019-10-14",
        "symbol": "BTCUSD",
        "open": 8282.97,
        "high": 8409,
        "low": 8215.66,
        "close": 8355,
        "volume_btc": 4401.82,
        "volume": 36575829.05,
    },
    {
        "date": "2019-10-13",
        "symbol": "BTCUSD",
        "open": 8309.03,
        "high": 8469.02,
        "low": 8146.47,
        "close": 8282.97,
        "volume_btc": 5459.53,
        "volume": 45492326.33,
    },
    {
        "date": "2019-10-12",
        "symbol": "BTCUSD",
        "open": 8267.33,
        "high": 8425,
        "low": 8261.46,
        "close": 8309.03,
        "volume_btc": 3001.53,
        "volume": 25023584.91,
    },
    {
        "date": "2019-10-11",
        "symbol": "BTCUSD",
        "open": 8587.5,
        "high": 8826,
        "low": 8226,
        "close": 8267.33,
        "volume_btc": 11975.39,
        "volume": 101238415.25,
    },
    {
        "date": "2019-10-10",
        "symbol": "BTCUSD",
        "open": 8590,
        "high": 8660,
        "low": 8452.5,
        "close": 8587.5,
        "volume_btc": 7892.41,
        "volume": 67531649.6,
    },
    {
        "date": "2019-10-09",
        "symbol": "BTCUSD",
        "open": 8180,
        "high": 8709,
        "low": 8121,
        "close": 8590,
        "volume_btc": 13865.37,
        "volume": 117143751.99,
    },
    {
        "date": "2019-10-08",
        "symbol": "BTCUSD",
        "open": 8209,
        "high": 8342.97,
        "low": 8107.26,
        "close": 8180,
        "volume_btc": 5808.84,
        "volume": 47674867.98,
    },
]

In [68]:
model = ChatOllama(model="llama3.1", temperature=1)

In [69]:
generator = DatasetGenerator(model, {"style": "informal", "minimal length": 500})
dataset = generator(examples)

In [None]:
dataset

In [71]:
import pandas as pd

df = pd.DataFrame(dataset)
df.to_csv("data.csv", index=False)

## 3. Craft a Prompt Template

In [62]:
OPENAI_TEMPLATE = PromptTemplate(
    input_variables=[
        "date",
        "symbol",
        "open",
        "high",
        "low",
        "close",
        "volume_btc",
        "volume",
    ],
    template="""
    Date: {date}, Symbol: {symbol}, Open: {open}, High: {high}, Low: {low}, Close: {close}, 
    Volume BTC: {volume_btc}, Volume: {volume}
    """,
)

prompt_template = FewShotPromptTemplate(
    prefix=SYNTHETIC_FEW_SHOT_PREFIX,
    examples=examples,
    suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
    input_variables=["subject", "extra"],
    example_prompt=OPENAI_TEMPLATE,
)

## 4. Creating the Data Generator

In [63]:
synthetic_data_generator = SyntheticDataGenerator(
    llm=ChatOllama(
        model="llama3.1", temperature=1
    ),  # You'll need to replace with your actual Language Model instance
    template=prompt_template,
)

## 5. Generate Synthetic Data

In [64]:
synthetic_results = synthetic_data_generator.generate(
    subject="Trading Market data",
    extra="This is trading data date symbol open high low close volumebtc volume. this is timeseries data so stick to one time step.",
    runs=10,
)

KeyError: 'close'

In [11]:
synthetic_results

NameError: name 'synthetic_results' is not defined