In [0]:
!pip install -q "daft[unity, deltalake]" pydantic openai numpy pillow python-dotenv

In [0]:
import os
import daft 
from daft.unity_catalog import UnityCatalog
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Define Configuration
# The Databricks token and workspace URL both come from the current notebook
# context, so fetch that context once and reuse it. `dbutils` is not imported in
# this notebook; it is expected to be supplied by the Databricks runtime.
_notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
DATABRICKS_TOKEN = _notebook_context.apiToken().get()
DATABRICKS_ENDPOINT = _notebook_context.apiUrl().get()

# os.getenv returns None when the variable is unset. Stop here with a clear
# message instead of handing a missing key to the OpenAI provider and failing
# later inside a prompt() call.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it or add it to a .env file before running this notebook."
    )

# Configure UnityCatalog
unity = UnityCatalog(
    endpoint=DATABRICKS_ENDPOINT,
    token=DATABRICKS_TOKEN,
)

# Configure OpenAI Provider
daft.set_provider("openai", api_key=OPENAI_API_KEY)

In [0]:
from daft.catalog import Table

# Name of the Unity Catalog table holding the customer-support conversations.
UC_TABLE_NAME = "jaytest-unity.openai.nebulabyte_e_commerce_customer_support_conversations"

# Resolve the table through the Unity Catalog client configured above.
uc_table = unity.load_table(UC_TABLE_NAME)

# Wrap it as a Daft catalog Table, read it into a DataFrame, and materialize it.
# The trailing collect() expression is what this cell displays.
conversations_table = Table.from_unity(uc_table)
df = conversations_table.read()
df.collect()

In [0]:
from daft import col
from daft.functions import prompt
from pydantic import BaseModel, Field

# Structured output schema for the first classification pass; it is passed to
# prompt() as `return_format` further down in this cell.
class Classification(BaseModel):
    # All three fields are required strings; `...` marks "no default" explicitly.
    intent: str = Field(..., description="The intent of the customer.")
    sentiment: str = Field(..., description="The sentiment of the customer.")
    product_category: str = Field(..., description="The type of product the customer is concerned with")

# Build the LLM expression on its own, then attach it as the "classification" column.
classification_expr = prompt(
    messages=col("conversation"),
    system_message="You are a helpful assistant that classifies customer intents and sentiments.",
    model="gpt-5-nano",
    provider="openai",
    use_chat_completions=True,
    return_format=Classification,
)

df = df.with_column("classification", classification_expr)


In [0]:
# Show each conversation alongside the unnested fields of its classification.
classification_fields = col("classification").unnest()
df.select("conversation", classification_fields).show()

In [0]:
from pydantic import BaseModel, Field

# Structured output schema for the detailed support classification; it is passed
# to prompt() as `return_format` later in this cell. Every field is a required
# string, and each description carries an inline example for the model.
class CustomerSupportClassification(BaseModel):
    # --- Issue taxonomy ---
    issue_area: str = Field(description="The main area of the issue, e.g., 'Login and Account'.")
    issue_category: str = Field(description="The category of the issue, e.g., 'Mobile Number and Email Verification'.")
    # Example text now closes its quote and sentence, matching the sibling descriptions.
    issue_sub_category: str = Field(description="The sub-category of the issue, e.g., 'Verification requirement for mobile number or email address'.")
    issue_category_sub_category: str = Field(description="Combined category and sub-category")
    # --- Customer and product ---
    customer_sentiment: str = Field(description="The sentiment of the customer, e.g., 'neutral'.")
    product_category: str = Field(description="The category of the product, e.g., 'Appliances'.")
    product_sub_category: str = Field(description="The sub-category of the product, e.g., 'Oven Toaster Grills (OTG)'.")
    # --- Handling ---
    issue_complexity: str = Field(description="The complexity of the issue, e.g., 'medium'.")
    agent_experience_level: str = Field(description="The experience level of the agent, e.g., 'junior'.")
    agent_experience_level_desc: str = Field(description="Description of the agent's experience level, e.g., 'handles customer inquiries independently, possess solid troubleshooting skills, and seek guidance from more experienced team members when needed.'")


# Build the detailed-classification LLM expression separately from the frame it is attached to.
support_classification_expr = prompt(
    messages=col("conversation"),
    system_message="You are customer support supervisor that investigates customer support conversations and classifies them into different categories.",
    model="gpt-5-nano",
    provider="openai",
    use_chat_completions=True,
    return_format=CustomerSupportClassification,
)

# Keep only the conversation column, then add the new structured column.
conversations_only = df.select("conversation")
df = conversations_only.with_column(
    "customer_support_classification",
    support_classification_expr,
)

In [0]:
# Unnest the structured classification, keep the conversation, cap the result at
# 100 rows, and materialize it.
unnested_classification = col("customer_support_classification").unnest()
df_append = (
    df.select(unnested_classification, "conversation")
    .limit(100)
    .collect()
)

In [0]:
# Preview the materialized sample held in df_append.
df_append.show()

In [0]:
# Convert the sample to pandas; as the cell's last expression, the result is the cell output.
df_append.to_pandas()

In [0]:
# Append the classified sample to the Unity Catalog table it was read from.
# `df_append` is the DataFrame materialized earlier in this notebook; the name
# `df_final` used here before is never defined, so the cell raised NameError.
Table.from_unity(uc_table).write(df_append, mode="append")

In [0]:
# Alternatively, write to Delta with Spark by going through pandas (e.g. starting from df_append.to_pandas()).