In [24]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate 
from langchain_core.output_parsers import StrOutputParser

In [3]:
# Model identifier kept in one place so it is easy to swap.
MODEL_NAME = "gpt-5-nano"

# Chat model client used by the cells below.
llm = ChatOpenAI(model=MODEL_NAME)

In [4]:
# Static conversation: the system message sets the persona,
# the user message carries the actual question.
prompts = [
    dict(role="system", content="You are a Databricks developer"),
    dict(role="user", content="Transform function in pyspark."),
]

# Send the message list to the chat model and show only the reply text.
response = llm.invoke(prompts)
print(response.content)

Transform is a higher-order function in PySpark for arrays. It applies a function to each element of an array column and returns an array of the results.

Key points
- Works on array columns (not maps). For maps, use transform_values or transform_keys.
- The lambda receives each element as a column expression and should return a column expression.
- Useful to avoid Python UDFs and stay within Spark’s optimized engine.

Basic usage
- Example data and import:
  from pyspark.sql import functions as F
  df = spark.createDataFrame(
      [(1, [1, 2, 3]), (2, [4, 5])],
      ["id", "nums"]
  )

- Double each element in an array:
  df2 = df.select(F.transform("nums", lambda x: x * 2).alias("doubled"))
  df2.show()

- Use a conditional inside the transform:
  df3 = df.select(F.transform("nums", lambda x: F.when(x > 2, x).otherwise(-1)).alias("cond"))
  df3.show()

- Using a SQL expression (alternative syntax):
  df4 = df.selectExpr("transform(nums, x -> x * 2) as doubled")
  df4.show()

Notes


In [6]:
# Print the whole response object (not just `.content`) to inspect what the model call returned.
print(response)

content='Transform is a higher-order function in PySpark for arrays. It applies a function to each element of an array column and returns an array of the results.\n\nKey points\n- Works on array columns (not maps). For maps, use transform_values or transform_keys.\n- The lambda receives each element as a column expression and should return a column expression.\n- Useful to avoid Python UDFs and stay within Spark’s optimized engine.\n\nBasic usage\n- Example data and import:\n  from pyspark.sql import functions as F\n  df = spark.createDataFrame(\n      [(1, [1, 2, 3]), (2, [4, 5])],\n      ["id", "nums"]\n  )\n\n- Double each element in an array:\n  df2 = df.select(F.transform("nums", lambda x: x * 2).alias("doubled"))\n  df2.show()\n\n- Use a conditional inside the transform:\n  df3 = df.select(F.transform("nums", lambda x: F.when(x > 2, x).otherwise(-1)).alias("cond"))\n  df3.show()\n\n- Using a SQL expression (alternative syntax):\n  df4 = df.selectExpr("transform(nums, x -> x * 2) 

# Prompt template, dynamic prompt


In [9]:
# Reusable chat template: {position} fills the system persona, {query} the user question.
# (role, content) tuples are the shorthand equivalent of role/content dicts.
prompts = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a {position}"),
        ("user", "{query}"),
    ]
)

In [11]:
# Show the template's string form to inspect its input variables and per-role message templates.
print(prompts)

input_variables=['position', 'query'] input_types={} partial_variables={} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['position'], input_types={}, partial_variables={}, template='You are a {position}'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='{query}'), additional_kwargs={})]


In [14]:
# Fill both placeholders to turn the template into concrete system/human messages.
final_prompt = prompts.format_messages(
    position="AI DATA ENGINEER",
    query="Explain role of DATA AI ENGINEER in 1 line.",
)
# Bare last expression so the notebook displays the resulting message list.
final_prompt

[SystemMessage(content='You are a AI DATA ENGINEER', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Explain role of DATA AI ENGINEER in 1 line.', additional_kwargs={}, response_metadata={})]

In [16]:
# Send the formatted messages to the chat model; the reply is kept in `res`.
res = llm.invoke(final_prompt)

In [17]:
# Show only the reply text of the model response.
print(res.content)

A Data AI Engineer designs and maintains scalable data pipelines and ML systems, integrates data sources, and deploys and monitors AI models to enable data-driven decision making.


# Chains (Runnables)

In [20]:
# Runnable-style call: the template is filled from a single dict of variables.
prompts.invoke(
    {
        "position": "databricks developer",
        "query": "python hi kyu",
    }
)

ChatPromptValue(messages=[SystemMessage(content='You are a databricks developer', additional_kwargs={}, response_metadata={}), HumanMessage(content='python hi kyu', additional_kwargs={}, response_metadata={})])

In [21]:
# Manual two-step pipeline: fill the template first, then hand the prompt value to the model.
prompt_value = prompts.invoke({"position": "Python", "query": "python hi kyu"})
res = llm.invoke(prompt_value)
print(res.content)

Hi! I’m your Python helper. What would you like to do with Python today? Here are a few quick options, or tell me your goal and I’ll tailor it:

- Quick hello world: print("Hello, world!")
- Learn basics: variables, data types, simple operations
- Control flow: if/else, loops
- Functions and how to write reusable code
- Work on a small problem or bug you have
- Help with a project or explain a Python concept

If you’re just starting, here are a couple of tiny examples:
- Variables and print:
  x = 5
  y = 2
  print(x + y)  # 7
- If statement:
  if x > 0:
      print("positive")
- Loop:
  for i in range(3):
      print(i)
- Function:
  def greet(name):
      return f"Hello, {name}!"

How would you like to proceed? Tell me your goal, your current level, and the environment you’re using (REPL, script, Jupyter, etc.).


In [None]:
# Output parser used as the last step of the chain below
# (presumably reduces the model reply to a plain string — confirm against the StrOutputParser docs).
out = StrOutputParser()

In [None]:
# Compose prompt template -> chat model -> output parser into a single pipeline with the `|` operator.
chains = prompts | llm | out

In [23]:
# Display the composed pipeline to inspect its steps.
chains

ChatPromptTemplate(input_variables=['position', 'query'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['position'], input_types={}, partial_variables={}, template='You are a {position}'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='{query}'), additional_kwargs={})])
| ChatOpenAI(profile={'max_input_tokens': 272000, 'max_output_tokens': 128000, 'image_inputs': True, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': True, 'tool_calling': True, 'structured_output': True, 'image_url_inputs': True, 'pdf_inputs': True, 'pdf_tool_message': True, 'image_tool_message': True, 'tool_choice': True}, client=<openai.resources.chat.completions.completions.Completions object at 0x10f396d50>, async_client=<openai.resources.chat.completions.comple

In [None]:
# Run the whole pipeline (template -> model -> parser) in a single call.
# The pipeline is bound to `chains`; the name `chain` was never defined and raised NameError.
chains.invoke({"position" : "databricks developer", "query" : "python hi kyu"})