diff --git a/nemo/NeMo-Data-Designer/self-hosted-tutorials/community-contributions/README.md b/nemo/NeMo-Data-Designer/self-hosted-tutorials/community-contributions/README.md
index 1b3934796..af7de6df6 100644
--- a/nemo/NeMo-Data-Designer/self-hosted-tutorials/community-contributions/README.md
+++ b/nemo/NeMo-Data-Designer/self-hosted-tutorials/community-contributions/README.md
@@ -16,16 +16,16 @@
 
 | Notebook                                          | Domain              | Description                                                     |
 |---------------------------------------------------|---------------------|-----------------------------------------------------------------|
-| [person-sampler-tutorial.ipynb](./advanced/person-samplers/person-sampler-tutorial.ipynb) | Persona Samplers    | Generate realistic personas using the person sampler |
-| [clinical-trials.ipynb](./advanced/healthcare-datasets/clinical-trials.ipynb) | Healthcare          | Build synthetic clinical trial datasets with realistic PII for testing data protection |
-| [insurance-claims.ipynb](./advanced/healthcare-datasets/insurance-claims.ipynb) | Healthcare          | Create synthetic insurance claims datasets with realistic claim data and processing information |
-| [physician-notes-with-realistic-personal-details.ipynb](./advanced/healthcare-datasets/physician-notes-with-realistic-personal-details.ipynb) | Healthcare          | Generate realistic patient data and physician notes with embedded personal information |
-| [w2-dataset.ipynb](./advanced/forms/w2-dataset.ipynb) | Forms & Documents   | Generate synthetic W-2 tax form datasets with realistic employee and employer information |
-| [multi-turn-conversation.ipynb](./advanced/multi-turn-chat/multi-turn-conversation.ipynb) | Conversational AI   | Build synthetic conversational data with realistic person details and multi-turn dialogues |
-| [visual-question-answering-using-vlm.ipynb](./advanced/multimodal/visual-question-answering-using-vlm.ipynb) | Multimodal          | Create visual question answering datasets using Vision Language Models |
-| [product-question-answer-generator.ipynb](./advanced/qa-generation/product-question-answer-generator.ipynb) | Q&A Generation      | Build product information datasets with corresponding questions and answers |
-| [generate-rag-evaluation-dataset.ipynb](./advanced/rag-examples/generate-rag-evaluation-dataset.ipynb) | RAG & Retrieval     | Generate diverse RAG evaluation datasets for testing retrieval-augmented generation systems |
-| [reasoning-traces.ipynb](./advanced/reasoning/reasoning-traces.ipynb) | Reasoning           | Build synthetic reasoning traces to demonstrate step-by-step problem-solving processes |
-| [text-to-python.ipynb](./advanced/text-to-code/text-to-python.ipynb) | Text-to-Code        | Generate Python code from natural language instructions with validation and evaluation |
-| [text-to-python-evol.ipynb](./advanced/text-to-code/text-to-python-evol.ipynb) | Text-to-Code        | Build advanced Python code generation with evolutionary improvements and iterative refinement |
-| [text-to-sql.ipynb](./advanced/text-to-code/text-to-sql.ipynb) | Text-to-Code        | Create SQL queries from natural language descriptions with validation and testing |
+| [person-sampler-tutorial.ipynb](./person-samplers/person-sampler-tutorial.ipynb) | Persona Samplers    | Generate realistic personas using the person sampler |
+| [clinical-trials.ipynb](./healthcare-datasets/clinical-trials.ipynb) | Healthcare          | Build synthetic clinical trial datasets with realistic PII for testing data protection |
+| [insurance-claims.ipynb](./healthcare-datasets/insurance-claims.ipynb) | Healthcare          | Create synthetic insurance claims datasets with realistic claim data and processing information |
+| [physician-notes-with-realistic-personal-details.ipynb](./healthcare-datasets/physician-notes-with-realistic-personal-details.ipynb) | Healthcare          | Generate realistic patient data and physician notes with embedded personal information |
+| [w2-dataset.ipynb](./forms/w2-dataset.ipynb) | Forms & Documents   | Generate synthetic W-2 tax form datasets with realistic employee and employer information |
+| [multi-turn-conversation.ipynb](./multi-turn-chat/multi-turn-conversation.ipynb) | Conversational AI   | Build synthetic conversational data with realistic person details and multi-turn dialogues |
+| [visual-question-answering-using-vlm.ipynb](./multimodal/visual-question-answering-using-vlm.ipynb) | Multimodal          | Create visual question answering datasets using Vision Language Models |
+| [product-question-answer-generator.ipynb](./qa-generation/product-question-answer-generator.ipynb) | Q&A Generation      | Build product information datasets with corresponding questions and answers |
+| [generate-rag-evaluation-dataset.ipynb](./rag-examples/generate-rag-evaluation-dataset.ipynb) | RAG & Retrieval     | Generate diverse RAG evaluation datasets for testing retrieval-augmented generation systems |
+| [reasoning-traces.ipynb](./reasoning/reasoning-traces.ipynb) | Reasoning           | Build synthetic reasoning traces to demonstrate step-by-step problem-solving processes |
+| [text-to-python.ipynb](./text-to-code/text-to-python.ipynb) | Text-to-Code        | Generate Python code from natural language instructions with validation and evaluation |
+| [text-to-python-evol.ipynb](./text-to-code/text-to-python-evol.ipynb) | Text-to-Code        | Build advanced Python code generation with evolutionary improvements and iterative refinement |
+| [text-to-sql.ipynb](./text-to-code/text-to-sql.ipynb) | Text-to-Code        | Create SQL queries from natural language descriptions with validation and testing |
diff --git a/nemo/NeMo-Data-Designer/self-hosted-tutorials/community-contributions/forms/w2-dataset.ipynb b/nemo/NeMo-Data-Designer/self-hosted-tutorials/community-contributions/forms/w2-dataset.ipynb
index 8cd491e5e..b3199b39a 100644
--- a/nemo/NeMo-Data-Designer/self-hosted-tutorials/community-contributions/forms/w2-dataset.ipynb
+++ b/nemo/NeMo-Data-Designer/self-hosted-tutorials/community-contributions/forms/w2-dataset.ipynb
@@ -1,592 +1,4 @@
 {
-<<<<<<< HEAD
-    "cells": [
-        {
-            "cell_type": "markdown",
-            "id": "00fcbf4b",
-            "metadata": {},
-            "source": [
-                "# 🧾 NeMo Data Designer: W-2 Dataset Generator"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "e8ca7bf9",
-            "metadata": {},
-            "source": [
-                "> ⚠️ **Warning**: NeMo Data Designer is current in Early Release and is not recommended for production use.\n",
-                ">\n",
-                "> **Note**: In order to run this notebook, you must have the NeMo Data Designer microservice deployed locally via docker compose. See the [deployment guide](http://docs.nvidia.com/nemo/microservices/latest/set-up/deploy-as-microservices/data-designer/docker-compose.html) for more details.\n",
-                ">\n",
-                "> Alternatively, you can use the [NeMo Data Designer managed service](https://build.nvidia.com/nemo/data-designer). Please refer the [intro-tutorials](../../intro-tutorials/1-the-basics.ipynb) on how to connect to it. \n",
-                ">\n",
-                "> **Note**: If you are using the NeMo Data Designer managed service, you will only be able to launch preview jobs. You will not be able to launch jobs using the `create` method."
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "016ba3fd",
-            "metadata": {},
-            "source": [
-                "In this notebook we demonstrate how you can combine numerical samplers, the person sampler and LLMs to create a synthetic dataset of W-2 forms (US Wage & Tax Statements).\n",
-                "\n",
-                "### Generating realistic numerical values\n",
-                "\n",
-                "We will use generate numerical fields using statistics published by the IRS for the year 2021:\n",
-                "\n",
-                "- https://www.irs.gov/pub/irs-pdf/p5385.pdf\n",
-                "\n",
-                "### Generating realistic taxpayers\n",
-                "\n",
-                "We will use the person sampler to generate realistic US taxpayers. When the US locale is chosen, statistics for generated persons reflect real-world census data.\n"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "ed91fb59",
-            "metadata": {},
-            "source": [
-                "\n",
-                "#### 💾 Install dependencies\n",
-                "\n",
-                "**IMPORTANT** 👉 If you haven't already, follow the instructions in the [README](../../README.md) to install the necessary dependencies. Note you may need to restart your kernel after setting up the environment.\n"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "52263153",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "from nemo_microservices import NeMoMicroservices\n",
-                "from nemo_microservices.beta.data_designer import (\n",
-                "    DataDesignerConfigBuilder,\n",
-                "    DataDesignerClient,\n",
-                ")\n",
-                "from nemo_microservices.beta.data_designer.config import columns as C\n",
-                "from nemo_microservices.beta.data_designer.config import params as P"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "94213d7c",
-            "metadata": {},
-            "source": [
-                "### ⚙️ Initialize the NeMo Data Designer Client\n",
-                "\n",
-                "- The data designer client is responsible for submitting generation requests to the Data Designer microservice.\n",
-                "- In this notebook, we connect to a local deployment of data designer. You can deploy your own instance of data designer by following the deployment instructions [here](https://docs.nvidia.com/nemo/microservices/latest/set-up/deploy-as-microservices/data-designer/docker-compose.html).\n"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "834f3a96",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "data_designer_client = DataDesignerClient(client=NeMoMicroservices(base_url=\"http://localhost:8080\"))"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "fadf0f19",
-            "metadata": {},
-            "source": [
-                "### 🏗️ Initialize the Data Designer Config Builder\n",
-                "\n",
-                "- The Data Designer config defines the dataset schema and generation process.\n",
-                "\n",
-                "- The config builder provides an intuitive interface for building this configuration.\n",
-                "\n",
-                "- You must provide a list of model configs to the builder at initialization.\n",
-                "\n",
-                "- This list contains the models you can choose from (via the `model_alias` argument) during the generation process.\n"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 47,
-            "id": "a214afcd",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# We specify the endpoint of the model during deployment using the model_provider_registry.\n",
-                "model_id = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n",
-                "model_alias = \"nemotron-nano-9b-v2\""
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "dcc34745",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "config_builder = DataDesignerConfigBuilder(\n",
-                "    model_configs=[\n",
-                "        P.ModelConfig(\n",
-                "            alias=model_alias,\n",
-                "            provider=\"nvidiabuild\",\n",
-                "            model=model_id,\n",
-                "            inference_parameters=P.InferenceParameters(\n",
-                "                max_tokens=1024,\n",
-                "                temperature=0.6,\n",
-                "                top_p=0.9,\n",
-                "            ),\n",
-                "            is_reasoner=True\n",
-                "        ),\n",
-                "    ]\n",
-                ")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "bbcb3538",
-            "metadata": {},
-            "source": [
-                "## Setting up taxpayer and employer sampling"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "149e2abf",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# Create a samplers for an American taxpayer (employee), and employer.\n",
-                "config_builder.add_column(\n",
-                "    C.SamplerColumn(\n",
-                "        name=\"taxpayer\",\n",
-                "        type=P.SamplerType.PERSON,\n",
-                "        params=P.PersonSamplerParams(\n",
-                "            locale=\"en_US\",\n",
-                "            age_range=[18, 75]\n",
-                "        ),\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "# While the employer isn't technically a \"person\", we'll use the person sampler for generating the employer address.\n",
-                "config_builder.add_column(\n",
-                "    C.SamplerColumn(\n",
-                "        name=\"employer\",\n",
-                "        type=P.SamplerType.PERSON,\n",
-                "        params=P.PersonSamplerParams(\n",
-                "            locale=\"en_US\",\n",
-                "        ),\n",
-                "    )\n",
-                ")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "28397d74",
-            "metadata": {},
-            "source": [
-                "## Defining the fields\n",
-                "\n",
-                "We will focus on the following:\n",
-                "- Box 1 (Wages, tips, and other compensation)\n",
-                "- Box 2 (Federal income tax withheld)\n",
-                "- Box 3 (Social security wages)\n",
-                "- Box 4 (Social security tax withheld)\n",
-                "- Box 5 (Medicare wages and tips)\n",
-                "- Box 6 (Medicare tax withheld)\n",
-                "- Box 7 (Social security tips)\n",
-                "- Box a (Employee's social security number)\n",
-                "- Box c (Employer's name, address and zip code)\n",
-                "- Box e (Employee's fist name, initial, and last name)\n",
-                "- Box f (Employee's address and zip code)"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "060f6c0f",
-            "metadata": {},
-            "source": [
-                "### Numerical fields\n",
-                "\n",
-                "Here, we'll define how to generate numerical samples for the currency fields of the W-2 (Boxes 1-7). We'll use the W-2 statistics from the IRS linked above to generate realistic samples."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "be7e98e1",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "### BOX 1 (TOTAL WAGES, TIPS, AND OTHER COMPENSATION) ###\n",
-                "\n",
-                "# From Page 6 of the IRS Statistics, we know that  276,388,660 / 277,981,454 W-2 forms had a non-zero value for Box 1 (99.4%).\n",
-                "# From Page 8 of the IRS Statistics, we know that the sum of this field across all forms was 9,920,000,000*$1000 = $9,920,000,000,000 dollars.\n",
-                "# Since there were 276,388,660 non-zero Box 1 values, the average value of Box 1 was $9,920,000,000,000 / 276,388,660 = $35,891.49.\n",
-                "# We will use a Bernoulli-Exponential mixture distribution to sample values for this field.\n",
-                "config_builder.add_column(\n",
-                "    C.SamplerColumn(\n",
-                "        name=\"box_1_wages_tips_other_compensation\",\n",
-                "        type=P.SamplerType.BERNOULLI_MIXTURE,\n",
-                "        params=P.BernoulliMixtureSamplerParams(\n",
-                "            p=0.994,\n",
-                "            dist_name=\"expon\",\n",
-                "            dist_params={\"scale\": 35891.49}\n",
-                "        ),\n",
-                "        convert_to=\"int\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "### BOX 2 (FEDERAL INCOME TAX WITHHELD) ###\n",
-                "\n",
-                "# Note: The calculations below are a simplification based on the assumption that this is an individual's only W-2.\n",
-                "# In practice, the taxable income is based on all wages for individuals with multiple W-2s.\n",
-                "\n",
-                "# 2022 standard deduction\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"standard_deduction\",\n",
-                "        expr=\"{% if taxpayer.marital_status == 'married_present' %}25900{% else %}12950{% endif %}\",\n",
-                "        convert_to=\"float\",\n",
-                "    ),\n",
-                ")\n",
-                "\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"taxable_income\",\n",
-                "        expr=\"{{ [0, box_1_wages_tips_other_compensation - standard_deduction]|max }}\",\n",
-                "        convert_to=\"float\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "# We'll sum over the tax incurred at each 2022 tax bracket.\n",
-                "# For simplicity, we'll assume that the taxpayer is single here.\n",
-                "BRACKETS = [\n",
-                "    {\"name\": \"bracket1\", \"rate\": 0.10, \"max\": 10275, \"min\": 0},\n",
-                "    {\"name\": \"bracket2\", \"rate\": 0.12, \"max\": 41775, \"min\": 10275},\n",
-                "    {\"name\": \"bracket3\", \"rate\": 0.22, \"max\": 89075, \"min\": 41775},\n",
-                "    {\"name\": \"bracket4\", \"rate\": 0.24, \"max\": 170050, \"min\": 89075},\n",
-                "    {\"name\": \"bracket5\", \"rate\": 0.32, \"max\": 215950, \"min\": 170050},\n",
-                "    {\"name\": \"bracket6\", \"rate\": 0.35, \"max\": 539900, \"min\": 215950},\n",
-                "    {\"name\": \"bracket7\", \"rate\": 0.37, \"max\": 10000000000000, \"min\": 539900},\n",
-                "]\n",
-                "for bracket in BRACKETS:\n",
-                "    expression = f\"{bracket['rate']}*([[taxable_income,{bracket['max']}]|min - {bracket['min']}, 0] | max)\"\n",
-                "    config_builder.add_column(\n",
-                "        C.ExpressionColumn(\n",
-                "            name=bracket[\"name\"],\n",
-                "            expr=\"{{ \" + expression + \" }}\",\n",
-                "            convert_to=\"float\",\n",
-                "        )\n",
-                "    )\n",
-                "\n",
-                "# Sum the tax brackets to get the total withheld, on average\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"mean_tax_liability\",\n",
-                "        expr=\"{{ bracket1 + bracket2 + bracket3 + bracket4 + bracket5 + bracket6 + bracket7 }}\",\n",
-                "        convert_to=\"int\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "# Add some noise to get the actual withholding\n",
-                "config_builder.add_column(\n",
-                "    C.SamplerColumn(\n",
-                "        name=\"tax_liability_noise\",\n",
-                "        type=P.SamplerType.GAUSSIAN,\n",
-                "        params=P.GaussianSamplerParams(mean=1, stddev=0.1),\n",
-                "    )\n",
-                ")\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"box_2_federal_income_tax_withheld\",\n",
-                "        expr=\"{{ (mean_tax_liability * tax_liability_noise) | int }}\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "### BOX 3 (SOCIAL SECURITY WAGES) ###\n",
-                "\n",
-                "# From Page 8 of the IRS Statistics, we know that social security wages are, on average, 8,150,000,000/9,920,000,000 ~= 82.16% of total wages.\n",
-                "# We'll sample a ratio from a normal distribution with mean 0.8216 and standard deviation 0.2.\n",
-                "config_builder.add_column(\n",
-                "    C.SamplerColumn(\n",
-                "        name=\"social_security_wages_ratio\",\n",
-                "        type=P.SamplerType.GAUSSIAN,\n",
-                "        params=P.GaussianSamplerParams(mean=0.8216, stddev=0.2),\n",
-                "        convert_to=\"float\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"box_3_social_security_wages\",\n",
-                "        expr=\"{{ (box_1_wages_tips_other_compensation * social_security_wages_ratio) | int }}\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "### BOX 4 (SOCIAL SECURITY TAX WITHHELD) ###\n",
-                "\n",
-                "# In 2022, social security tax was withheld at a rate of 6.2% of social security wages, up to a maximum of $147,000.\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"box_4_social_security_tax_withheld\",\n",
-                "        expr=\"{{ (([box_3_social_security_wages, 147000]|min) * 0.062) | int }}\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "### BOX 5 (MEDICARE WAGES AND TIPS) ###\n",
-                "\n",
-                "# From Page 8 of the IRS Statistics, we know that Medicare wages and tips are, on average, 10,300,000,000/9,920,000,000 ~= 103.8% of total wages.\n",
-                "config_builder.add_column(\n",
-                "    C.SamplerColumn(\n",
-                "        name=\"medicare_wages_and_tips_ratio\",\n",
-                "        type=P.SamplerType.GAUSSIAN,\n",
-                "        params=P.GaussianSamplerParams(mean=1.038, stddev=0.2),\n",
-                "        convert_to=\"float\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"box_5_medicare_wages_and_tips\",\n",
-                "        expr=\"{{ (box_1_wages_tips_other_compensation * medicare_wages_and_tips_ratio) | int }}\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "### BOX 6 (MEDICARE TAX WITHHELD) ###\n",
-                "\n",
-                "# The standard employee Medicare tax rate in 2022 was 1.45% on all Medicare wages.\n",
-                "# The Additional Medicare Tax rate in 2022 was 0.9% on all Medicare wages in excess of $200,000.\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"box_6_medicare_tax_withheld\",\n",
-                "        expr=\"{{ ((box_5_medicare_wages_and_tips * 0.0145) + (([box_5_medicare_wages_and_tips - 200000, 0]|max) * 0.009)) | int }}\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "### BOX 7 (SOCIAL SECURITY TIPS) ###\n",
-                "\n",
-                "# From Page 6 of the IRS Statistics, we know that only 12,620,946 / 277,981,454 W-2 forms had a non-zero value for Box 7 (4.54%).\n",
-                "# From Page 8 of the IRS Statistics, we know that the sum of this field across all forms was 55,897,014*$1000 = $55,897,014,000.\n",
-                "# Since there were 12,620,946 non-zero Box 7 values, the average value of Box 7 was $55,897,014,000 / 12,620,946 = $4428.91.\n",
-                "# We will use a Bernoulli-Exponential mixture distribution to sample values for this field.\n",
-                "config_builder.add_column(\n",
-                "    C.SamplerColumn(\n",
-                "        name=\"box_7_social_security_tips\",\n",
-                "        type=P.SamplerType.BERNOULLI_MIXTURE,\n",
-                "        params=P.BernoulliMixtureSamplerParams(\n",
-                "            p=0.0454,\n",
-                "            dist_name=\"expon\",\n",
-                "            dist_params={\"scale\": 4428.91}\n",
-                "        ),\n",
-                "        convert_to=\"int\",\n",
-                "    )\n",
-                ")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "f1cbd72b",
-            "metadata": {},
-            "source": [
-                "### Non-numerical fields\n",
-                "\n",
-                "The remaining fields contain information about the employee (taxpayer) and the employer. We'll use the person sampler in combination with an LLM to generate values here."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "bf3ba45b",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "### BOX A (EMPLOYEE'S SOCIAL SECURITY NUMBER) ###\n",
-                "\n",
-                "# We can use the ssn field of the person sampler to generate a valid SSN for the employee.\n",
-                "\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"box_a_employee_ssn\",\n",
-                "        expr=\"{{ taxpayer.ssn }}\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "### BOX C (EMPLOYER'S NAME, ADDRESS AND ZIP CODE) ###\n",
-                "\n",
-                "# We want to generate a realistic company name.\n",
-                "# We'll start by generating a list of industries, expanded with magic.\n",
-                "config_builder.add_column(\n",
-                "    C.LLMTextColumn(\n",
-                "        name=\"employer_business\",\n",
-                "        model_alias=model_alias,\n",
-                "        system_prompt=(\"You are assisting a user generate synthetic W-2 forms.\"\n",
-                "                       \"You must generate a realistic industry category for the employer\"\n",
-                "                       \"eg: software, health insurance, shoe store, restaurant, plumbing\"),\n",
-                "        prompt=(\"Generate the industry category for the employer. Ensure it is consistent with the employer location\"\n",
-                "                \"City: {{ employer.city }}\\nState: {{ employer.state }}\"),\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "# Next, we'll generate an actual name based on the type of business.\n",
-                "config_builder.add_column(\n",
-                "   C.LLMTextColumn(\n",
-                "        name=\"employer_name\",\n",
-                "        model_alias=model_alias,\n",
-                "        prompt=\"Generate an original name for a {{ employer_business }} business in {{ employer.city }}.\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "# Finally, we'll combine the employer name with the address of the employer.\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"box_c_employer_name_address_zip\",\n",
-                "        expr=\"{{ employer_name }}\\n{{ employer.street_number }} {{ employer.street_name }}\\n{{ employer.city }}, {{ employer.state }} {{ employer.postcode }}\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "### BOX E (EMPLOYEE'S FIRST NAME, INITIAL, AND LAST NAME) ###\n",
-                "\n",
-                "# We can extract the first name, initial, and last name from the person sampler.\n",
-                "\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"box_e_employee_first_name_initial_last_name\",\n",
-                "        expr=\"{{ taxpayer.first_name }} {{ taxpayer.middle_name[:1] }} {{ taxpayer.last_name }}\",\n",
-                "    )\n",
-                ")\n",
-                "\n",
-                "### BOX F (EMPLOYEE'S ADDRESS AND ZIP CODE) ###\n",
-                "\n",
-                "# Similarly, we can extract the employee's address and zip code from the person sampler.\n",
-                "\n",
-                "config_builder.add_column(\n",
-                "    C.ExpressionColumn(\n",
-                "        name=\"box_f_employee_address_zip\",\n",
-                "        expr=\"{{ taxpayer.street_number }} {{ taxpayer.street_name }}\\n{{ taxpayer.city }}, {{ taxpayer.state }} {{ taxpayer.postcode }}\",\n",
-                "    )\n",
-                ")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "7800b823",
-            "metadata": {},
-            "source": [
-                "## Preview the dataset\n",
-                "\n",
-                "We'll define the actual columns we want to appear in the dataset and generate a small 10-row preview."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "62432301",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# These are the columns we want in the final dataset, after dropping latent variables.\n",
-                "FINAL_COLUMNS = [\n",
-                "    \"box_1_wages_tips_other_compensation\",\n",
-                "    \"box_2_federal_income_tax_withheld\",\n",
-                "    \"box_3_social_security_wages\",\n",
-                "    \"box_4_social_security_tax_withheld\",\n",
-                "    \"box_5_medicare_wages_and_tips\",\n",
-                "    \"box_6_medicare_tax_withheld\",\n",
-                "    \"box_7_social_security_tips\",\n",
-                "    \"box_a_employee_ssn\",\n",
-                "    \"box_c_employer_name_address_zip\",\n",
-                "    \"box_e_employee_first_name_initial_last_name\",\n",
-                "    \"box_f_employee_address_zip\",\n",
-                "]\n",
-                "\n",
-                "# Preview the results\n",
-                "preview = data_designer_client.preview(config_builder, verbose_logging=True)\n",
-                "preview.dataset[FINAL_COLUMNS]"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "4925ab9d",
-            "metadata": {},
-            "source": [
-                "## Generating and Saving the Final Dataset\n",
-                "\n",
-                "Once we're happy with the preview, we can generate a larger dataset."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "21e660d4",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# Generate a final dataset\n",
-                "job_results = data_designer_client.create(config_builder, num_records=20, wait_until_done=False)\n",
-                "\n",
-                "job_results.wait_until_done()"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "5a8b4dbe",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# Load the dataset into a pandas DataFrame\n",
-                "dataset = job_results.load_dataset()\n",
-                "\n",
-                "# Show the final dataset with only the W-2 relevant columns\n",
-                "final_dataset = dataset[FINAL_COLUMNS]\n",
-                "\n",
-                "print(f\"Generated dataset with {len(final_dataset)} records\")\n"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "id": "b9eb0671",
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# Create data directory if it doesn't exist\n",
-                "import os\n",
-                "os.makedirs(\"./data\", exist_ok=True)\n",
-                "\n",
-                "# Save the dataset to CSV\n",
-                "csv_filename = \"./data/synthetic-w2-dataset.csv\"\n",
-                "final_dataset.to_csv(csv_filename, index=False)\n",
-                "print(f\"Dataset saved to {csv_filename}\")\n",
-                "\n",
-                "# Show a sample of the final dataset\n",
-                "final_dataset.head()"
-            ]
-        }
-    ],
-    "metadata": {
-        "kernelspec": {
-            "display_name": "sdg_venv",
-            "language": "python",
-            "name": "python3"
-        },
-        "language_info": {
-            "codemirror_mode": {
-                "name": "ipython",
-                "version": 3
-            },
-            "file_extension": ".py",
-            "mimetype": "text/x-python",
-            "name": "python",
-            "nbconvert_exporter": "python",
-            "pygments_lexer": "ipython3",
-            "version": "3.12.11"
-        }
-    },
-    "nbformat": 4,
-    "nbformat_minor": 5
-=======
  "cells": [
   {
    "cell_type": "markdown",
@@ -1033,10 +445,10 @@
     "    LLMTextColumnConfig(\n",
     "        name=\"employer_business\",\n",
     "        model_alias=MODEL_ALIAS,\n",
-    "        system_prompt=(\"You are assisting a user generate synthetic W-2 forms.\\n\"\n",
-    "                       \"You must generate a realistic industry category for the employer\\n\"\n",
+    "        system_prompt=(\"You are assisting a user generate synthetic W-2 forms.\\n\"\n",
+    "                       \"You must generate a realistic industry category for the employer\\n\"\n",
     "                       \"eg: software, health insurance, shoe store, restaurant, plumbing /no_think\"),\n",
-    "        prompt=(\"Generate the industry category for the employer. Ensure it is consistent with the employer location\\n\"\n",
+    "        prompt=(\"Generate the industry category for the employer. Ensure it is consistent with the employer location\\n\"\n",
     "                \"City: {{ employer.city }}\\nState: {{ employer.state }}\"),\n",
     "    )\n",
     ")\n",
@@ -1237,5 +649,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
->>>>>>> 8b9be04 (refactored w2 dataset notebook for 25.10)
 }