# Baseline Naive Agent

In [1]:
import pandas as pd
import json
import os 
from datetime import datetime

from dotenv import load_dotenv
from openai import OpenAI
from pathlib import Path

In [2]:
!pip show openai

Name: openai
Version: 2.14.0
Summary: The official Python library for the openai API
Home-page: https://github.com/openai/openai-python
Author: 
Author-email: OpenAI <support@openai.com>
License: Apache-2.0
Location: /opt/anaconda3/lib/python3.12/site-packages
Requires: anyio, distro, httpx, jiter, pydantic, sniffio, tqdm, typing-extensions
Required-by: 


In [3]:
!pip install --upgrade openai



In [4]:
features = pd.read_parquet('data_output/clean_features.parquet')

In [5]:
features.head()

Unnamed: 0,CORE_price_paid,CORE_deed_date,1HE_district,BACKUP_county,ID_combined_address_x,ID_fuzzy_match,CORE_match_confidence,CORE_BUILDING_REFERENCE_NUMBER,1HE_CURRENT_ENERGY_RATING,1HE_POTENTIAL_ENERGY_RATING,...,AI_MAINHEAT_DESCRIPTION,1HE_MAINHEAT_ENERGY_EFF,AI_MAINHEATCONT_DESCRIPTION,1HE_MAINHEATC_ENERGY_EFF,AI_LIGHTING_DESCRIPTION,1HE_LIGHTING_ENERGY_EFF,1HE_MECHANICAL_VENTILATION,BACKUP_CONSTRUCTION_AGE_BAND,1HE_TENURE,ID_combined_address_y
0,582000,2022-06-06,BUCKINGHAMSHIRE,BUCKINGHAMSHIRE,49 amersham road nan hp13 5aa,49 amersham road hp13 5aa,92.592593,5175796000.0,D,B,...,"Boiler and radiators, mains gas",Good,"Programmer, room thermostat and TRVs",Good,Low energy lighting in 73% of fixed outlets,Very Good,natural,England and Wales: 1900-1929,Owner Occupied,49 amersham road hp13 5aa
1,582000,2022-06-06,BUCKINGHAMSHIRE,BUCKINGHAMSHIRE,49 amersham road nan hp13 5aa,49 amersham road hp13 5aa,92.592593,5175796000.0,D,B,...,"Boiler and radiators, mains gas",Good,"Programmer, room thermostat and TRVs",Good,Low energy lighting in 53% of fixed outlets,Good,natural,England and Wales: 1900-1929,Owner Occupied,49 amersham road hp13 5aa
2,582000,2022-06-06,BUCKINGHAMSHIRE,BUCKINGHAMSHIRE,49 amersham road nan hp13 5aa,49 amersham road hp13 5aa,92.592593,5175796000.0,E,E,...,"Boiler and radiators, mains gas",Good,"Programmer, room thermostat and TRVs",Good,No low energy lighting,Very Poor,natural,England and Wales: 1900-1929,Owner Occupied,49 amersham road hp13 5aa
3,582000,2022-06-06,BUCKINGHAMSHIRE,BUCKINGHAMSHIRE,49 amersham road nan hp13 5aa,49 amersham road hp13 5aa,92.592593,5175796000.0,D,D,...,"Boiler and radiators, mains gas",Good,"Programmer, room thermostat and TRVs",Good,No low energy lighting,Very Poor,natural,England and Wales: 1900-1929,Owner Occupied,49 amersham road hp13 5aa
4,655000,2024-12-12,BUCKINGHAMSHIRE,BUCKINGHAMSHIRE,67 amersham road nan hp13 5aa,67a amersham road hp13 5aa,90.909091,10005290000.0,D,C,...,"Boiler and radiators, mains gas",Good,"Programmer, room thermostat and TRVs",Good,Low energy lighting in all fixed outlets,Very Good,natural,England and Wales: 1930-1949,Owner Occupied,67a amersham road hp13 5aa


In [6]:
encoding_cols = [col for col in features.columns if col.startswith('1HE_')]
core_cols = [col for col in features.columns if col.startswith('CORE_')]
id_cols = [col for col in features.columns if col.startswith('ID_')]
AI_cols = [col for col in features.columns if col.startswith('AI_')]

final_feature_cols = encoding_cols + core_cols
features = features[final_feature_cols]

train = features.loc[features['CORE_deed_date'].dt.year < 2025]
test = features.loc[features['CORE_deed_date'].dt.year >= 2025]

X_train = train[final_feature_cols].drop(columns=['CORE_price_paid', 'CORE_deed_date', 'CORE_INSPECTION_DATE'], axis=1) # I need to drop date here! 
X_test = test[final_feature_cols].drop(columns=['CORE_price_paid','CORE_deed_date', 'CORE_INSPECTION_DATE'], axis=1) # I need to drop date here! 
y_train = train['CORE_price_paid']
y_test = test['CORE_price_paid']

In [7]:
X_train

Unnamed: 0,1HE_district,1HE_CURRENT_ENERGY_RATING,1HE_POTENTIAL_ENERGY_RATING,1HE_PROPERTY_TYPE,1HE_BUILT_FORM,1HE_ENERGY_TARIFF,1HE_MAINS_GAS_FLAG,1HE_GLAZED_AREA,1HE_HOT_WATER_ENERGY_EFF,1HE_HOT_WATER_ENV_EFF,...,CORE_HEATING_COST_POTENTIAL,CORE_HOT_WATER_COST_CURRENT,CORE_HOT_WATER_COST_POTENTIAL,CORE_TOTAL_FLOOR_AREA,CORE_MULTI_GLAZE_PROPORTION,CORE_EXTENSION_COUNT,CORE_NUMBER_HABITABLE_ROOMS,CORE_NUMBER_HEATED_ROOMS,CORE_LOW_ENERGY_LIGHTING,CORE_NUMBER_OPEN_FIREPLACES
0,BUCKINGHAMSHIRE,D,B,House,Semi-Detached,Single,Y,Normal,Good,Good,...,520.0,148.0,90.0,129.00,10.0,0.000000,6.000000,6.000000,73.0,0.0
1,BUCKINGHAMSHIRE,D,B,House,Semi-Detached,Single,Y,Normal,Good,Good,...,696.0,116.0,116.0,155.00,100.0,1.000000,6.000000,6.000000,53.0,0.0
2,BUCKINGHAMSHIRE,E,E,House,Semi-Detached,Single,Y,Normal,Very Poor,Poor,...,961.0,468.0,399.0,133.66,100.0,1.000000,5.000000,5.000000,0.0,1.0
3,BUCKINGHAMSHIRE,D,D,House,Semi-Detached,Single,Y,Normal,Good,Good,...,941.0,173.0,151.0,157.45,100.0,2.000000,6.000000,6.000000,0.0,1.0
4,BUCKINGHAMSHIRE,D,C,House,Detached,Single,Y,More Than Typical,Good,Good,...,1706.0,354.0,215.0,122.00,100.0,1.000000,6.000000,6.000000,100.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29163,BUCKINGHAMSHIRE,D,C,House,Detached,Single,Y,Normal,Good,Good,...,1395.0,139.0,139.0,261.00,100.0,2.000000,10.000000,10.000000,100.0,0.0
29164,BUCKINGHAMSHIRE,B,B,House,Detached,standard tariff,Not Available,Not Available,Good,Good,...,577.0,100.0,100.0,270.00,100.0,0.838119,5.331698,5.256817,100.0,0.0
29165,BUCKINGHAMSHIRE,B,B,House,Detached,standard tariff,Not Available,Not Available,Good,Good,...,577.0,100.0,100.0,270.00,100.0,0.838119,5.331698,5.256817,100.0,0.0
29166,BUCKINGHAMSHIRE,B,B,House,Detached,standard tariff,Not Available,Not Available,Good,Good,...,577.0,100.0,100.0,270.00,100.0,0.838119,5.331698,5.256817,100.0,0.0


In [8]:
subset = X_train.iloc[1:5,:]

In [9]:
json_string = subset.to_json()

subset.to_json('data_output/X_train.json', orient='records', indent=4)

In [25]:
class NaiveAgent:
    def __init__(self, system_prompt, model = 'gpt-5-mini' ):
        load_dotenv()
        
        api_key = os.getenv('OPENAI_API_KEY')
        if not api_key:
            raise ValueError('OpenAI API Key not found in .env')
        
        self.client = OpenAI(api_key=api_key)
        
        self.model = model
        self.system_prompt = system_prompt
        self.input_path = Path('data_output/X_train.json')
        self.output_dir = Path('data_output')
        self.input_data = None
    
    def JSON_read(self, output_dir='data_output'):
        with self.input_path.open('r', encoding='utf-8') as f:
            input_data = json.load(f)
    
    def call_agent(self,user_prompt="say hello"):
        self.user_prompt = user_prompt
        
        agent_response = self.client.responses.create(
            model = self.model,
            input = [
                {
                    "role":"system",
                    "content":"create a short JSON output with minimal tokens output."
                },
                {
                    "role":"user",
                    "content":"my name, Nirosh. my age, 28"
                }
            ],
            text = {
                "format":{
                    "type":"json_schema",
                    "name":"agent_output",
                    "strict":True,
                    "schema": {
                        "type":"object",
                        "properties":{
                            "output":{"type":"string"}
                        },
                        "required":["output"],
                        "additionalProperties": False
                        }
                    }
                }
        )
        output = json.loads(agent_response.output_text)
        print(json.dumps(output, indent=3))

In [26]:
agent1 = NaiveAgent(system_prompt='Create short minimal JSON with input.')
agent1.call_agent()

{
   "output": "{\"name\":\"Nirosh\",\"age\":28}"
}


# Agentic Models

1. Naive Agent (Baseline)

## 2. Multi-Agent Models