In [0]:
%sql
-- Peek at 10 rows of the parsed listings table.
select *
from mycatalog.hp_prd_data.nearby_parsed_data
limit 10

In [0]:
%sql
-- Show up to 10 distinct zip values found in the parsed listings.
select distinct zip
from mycatalog.hp_prd_data.nearby_parsed_data
limit 10

In [0]:
%sql
-- Look up the uszips reference row(s) for zip 77713.
-- Uses the standard `=` operator and a single-quoted string literal: a
-- double-quoted token is parsed as an identifier when double-quoted
-- identifiers are enabled.
select *
from mycatalog.hp_prd_data.uszips
where zip = '77713'

In [0]:
%sql
-- Show every parsed listing recorded for zip 77713.
-- Uses the standard `=` operator and a single-quoted string literal: a
-- double-quoted token is parsed as an identifier when double-quoted
-- identifiers are enabled.
select *
from mycatalog.hp_prd_data.nearby_parsed_data
where zip = '77713'

In [0]:
%sql
-- Deduplicated listings: one row per propertyId, taken from the record with
-- the latest ingestion_timestamp in nearby_parsed_data.
create or replace view mycatalog.hp_prd_data.dev_deduplicated
as
select
  `mls#` as house_mls,
  mls_status,
  house_price,
  coalesce(hoa, '0') as hoa,  -- missing HOA value is reported as '0'
  sqft,
  price_per_sqft,
  lot_size,
  beds,
  baths,
  fullbaths,
  partialBaths,
  location,
  stories,
  latitude,
  longitude,
  streetLine,
  city,
  state,
  zip,
  propertyId,
  yearBuilt,
  extract(year from current_date) - yearBuilt as age,  -- current year minus build year
  url,
  isHot,
  hasVirtualTour,
  hasVideoTour,
  has3DTour,
  isNewConstruction,
  listingRemarks
from mycatalog.hp_prd_data.nearby_parsed_data
-- Discard rows whose zip cannot be cast to a number.
where try_cast(zip as numeric) is not null
-- Keep only the newest record per property; rows that tie on
-- ingestion_timestamp are not given an explicit tie-breaker.
qualify row_number() over (
  partition by propertyId
  order by ingestion_timestamp desc
) = 1



In [0]:
%sql
-- Peek at 20 rows of the deduplicated listings view.
select *
from mycatalog.hp_prd_data.dev_deduplicated
limit 20

In [0]:
%sql
-- Totals for the deduplicated listings: rows, distinct zips,
-- distinct properties and distinct MLS numbers.
select
  count(*)                   as row_count,
  count(distinct zip)        as TOTAL_ZIP_CODES,
  count(distinct propertyId) as TOTAL_PROPERTY,
  count(distinct house_mls)  as mls_count
from mycatalog.hp_prd_data.dev_deduplicated

In [0]:
%sql
-- Number of deduplicated listings per zip, smallest counts first.
select
  zip,
  count(*) as row_count
from mycatalog.hp_prd_data.dev_deduplicated
group by
  zip
order by
  row_count  -- ascending is the default sort direction

In [0]:
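# Now join this with the zip code data (note: the cells below use an inner join, so listings whose zip is missing from uszips are dropped)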

In [0]:
%sql
-- Look up the uszips reference row(s) for zip 77713.
-- Uses the standard `=` operator and a single-quoted string literal: a
-- double-quoted token is parsed as an identifier when double-quoted
-- identifiers are enabled.
select *
from mycatalog.hp_prd_data.uszips
where zip = '77713'

In [0]:
%sql
-- Deduplicated listings enriched with zip-level attributes from uszips
-- (lat, lng, population, density, exposed with a zip_ prefix).
-- Inner join: a listing is returned only when its zip has a row in uszips.
create or replace view mycatalog.hp_prd_data.dev_fp_parsed_data as
select
  listing.*,
  zip_info.lat        as zip_lat,
  zip_info.lng        as zip_lng,
  zip_info.population as zip_population,
  zip_info.density    as zip_density
from mycatalog.hp_prd_data.dev_deduplicated as listing
inner join mycatalog.hp_prd_data.uszips as zip_info
  on listing.zip = zip_info.zip

In [0]:
%sql
-- Totals for the deduplicated listings view (rows, distinct zips,
-- distinct properties, distinct MLS numbers).
select
  count(*)                   as row_count,
  count(distinct zip)        as TOTAL_ZIP_CODES,
  count(distinct propertyId) as TOTAL_PROPERTY,
  count(distinct house_mls)  as mls_count
from mycatalog.hp_prd_data.dev_deduplicated

In [0]:
%sql
-- Totals for the zip-enriched listings view (rows, distinct zips,
-- distinct properties, distinct MLS numbers).
select
  count(*)                   as row_count,
  count(distinct zip)        as TOTAL_ZIP_CODES,
  count(distinct propertyId) as TOTAL_PROPERTY,
  count(distinct house_mls)  as mls_count
from mycatalog.hp_prd_data.dev_fp_parsed_data

In [0]:
%sql
-- Peek at 10 rows of the zip-enriched listings view.
select *
from mycatalog.hp_prd_data.dev_fp_parsed_data
limit 10

In [0]:
%sql
-- Materialize the zip-enriched listings as a table.
-- Reads from the dev_fp_parsed_data view, which already performs the
-- dev_deduplicated x uszips inner join, instead of repeating that join here;
-- the view and the table therefore cannot drift apart.
-- `select *` is intentional: the table is a snapshot of the view's columns.
create or replace table mycatalog.hp_prd_data.dev_fp_parsed_data_table as
select *
from mycatalog.hp_prd_data.dev_fp_parsed_data

In [0]:
%sql
-- Peek at 10 rows of the materialized zip-enriched listings table.
select *
from mycatalog.hp_prd_data.dev_fp_parsed_data_table
limit 10

In [0]:
# Create a table holding the top features extracted from each home's listing remarks (listingRemarks)

In [0]:
%sql
-- Per-property table of the top features extracted from the listing remarks
-- by the 'databricks-gemma-3-12b' model via ai_query.
-- Output columns: propertyId, listingRemarks (model input), top_features (model output).
create or replace table mycatalog.hp_prd_data.dev_fp_home_features as
select
  propertyId,
  listingRemarks,
  case
    -- Nothing to summarize: skip the model call and leave top_features NULL
    -- rather than sending a prompt that contains no description.
    when listingRemarks is null or trim(listingRemarks) = '' then null
    else ai_query(
      'databricks-gemma-3-12b',
      concat(
        'Extract the top 4 key features from the home description below as a comma-separated list. example
    Input: Very nice home with Spacious floor plan, Multiple living areas, Desirable location, Fenced and gated property. Output should be: Spacious floor plan, Multiple living areas, Desirable location, Fenced and gated property',
        -- Explicit delimiter so the example output does not run straight
        -- into the listing text.
        '. Home description: ',
        listingRemarks
      )
    )
  end as top_features
from mycatalog.hp_prd_data.dev_fp_parsed_data_table



In [0]:
%sql
-- Peek at 20 rows of the extracted home-features table.
select *
from mycatalog.hp_prd_data.dev_fp_home_features
limit 20