In [1]:
import string
import numpy as np
import pandas as pd
from itertools import product
from collections import Counter
from datasets import load_dataset
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

In [2]:
def metric_calculation(pred, gt):    
    acc=accuracy_score(gt, pred)
    f1=f1_score(gt, pred, average='macro')
    confusion=confusion_matrix(gt, pred)
    fpr=confusion[0,1]/len(gt) ## predict to be 1; actual 0
    fnr=confusion[1,0]/len(gt) ## predict to be 0; actual 1
    return acc, f1, fpr, fnr

In [3]:
def post_processing(pred, model):

    if model=='mistral':
        new_pred = [p.replace('</s>', '').split()[0] for p in pred]
        new_pred = np.array([int(float(i)) if i in ['0', '0.0', '1', '1.0'] else 2 for i in new_pred])
    else:
        new_pred=[]        
        for p in pred:
            if (p.split()[0]=='0') or (p.split()[0]=='1'):
                new_pred.append(p.split()[0])
            else:
                p = p.lower().replace('</s>', '').replace('boxed', '')
                splits=[s for s in p.lower().split('\n') if s != '']
                p = ' '.join(splits[-3:]).translate(str.maketrans('', '', string.punctuation))                
                if 'response' in p:
                    try: new_pred.append([t for t in p.split('response')[-1].split() if t.isnumeric()][0])
                    except: new_pred.append(2)
                elif 'output' in p:
                    try: new_pred.append([t for t in p.split('output')[-1].split() if t.isnumeric()][0])
                    except: new_pred.append(2)
                elif 'return' in p:
                    try: new_pred.append([t for t in p.split('return')[-1].split() if t.isnumeric()][0])
                    except: new_pred.append(2)
                elif 'result' in p:
                    try: new_pred.append([t for t in p.split('result')[-1].split() if t.isnumeric()][0])
                    except: new_pred.append(2)
                elif 'plaintext' in p:
                    try: new_pred.append([t for t in p.split('plaintext')[-1].split() if t.isnumeric()][0])
                    except: new_pred.append(2)
                elif 'json' in p:
                    try: new_pred.append([t for t in p.split('json')[-1].split() if t.isnumeric()][0])
                    except: new_pred.append(2)
                else:
                    try: new_pred.append(p.split()[0])
                    except:new_pred.append(2)
        new_pred = np.array([int(float(i)) if i in ['0', '0.0', '1', '1.0'] else 2 for i in new_pred])
    return new_pred

In [6]:
ds = load_dataset("beanham/spatial_join_dataset")
test=ds['test']
gt=np.array(test['label'])[:20]
configs = [
    "few_shot_no_heur_cot",
    "few_shot_with_heur_hint_all_cot",
    "few_shot_with_heur_value_all_cot"
]

In [7]:
results=[]
models=['4o_mini', 'qwen_plus', '4o', 'o3_mini', 'deepseek']
for model in models:
    print(f'Model: {model}...')
    for config in configs:
        pred=np.load(f'base/{model}_cot/{model}_{config}.npy')[:20]
        if model=='deepseek':
            pred=[i[0] for i in pred]
        pred=post_processing(pred, model)
        metrics=metric_calculation(pred, gt)
        results.append([config, model, round(metrics[0],3), metrics[1]])
results=pd.DataFrame(results, columns=['config', 'model', 'acc', 'f1'])

Model: 4o_mini...
Model: qwen_plus...
Model: 4o...
Model: o3_mini...
Model: deepseek...


### Analysis

In [17]:
model='qwen_plus'
config='few_shot_no_heur_cot'
pred=np.load(f'base/{model}_cot/{model}_{config}.npy')[:20]
if model=='deepseek': 
    output=[i[0] for i in pred]
    reasoning=[i[1] for i in pred]
    proc_pred=post_processing(output, model)
else:
    proc_pred=post_processing(pred, model)
right=np.where(proc_pred==gt)[0]
wrong=np.where(proc_pred!=gt)[0]
len(right), len(wrong)

(15, 5)

In [18]:
index=8
print(pred[right[index]])

To determine whether the sidewalk runs alongside the road, we can follow these steps:

### Step 1: Analyze the input geometries
We are given two LineString geometries in GeoJSON format:
- **Sidewalk**: A series of coordinate points representing the sidewalk geometry.
- **Road**: A series of coordinate points representing the road geometry.

### Step 2: Define criteria for "running alongside"
For a sidewalk to be considered as running alongside a road, the following conditions should generally hold:
1. The sidewalk and road should be spatially close to each other.
2. The general direction (trend) of the sidewalk should align with the road.

### Step 3: Compute proximity between sidewalk and road
To assess whether the sidewalk is close to the road, we can calculate the minimum distance between the sidewalk and the road. If the minimum distance is below a certain threshold, it indicates that the sidewalk is near the road.

### Step 4: Check alignment of directions
To determine if the side

In [197]:
print(output[right[index]])

1

The sidewalk meets all three conditions for running alongside the road:
1. **Parallelism**: The min_angle of 0.46° indicates near-perfect alignment between the sidewalk and road.
2. **Clearance**: The min_distance of 9.24 meters ensures no direct overlap or intersection, maintaining a safe separation.
3. **Overlap**: The max_area of ~46.76% shows significant buffer overlap, confirming spatial association without direct contact.

All criteria (parallelism, clearance, overlap) are satisfied, so the answer is **1**.


## deepseek

#### no heur
- Maybe I can compare the direction and proximity of the line segments.
- Looking at the road's coordinates, they are moving eastward (since longitude becomes more negative) and northward (latitude increases).

- wrong conclusion: The sidewalk's start is ~23 meters west of the road's start, beyond typical adjacency for sidewalks.
- wrong conclusoin: The sidewalk moves north, and the road moves south,

### 4o

#### no heur
- check proximinity by calculate distance (pairwise); check alignment
- no calculation, but with conclusion. (indicating; likely parallele; same general direction)
  
#### no heur hints
- follows the hint: calculate angle & distance;
- wrong calculation: the change in longitude & latitude
- unclear calculation: The closest approach between the sidewalk and the road appears to be around **2-3 meters**.
- no calculation, but with conclusion. (For this specific input, after performing the necessary calculations, the conditions are satisfied, so the response is:) (Given the complexity of these calculations and the need for precise geospatial analysis, let's assume the calculations have been performed, and based on the input provided, the conditions are not fully satisfied.)


#### with heur hints
- check each heuristic hints invidually; and in combination.
- compare values (use typical values determined by their own knowledge 10; 5 meter distance; 20%)
- but inconsistent: overlap: 20% v.s. 30~70%; distance 2-10 meters or 1-20

------------

### qwen-plus

#### no heur
- check proximinity; calculate distance (pairwise); direction (northwest to southeast direction). (The sidewalk is close enough to the road to pass the proximity test.)
- Let’s assume the computed distance is approximately **1 meter**(??)
  
#### no heur hints
- follows the hint: calculate angle & distance;
- wrong calculation: Similarly, we can use the first and last points to determine the direction of the road (wrong); or pairwise point angle?? but could happen to arrive at the correction conclusion
- unclear calculation: The closest approach between the sidewalk and the road appears to be around **2-3 meters**.
- no calculation, but with conclusion.


#### with heur hints
- check each heuristic hints invidually; and in combination.
- compare values (use typical values determined by their own knowledge 10; 5 meter distance; 20%)
- but inconsistent: overlap: 20% v.s. 30~70%; distance 2-10 meters or 1-20

------------

### 4o-mini

#### no heur
- check proximinity; calculate distance (pairwise). (We need to check if the sidewalk is close enough to the road to be considered "alongside".)
- threshold 1 meter??
- no results returned
  
#### no heur hints
- follows the hint: calculate angle & distance;
- Similarly, we can use the first and last points to determine the direction of the road (wrong)
- Given the complexity of the calculations and checks, I will assume that the conditions are not satisfied based on the provided examples and return:

#### with heur hints
- check each heuristic hints invidually; and in combination.
- compare values (use typical values determined by their own knowledge 10; 1 meter distance; 10%)