In [16]:
import sys
import os
import json

# Add the parent directory of the package to sys.path
sys.path.append(os.path.abspath('.'))
from solver.difference_solver import Difference_Solver

# Create an instance of Portion_Solver
solver = Difference_Solver()

In [17]:
path = "training_dataset/training_set_difference.json"

In [18]:
with open(path, 'r') as file:
    data = json.load(file)

In [19]:
def extract_data(data, stype_values):
        extracted_data = []
        for item in data:
            # Check if the 'stype' key exists and its value matches one of the specified values in stype_values
            if 'stype' in item and item['stype'] == stype_values:
                extracted_data.append(item)
        return extracted_data

In [20]:
import pandas as pd
from collections import Counter
stype_combinations = [tuple(d["stype"]) for d in data]
combination_counts = Counter(stype_combinations)

# Convert to DataFrame for better visualization
combination_df = pd.DataFrame(combination_counts.items(), columns=['Combination', 'Count'])
combination_df = combination_df.sort_values(by='Count', ascending=False)

combination_df

Unnamed: 0,Combination,Count
0,"(有, 有, 問相差)",42
6,"(有, 問相差)",28
8,"(每單位量, 每單位量, 問相差)",20
4,"(每單位量, 量變, 量變, 問相差)",11
1,"(量變, 量變, 問相差)",9
10,"(每單位量, 每單位量, 量變, 問相差)",6
12,"(每單位量, 問相差)",5
5,"(每單位量, 有, 有, 問相差)",5
9,"(量變, 問相差)",4
22,"(有, 有, 有, 問相差)",4


In [21]:
extracted_data = data

In [22]:
stype_list=[
    ["有","有","問相差"],
    ["有","問相差"],
]
stype = stype_list[0]
extracted_data = extract_data(data,stype)

In [23]:
extracted_data

[{'qid': '71324',
  'sentence': '阿智和言言共有7.32盒筆芯。已知阿智有4.62盒，兩人的筆芯相差幾盒？',
  'quantities': {'s1': [{'qtyid': 'E1',
     'quantity': {'value': '7.32', 'unit': '盒', 'entity': '芯'},
     'verb': '共有',
     'subject': '阿智',
     'object': None}],
   's2': [{'qtyid': 'E2',
     'quantity': {'value': '4.62', 'unit': '盒', 'entity': '芯'},
     'verb': '知',
     'subject': '阿智',
     'object': None},
    {'qtyid': 'E3',
     'quantity': {'value': '4.62', 'unit': '盒', 'entity': '芯'},
     'verb': '有',
     'subject': '阿智',
     'object': None}],
   's3': [{'qtyid': 'E4',
     'quantity': {'value': '幾', 'unit': '盒', 'entity': '芯'},
     'verb': '相差',
     'subject': '人|芯',
     'object': None},
    {'qtyid': 'E5',
     'quantity': {'value': '兩', 'unit': None, 'entity': '人'},
     'verb': '相差',
     'subject': None,
     'object': None}]},
  'unitmaps': {'s3': [{'denominator': 'E4', 'numerator': 'E5'}]},
  'answer': '1.92盒',
  'stype': ['有', '有', '問相差']},
 {'qid': '71302',
  'sentence': '小賢和小鈞共有5.65盒

In [24]:
len(extracted_data)

42

In [25]:

wrong_case = []
wrong = 0
correct = 0
unsolve  = 0

for i in range(0, len(extracted_data)):
    try:
        if solver.solve(extracted_data[i]):
            correct+=1
        else:
            wrong_case.append(extracted_data[i])
            wrong+=1

    except:
        wrong_case.append((extracted_data[i]))
        wrong+=1

print(f"Problems:{len(extracted_data)}, Correct:{correct}, Wrong:{wrong}, Accuracy:{correct/len(extracted_data)} ")     

{'QuestionID': '71324', 'Variable': {'X1': {'value': '7.32', 'unit': '盒', 'entity': '芯', 'sentence': 's1', 'clue': None, 'verb': '共有', 'subject': '阿智', 'object': None}, 'X2': {'value': '4.62', 'unit': '盒', 'entity': '芯', 'sentence': 's2', 'clue': None, 'verb': '知', 'subject': '阿智', 'object': None}, 'X3': {'value': '4.62', 'unit': '盒', 'entity': '芯', 'sentence': 's2', 'clue': None, 'verb': '有', 'subject': '阿智', 'object': None}, 'X4': {'value': '?', 'unit': '盒', 'entity': '芯', 'sentence': 's3', 'clue': None, 'verb': '相差', 'subject': '人|芯', 'object': None}, 'X5': {'value': 2, 'unit': None, 'entity': '人', 'sentence': 's3', 'clue': None, 'verb': '相差', 'subject': None, 'object': None}}, 'stype': ['有', '有', '問相差'], 'Description': '', 'Formula': '', 'Answer': '1.92盒', '第一比較物': None, '第二比較物': None, '共有': None}
{'QuestionID': '71302', 'Variable': {'X1': {'value': '5.65', 'unit': '盒', 'entity': '電池', 'sentence': 's1', 'clue': None, 'verb': '共有', 'subject': '小賢|小鈞', 'object': None}, 'X2': {'value'

In [26]:
extracted_data

[{'qid': '71324',
  'sentence': '阿智和言言共有7.32盒筆芯。已知阿智有4.62盒，兩人的筆芯相差幾盒？',
  'quantities': {'s1': [{'qtyid': 'E1',
     'quantity': {'value': '7.32', 'unit': '盒', 'entity': '芯'},
     'verb': '共有',
     'subject': '阿智',
     'object': None}],
   's2': [{'qtyid': 'E2',
     'quantity': {'value': '4.62', 'unit': '盒', 'entity': '芯'},
     'verb': '知',
     'subject': '阿智',
     'object': None},
    {'qtyid': 'E3',
     'quantity': {'value': '4.62', 'unit': '盒', 'entity': '芯'},
     'verb': '有',
     'subject': '阿智',
     'object': None}],
   's3': [{'qtyid': 'E4',
     'quantity': {'value': '?', 'unit': '盒', 'entity': '芯'},
     'verb': '相差',
     'subject': '人|芯',
     'object': None},
    {'qtyid': 'E5',
     'quantity': {'value': 2, 'unit': None, 'entity': '人'},
     'verb': '相差',
     'subject': None,
     'object': None}]},
  'unitmaps': {'s3': [{'denominator': 'E4', 'numerator': 'E5'}]},
  'answer': '1.92盒',
  'stype': ['有', '有', '問相差']},
 {'qid': '71302',
  'sentence': '小賢和小鈞共有5.65盒電池