In [1]:
import sys
import os
import json

# Make the local `solver` package importable by putting the current working
# directory on sys.path. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry each time.
project_root = os.path.abspath('.')
if project_root not in sys.path:
    sys.path.append(project_root)
from solver.portion_solver import Portion_Solver

# Create an instance of Portion_Solver
solver = Portion_Solver()

In [2]:
# Location of the JSON dataset; a relative path, so it is resolved against the
# current working directory when the file is opened.
path = "training_dataset/training_set_portion.json"

In [3]:
# Load the whole dataset into memory. The encoding is given explicitly because
# the records contain Chinese text: without it, open() uses the platform
# default, which is not UTF-8 everywhere and can fail or mis-decode the file.
with open(path, 'r', encoding='utf-8') as file:
    data = json.load(file)

In [4]:
def extract_data(data, stype_values):
    """Return the items of ``data`` whose ``'stype'`` entry equals ``stype_values``.

    Args:
        data: Iterable of items (dict-like) to filter.
        stype_values: Value compared with ``==`` against each item's
            ``'stype'`` entry; the whole value must match.

    Returns:
        A new list with the matching items, in their original order. Items
        that have no ``'stype'`` key are skipped.
    """
    return [
        entry
        for entry in data
        if 'stype' in entry and entry['stype'] == stype_values
    ]

In [5]:
from collections import Counter

import pandas as pd

# Freeze each problem's 'stype' list into a tuple so it is hashable and can be counted.
stype_combinations = [tuple(problem["stype"]) for problem in data]
combination_counts = Counter(stype_combinations)

# Tabulate (combination, count) pairs and order them from most to least frequent.
combination_df = pd.DataFrame(
    list(combination_counts.items()),
    columns=['Combination', 'Count'],
).sort_values(by='Count', ascending=False)

combination_df

Unnamed: 0,Combination,Count
0,"(有, 每單位量, 問分配)",95
3,"(有, 每單位量, 問分配, 問剩下)",17
12,"(每單位量, 每單位量, 問分配, 問剩下)",8
6,"(量變, 每單位量, 問分配)",7
16,"(有, 平分, 問分配)",6
7,"(有, 每單位量, 每單位量, 問分配)",6
9,"(有, 量變, 每單位量, 問分配)",6
2,"(量變, 每單位量, 問分配, 問剩下)",5
13,"(每單位量, 有, 問分配)",4
15,"(每單位量, 每單位量, 問分配)",3


In [6]:
# Use every loaded problem; no filtering is applied here.
# Note: this binds a second name to the same list object, it does not copy it.
extracted_data = data

In [7]:
# Number of problems selected in extracted_data.
len(extracted_data)

170

In [8]:
# Preview only the first records: displaying the whole list floods the cell
# output with every nested problem dictionary.
extracted_data[:2]

[{'qid': '61298',
  'sentence': '有845朵玫瑰，平均每13朵分裝成一束，可裝成幾束？',
  'quantities': {'s1': [{'qtyid': 'E1',
     'quantity': {'value': '845', 'unit': '朵', 'entity': '玫瑰'},
     'verb': '有',
     'subject': None,
     'object': None}],
   's2': [{'qtyid': 'E2',
     'quantity': {'value': '13', 'unit': '朵', 'entity': '玫瑰'},
     'verb': '分裝',
     'subject': None,
     'object': None},
    {'qtyid': 'E3',
     'quantity': {'value': '一', 'unit': '束', 'entity': '玫瑰'},
     'verb': '分裝',
     'subject': None,
     'object': None}],
   's3': [{'qtyid': 'E4',
     'quantity': {'value': '幾', 'unit': '束', 'entity': '玫瑰'},
     'verb': '裝成',
     'subject': None,
     'object': None}]},
  'unitmaps': {'s2': [{'denominator': 'E2', 'numerator': 'E3'}]},
  'answer': '65束',
  'stype': ['有', '每單位量', '問分配']},
 {'qid': '61299',
  'sentence': '老師買了512枝筆，想平均分給32位學生，每位學生最多會分到幾枝？',
  'quantities': {'s1': [{'qtyid': 'E1',
     'quantity': {'value': '512', 'unit': '枝', 'entity': '筆'},
     'verb': '買',
     'subje

In [9]:
# Run the solver over every problem and split the problems into two buckets.
# A truthy return value from solver.solve() is counted as correct; a falsy
# return value or a raised exception is counted as wrong.
correct_case = []
wrong_case = []
solve_errors = []  # (problem, exception) pairs for problems on which solve() raised
wrong = 0
correct = 0
unsolve = 0  # how many of the wrong problems failed by raising

for problem in extracted_data:
    try:
        solved = solver.solve(problem)
    except Exception as exc:
        # Catch Exception rather than using a bare except, so KeyboardInterrupt
        # and SystemExit still stop the run. Keep the error for later inspection
        # instead of discarding it.
        solve_errors.append((problem, exc))
        unsolve += 1
        solved = False

    if solved:
        correct += 1
        correct_case.append(problem)
    else:
        wrong += 1
        wrong_case.append(problem)

total = len(extracted_data)
# Guard against an empty dataset, which previously raised ZeroDivisionError.
accuracy = correct / total if total else 0.0
print(f"Problems:{total}, Correct:{correct}, Wrong:{wrong}, Accuracy:{accuracy} ")
    

{'Formula': '845 / 13.0 = 65.0', 'My Answer': '65.0束'}
{'Formula': '512 / 32.0 = 16.0', 'My Answer': '16.0枝'}
{'Formula': '9000 / 110.0 = 81.0...90.0', 'My Answer': '81.0箱;90.0顆'}
{'Formula': '88 / 16.0 = 5.0...8.0', 'My Answer': '5.0束;8.0朵'}
{'Formula': '775000 / 1800.0 = 430.0...1000.0', 'My Answer': '430.0箱;1000.0枝'}
{'Formula': '99 / 19.0 = 5.0...4.0', 'My Answer': '5.0簍;4.0個'}
{'Formula': '36572 / 100.0 = 365.0...72.0', 'My Answer': '365.0箱;72.0架'}
{'Formula': '(120 / 6.0) / 5.0 = 4.0', 'My Answer': '4.0箱'}
{'Formula': '99 / 27.0 = 3.0...18.0', 'My Answer': '3.0條;18.0顆'}
{'Formula': '74625 / 375.0 = 199.0', 'My Answer': '199.0瓶'}
{'Formula': '(600 / 3.0) / 4.0 = 50.0', 'My Answer': '50.0間'}
{'Formula': '84 / 0.5 = 168.0', 'My Answer': '168.0個'}
{'Formula': '(720 / 5.0) / 9.0 = 16.0', 'My Answer': '16.0箱'}
{'Formula': '(800 - 146) / 6.0 = 109.0', 'My Answer': '109.0盒'}
{'Formula': '(720 / 5.0) / 6.0 = 24.0', 'My Answer': '24.0間'}
{'Formula': '(440 / 5.0) / 8.0 = 11.0', 'My Answer':

In [10]:
# Inspect the problems counted as wrong, i.e. those for which solver.solve()
# returned a falsy value or raised.
wrong_case

[{'qid': '111308',
  'sentence': '一袋紅豆重21+(1/3)公斤。玲玲有3袋的紅豆，每1+(19/45)公斤裝一包，相當於裝幾包？',
  'quantities': {'s1': [{'qtyid': 'E1',
     'quantity': {'value': 1, 'unit': '袋', 'entity': '紅豆'},
     'verb': '重',
     'subject': None,
     'object': None},
    {'qtyid': 'E2',
     'quantity': {'value': '21+(1/3)', 'unit': '公斤', 'entity': '紅豆'},
     'verb': '重',
     'subject': None,
     'object': None}],
   's2': [],
   's3': [{'qtyid': 'E3',
     'quantity': {'value': '1+(19/45)', 'unit': '公斤', 'entity': '紅豆'},
     'verb': '裝',
     'subject': None,
     'object': None},
    {'qtyid': 'E4',
     'quantity': {'value': 1, 'unit': '包', 'entity': '紅豆'},
     'verb': '裝',
     'subject': None,
     'object': None}],
   's4': [{'qtyid': 'E5',
     'quantity': {'value': '?', 'unit': '包', 'entity': '紅豆'},
     'verb': '相當於',
     'subject': None,
     'object': None}]},
  'unitmaps': {'s1': [{'denominator': 'E1', 'numerator': 'E2'}],
   's3': [{'denominator': 'E3', 'numerator': 'E4'}]},
  'answer': 

In [11]:
# Re-run the solver on one problem that was counted as correct
# (index 2 of correct_case) to see its formula and answer again.
solver.solve(correct_case[2])

{'Formula': '9000 / 110.0 = 81.0...90.0', 'My Answer': '81.0箱;90.0顆'}


True

In [12]:
# Show the raw record stored at index 26 of correct_case.
correct_case[26]

{'qid': '30873',
 'sentence': '用減法算式把做法記下來：16顆巧克力，每4顆裝成一盒，可以裝成幾盒？',
 'quantities': {'s1': [{'qtyid': 'E1',
    'quantity': {'value': '16', 'unit': '顆', 'entity': '巧克力'},
    'verb': '用',
    'subject': None,
    'object': None}],
  's2': [{'qtyid': 'E2',
    'quantity': {'value': '4', 'unit': '顆', 'entity': '巧克力'},
    'verb': '裝成',
    'subject': None,
    'object': None},
   {'qtyid': 'E3',
    'quantity': {'value': 1, 'unit': '盒', 'entity': '巧克力'},
    'verb': '裝成',
    'subject': None,
    'object': None}],
  's3': [{'qtyid': 'E4',
    'quantity': {'value': '?', 'unit': '盒', 'entity': '巧克力'},
    'verb': '裝成',
    'subject': None,
    'object': None}]},
 'unitmaps': {'s2': [{'denominator': 'E2', 'numerator': 'E3'}]},
 'answer': '4盒',
 'stype': ['有', '每單位量', '問分配']}

In [13]:
# Re-run the solver on the record at index 26 of correct_case.
solver.solve(correct_case[26])

{'Formula': '16 / 4.0 = 4.0', 'My Answer': '4.0盒'}


True

In [14]:
def get_question_data(qid, data):
    """Look up a question record by its ``'qid'``.

    Args:
        qid: Identifier compared with ``==`` against each record's ``'qid'``.
        data: Iterable of question records (dict-like, each with a ``'qid'`` key).

    Returns:
        The first record whose ``'qid'`` equals ``qid``, or ``None`` when no
        record matches.
    """
    matches = (record for record in data if record['qid'] == qid)
    return next(matches, None)

In [15]:
# Fetch a single problem by its qid for closer inspection.
# tmp is None if no record in data has this qid.
tmp = get_question_data("1MA0131110",data)

In [16]:
# Display the record that was looked up by qid.
tmp

{'qid': '1MA0131110',
 'sentence': '鉛筆一枝賣5元，美美帶了48元，最多可以買幾枝鉛筆？',
 'quantities': {'s1': [{'qtyid': 'E1',
    'quantity': {'value': 1, 'unit': '枝', 'entity': '鉛筆'},
    'verb': '賣',
    'subject': None,
    'object': None},
   {'qtyid': 'E2',
    'quantity': {'value': '5', 'unit': '元', 'entity': '鉛筆'},
    'verb': '賣',
    'subject': None,
    'object': None}],
  's2': [{'qtyid': 'E3',
    'quantity': {'value': '48', 'unit': '元', 'entity': '鉛筆'},
    'verb': '美美',
    'subject': None,
    'object': None}],
  's3': [{'qtyid': 'E4',
    'quantity': {'value': '?', 'unit': '枝', 'entity': '鉛筆'},
    'verb': '可以',
    'subject': None,
    'object': None}]},
 'unitmaps': {'s1': [{'denominator': 'E1', 'numerator': 'E2'}]},
 'answer': '9枝',
 'stype': ['每單位量', '有', '問分配']}

In [17]:
# Run the solver on the record that was looked up by qid.
# NOTE(review): the recorded output shows '9.6枝' against the expected answer '9枝'
# and a False return. Looks like this problem needs a whole-number quotient;
# confirm in Portion_Solver.
solver.solve(tmp)

{'Formula': '48 / 5.0 = 9.6', 'My Answer': '9.6枝'}


False