# 詞性標注練習

In [None]:
# 安裝
# pip install spacy
# python -m spacy download en_core_web_md

In [27]:
import spacy

In [28]:
# Load the medium-sized English pipeline.
nlp = spacy.load("en_core_web_md")

# Sentence to analyse.
sentence = "List suppliers who supply red parts?"

# Run the sentence through the pipeline.
doc = nlp(sentence)

# One row per token: surface text, coarse part-of-speech tag (pos_)
# and dependency label (dep_). Note the third column is the dependency
# relation, not the fine-grained tag (that would be tag_).
row_template = "{:12} {:10} {}"
for tok in doc:
    print(row_template.format(tok.text, tok.pos_, tok.dep_))

List         NOUN       compound
suppliers    NOUN       ROOT
who          PRON       nsubj
supply       VERB       relcl
red          ADJ        amod
parts        NOUN       dobj
?            PUNCT      punct


In [5]:
# NOTE(review): the same pipeline is already loaded in an earlier cell, so on a
# top-to-bottom run this reload is redundant (kept to preserve behaviour).
nlp = spacy.load("en_core_web_md")

# Second example: a relative clause plus a reduced passive ("taught by Frank").
sen = "List the students who take the course taught by Frank"
doc = nlp(sen)

# One row per token: surface text, coarse POS tag, dependency label.
row_template = "{:12} {:10} {}"
for tok in doc:
    print(row_template.format(tok.text, tok.pos_, tok.dep_))

List         VERB       ROOT
the          DET        det
students     NOUN       dobj
who          PRON       nsubj
take         VERB       relcl
the          DET        det
course       NOUN       dobj
taught       VERB       acl
by           ADP        agent
Frank        PROPN      pobj


In [7]:
# Sample nested response payload holding two "Table" entries, each with an
# "_additional" block (distance, id), a name and a list of reference tags.
response_data = {
    "data": {
        "Get": {
            "Table": [
                {
                    "_additional": {
                        "distance": 0.29302722,
                        "id": "eaf6f825-3992-4dc4-9888-e64ddcf0ab3f",
                    },
                    "name": "Suppliers",
                    "ref": {"tag": ["subject", "object", "verb"]},
                },
                {
                    "_additional": {
                        "distance": 0.6119833,
                        "id": "7538c959-7790-49a8-b52e-ecdc95773eb0",
                    },
                    "name": "Shipments",
                    "ref": {"tag": ["subject", "object", "verb"]},
                },
            ]
        }
    }
}

# Walk down to the list of entries, then read the name of the first one.
table_hits = response_data["data"]["Get"]["Table"]
name = table_hits[0]["name"]
print(name)

Suppliers


In [8]:
# Check whether a response carries an empty list under data -> Get -> key_name.
def is_empty_result(response, key_name):
    """Return True only if response['data']['Get'][key_name] is an empty list.

    Any other shape yields False instead of raising: a missing level, a value
    that is not a list, or a level that is ``None`` / not a dict (a GraphQL
    error payload may carry ``"data": null``).

    Args:
        response: decoded response object, normally a nested dict.
        key_name: key to look up inside the ``Get`` mapping (e.g. 'Table').

    Returns:
        bool: True when the value under ``key_name`` is a list of length 0,
        False in every other case.
    """
    if not isinstance(response, dict):
        return False

    data = response.get('data')
    if not isinstance(data, dict):
        # Covers both a missing 'data' key and an explicit null.
        return False

    get_data = data.get('Get')
    if not isinstance(get_data, dict):
        return False

    value = get_data.get(key_name)
    return isinstance(value, list) and len(value) == 0

# Example payloads: three with an empty list under different keys, one with a hit.
response_empty_array_value = {'data': {'Get': {'Value': []}}}
response_empty_array_table = {'data': {'Get': {'Table': []}}}
response_empty_array_column = {'data': {'Get': {'Column': []}}}
response_with_data = {'data': {'Get': {'Table': [{'_additional': {'distance': 0.32905298, 'id': 'e58b7533-168d-4c8c-bf04-f387fd9c5053'}, 'name': 'Parts', 'ref': {'tag': ['subject', 'object', 'verb']}}]}}}

# Each row: payload, key to inspect, message when empty, message when not empty.
empty_checks = (
    (response_empty_array_value, 'Value', "回傳為空陣列(Value)", "回傳不為空陣列(Value)"),
    (response_empty_array_table, 'Table', "回傳為空陣列(Table)", "回傳不為空陣列(Table)"),
    (response_empty_array_column, 'Column', "回傳為空陣列(Column)", "回傳不為空陣列(Column)"),
    (response_with_data, 'Table', "回傳為空陣列(Table)", "回傳不為空陣列(Table)"),
)

# Run the checks in order and report one line per payload.
for payload, payload_key, empty_message, filled_message in empty_checks:
    print(empty_message if is_empty_result(payload, payload_key) else filled_message)


回傳為空陣列(Value)
回傳為空陣列(Table)
回傳為空陣列(Column)
回傳不為空陣列(Table)


In [3]:


# Example sentence.
text = "List suppliers who supply all red parts or are not located in Paris."

# Parse the text with spaCy.
doc = nlp(text)

# Dependency table: token, its dependency label, its head, and the head's POS tag.
for tok in doc:
    print(f"{tok.text:12} {tok.dep_:10} {tok.head.text:12} {tok.head.pos_:10}")

# Every token labelled "relcl" anchors a relative clause; the clause text is
# that token's subtree joined with single spaces.
relative_clauses = [
    " ".join(node.text for node in anchor.subtree)
    for anchor in doc
    if anchor.dep_ == "relcl"
]

print("\n相对子句:")
print(*relative_clauses, sep="\n", end="\n" if relative_clauses else "")


List         compound   suppliers    NOUN      
suppliers    ROOT       suppliers    NOUN      
who          nsubj      supply       VERB      
supply       relcl      suppliers    NOUN      
all          det        parts        NOUN      
red          amod       parts        NOUN      
parts        dobj       supply       VERB      
or           cc         supply       VERB      
are          auxpass    located      VERB      
not          neg        located      VERB      
located      conj       supply       VERB      
in           prep       located      VERB      
Paris        pobj       in           ADP       
.            punct      suppliers    NOUN      

相对子句:
who supply all red parts or are not located in Paris


In [26]:
# Example sentence.
text = "List suppliers who supply all red parts or are not located in Paris."

# Parse the text with spaCy.
doc = nlp(text)

# Collect one condition per relative clause: for every token whose dependency
# label is "relcl", join the texts of its subtree with single spaces.
conditions = [
    " ".join(node.text for node in anchor.subtree)
    for anchor in doc
    if anchor.dep_ == "relcl"
]

# Split clause strings on a coordinating " or " / " and ".
def split_conditions(conditions):
    """Break each clause into its coordinated parts.

    A clause containing ' or ' is split on ' or '; otherwise a clause
    containing ' and ' is split on ' and '; a clause with neither connective
    is kept unchanged. (Previously a connective-free clause was dropped, and a
    clause containing both connectives was split twice, producing overlapping
    fragments.)

    Args:
        conditions: iterable of clause strings.

    Returns:
        list[str]: the resulting parts, in input order, with surrounding
        whitespace stripped from split fragments.
    """
    separated = []
    for condition in conditions:
        if ' or ' in condition:
            separator = ' or '
        elif ' and ' in condition:
            separator = ' and '
        else:
            # No connective: the clause is a single condition, keep it as is.
            separated.append(condition)
            continue
        separated.extend(part.strip() for part in condition.split(separator))
    return separated

# Split the extracted clauses into individual conditions.
# The result gets its own name: assigning it back to `split_conditions` would
# replace the function with a list and make any later call fail.
condition_parts = split_conditions(conditions)

print("条件:")
for condition in condition_parts:
    print(condition)

# TODO: work out which word each condition modifies.

条件:
who supply all red parts
are not located in Paris


In [25]:
# Example sentence.
text = "List suppliers located in Paris."

# Parse the text with spaCy.
doc = nlp(text)

# Every adposition (coarse POS tag "ADP") contributes one condition: the
# texts of its subtree joined with single spaces.
conditions = [
    " ".join(node.text for node in adposition.subtree)
    for adposition in doc
    if adposition.pos_ == "ADP"
]

# Report what was found, or say that nothing was.
if not conditions:
    print("未找到明确的条件。")
else:
    print("找到的条件：")
    for condition in conditions:
        print(condition)


找到的条件：
in Paris


In [29]:
# 功能全都正常，待解決 isnt 的問題(似乎只有子句有問題)

def process_and_annotate_sentence(text):
    """Parse ``text`` and print negation / 'all' annotations.

    Two things are reported through ``print_annotations``:
      * main-clause flags, computed over tokens that have no ancestor with
        the "relcl" dependency label;
      * one entry per relative clause (token labelled "relcl"), built from the
        clause text and the text of the token it attaches to, then split and
        annotated by ``split_and_annotate_conditions``.

    Args:
        text: sentence to analyse; parsed with the module-level ``nlp`` pipeline.

    Returns:
        None. Results are printed.
    """
    doc = nlp(text)

    def outside_relative_clause(tok):
        # True when none of the token's ancestors heads a relative clause
        # (also true for tokens that have no ancestors at all).
        return not any(anc.dep_ == "relcl" for anc in tok.ancestors)

    # Main-clause flags: a literal "all" token, and any "neg" dependency,
    # among the tokens that are not nested under a relative clause.
    main_clause_all = any(
        tok.text.lower() == "all" for tok in doc if outside_relative_clause(tok)
    )
    main_clause_negation = any(
        tok.dep_ == "neg" and outside_relative_clause(tok) for tok in doc
    )

    # One (clause text, modified word) pair per relative clause.
    conditions = [
        (" ".join(node.text for node in anchor.subtree), anchor.head.text)
        for anchor in doc
        if anchor.dep_ == "relcl"
    ]
    has_subclause = bool(conditions)

    split_conditions = split_and_annotate_conditions(conditions)

    # Print annotations
    print_annotations(main_clause_negation, main_clause_all, split_conditions, has_subclause)

def split_and_annotate_conditions(conditions):
    """Split coordinated clauses and annotate every resulting part.

    A clause containing ' or ' is split on ' or '; otherwise a clause
    containing ' and ' is split on ' and '; otherwise it is kept whole. Each
    part keeps the modified noun of the clause it came from.

    Args:
        conditions: iterable of (clause text, modified noun) pairs.

    Returns:
        Whatever ``identify_negation_and_all`` returns for the split pairs.
    """
    pieces = []
    for clause_text, noun in conditions:
        # ' or ' takes precedence over ' and ' when both are present.
        if ' or ' in clause_text:
            separator = ' or '
        elif ' and ' in clause_text:
            separator = ' and '
        else:
            separator = None

        if separator is None:
            pieces.append((clause_text, noun))
        else:
            pieces.extend(
                (fragment.strip(), noun) for fragment in clause_text.split(separator)
            )

    return identify_negation_and_all(pieces)

def identify_negation_and_all(split_conditions):
    """Flag each condition for negation and for the quantifier 'all'.

    Matching is done on whole whitespace-separated words (case-insensitive,
    with surrounding punctuation removed) rather than on substrings, so
    "small" is not taken for "all" and "another" is not taken for "not".
    Contracted negations are recognised both attached to the verb ("isn't")
    and as a separate piece ("is n't").

    Args:
        split_conditions: iterable of (condition text, modified noun) pairs.

    Returns:
        list[dict]: one dict per input pair, in order, with the keys
        "condition", "modified_noun", "negation" and "all".
    """
    # Stand-alone negation words. "nt" is included on the assumption that the
    # tokenizer splits an apostrophe-less "isnt" into "is" + "nt" -- TODO confirm.
    negation_words = {"not", "n't", "n’t", "nt"}
    # Endings marking a contraction still attached to its verb.
    contraction_endings = ("n't", "n’t")
    # Punctuation that may cling to a word; apostrophes are deliberately kept.
    edge_punctuation = ".,;:!?\"()"

    annotated_conditions = []
    for condition, modified_noun in split_conditions:
        words = [word.strip(edge_punctuation) for word in condition.lower().split()]
        negation = any(
            word in negation_words or word.endswith(contraction_endings)
            for word in words
        )
        all_quantifier = "all" in words
        annotated_conditions.append({
            "condition": condition,
            "modified_noun": modified_noun,
            "negation": negation,
            "all": all_quantifier,
        })
    return annotated_conditions

def print_annotations(main_clause_negation, main_clause_all, annotated_conditions, has_subclause):
    """Print the main-clause flags followed by the sub-clause annotations.

    Args:
        main_clause_negation: value shown as the main clause's negation flag.
        main_clause_all: value shown as the main clause's 'all' flag.
        annotated_conditions: iterable of dicts with the keys "condition",
            "modified_noun", "negation" and "all"; only read when
            ``has_subclause`` is truthy.
        has_subclause: whether to list the sub-clause annotations; when falsy
            a single "no sub-clause" line is printed instead.

    Returns:
        None.
    """
    print(f"句子主體註記：\n否定：{main_clause_negation}，包含 'all'：{main_clause_all}")

    # Guard clause: nothing to list when the sentence has no sub-clause.
    if not has_subclause:
        print("\n子句是否存在：False")
        return

    print("\n子句的條件、修飾詞彙及註記：")
    for entry in annotated_conditions:
        print(
            f"條件：'{entry['condition']}'，"
            f"修飾的詞彙：{entry['modified_noun']}，"
            f"否定：{entry['negation']}，"
            f"包含 'all'：{entry['all']}"
        )

# Example: a sentence with one relative clause ("who supply red parts");
# the annotations are printed by the call below.
text = "List suppliers who supply red parts."
process_and_annotate_sentence(text)

句子主體註記：
否定：False，包含 'all'：False

子句的條件、修飾詞彙及註記：
條件：'who supply red parts'，修飾的詞彙：suppliers，否定：False，包含 'all'：False
