In [1]:
# 数据处理
import pandas as pd
import numpy as np
import json
import csv
import random
import math
from itertools import product


# 网络分析与可视化
import networkx as nx
from pyvis import network as net
import matplotlib.pyplot as plt
import powerlaw # Power laws are probability distributions with the form:p(x)∝x−α

In [2]:
# Load the fully processed semantic-network table; later cells read it as `df_complete`.

df_complete = pd.read_csv(
    '01_Processed Data/Complete-Data.csv'
)
# Trailing expression: the notebook renders the first rows (5 is head()'s default).
df_complete.head(n=5)

Unnamed: 0,Concept,Rel,Feature,Frequency,familiarity,concretness,上级类别,下级类别,coverage,cue_validity,categorical_distinctiveness
0,安全带,可以,可以-保护,27.096774,9.0,9.0,人造物,工具,0.851345,0.03832,0.158537
1,安全带,是,是-带子,22.258065,9.0,9.0,人造物,工具,0.851345,0.286019,0.012195
2,安全带,其他,其他-车,17.419355,9.0,9.0,人造物,工具,0.851345,0.122012,0.021341
3,安全带,是,是-安保的,16.451613,9.0,9.0,人造物,工具,0.851345,0.089938,0.041159
4,安全带,可以,可以-保护人,15.483871,9.0,9.0,人造物,工具,0.851345,0.149063,0.035061


## 生成命名测试用数据 Picture Naming Test Data Generation

In [2]:
## Input
# Words to include in the picture-naming test; one step is generated per word.
word_list = ['豆浆', '菠萝汁', '咖啡']


# word_list = cue_words

# Top-level structure written to the JSON file: a step cursor plus the step list.
data = {
    'currentStep': 0,
    'steps': []
}

for word in word_list:
    # Every step is created with the same initial countdown/result/status values;
    # only the name and the image path depend on the word.
    new_step = {
        'name':word,
        # 'image':'local-resource:///Users/zhanghexin/aphasia_viz/src/assets/test_images/'+word+'.jpeg',
        'image':'/test_images/'+word+'.jpeg',
        'countdown':20,
        'result':'fail',
        'status':'unchecked',
    }
    data['steps'].append(new_step)

# ensure_ascii=False writes the Chinese words as raw characters, so the file
# encoding must be pinned; without it open() uses the platform locale encoding,
# which can raise UnicodeEncodeError or produce a non-UTF-8 JSON file.
with open("02_VASystemData/test_picture-naming.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False)

## 生成语义特征分析训练用数据 Semantic Feature Analysis (SFA) Training Data Generation

In [3]:
 #列出共有某个语义特征的同类概念

def List_Related_Concepts(concept,feature):
    """List the other concepts of the same top-level category that have `feature`.

    Reads the module-level `df_complete` frame. The category is the first
    '上级类别' value found on the rows whose 'Concept' equals `concept`.

    Args:
        concept: Value to look up in the 'Concept' column.
        feature: Value to match in the 'Feature' column.

    Returns:
        A list of distinct 'Concept' values, in the order `unique()` yields
        them, that share the category and the feature; `concept` itself is
        excluded.

    Raises:
        IndexError: if no row of `df_complete` has 'Concept' == `concept`.
    """
    concept_rows = df_complete.loc[df_complete['Concept'] == concept]
    main_category = concept_rows['上级类别'].unique()[0]

    in_same_category = df_complete['上级类别'] == main_category
    has_feature = df_complete['Feature'] == feature
    candidates = df_complete.loc[in_same_category & has_feature, 'Concept'].unique()

    # Drop the query concept so only *other* concepts are reported.
    return [name for name in candidates if name != concept]

# List_Related_Concepts('安全带','可以-保护人')


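# Confusable semantic features (distractors)
# Given a (concept, feature) pair:
#   take the same-category concepts that also have this feature (List_Related_Concepts),
#   collect their features that use the same relation (Rel) as the given feature,
#     rank them by categorical_distinctiveness, then cue_validity, both descending
#     (both are precomputed columns of df_complete),
#         and keep only the features the given concept does not itself have;
#             those are the distractors.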

def List_Confused_Feature(concept, feature):
    """Rank features that could be confused with `feature` for `concept`.

    Candidates come from the concepts returned by
    `List_Related_Concepts(concept, feature)`. Only their features whose 'Rel'
    equals the 'Rel' of the (`concept`, `feature`) row are considered. They are
    sorted by 'categorical_distinctiveness' and then 'cue_validity', both
    descending, de-duplicated, and any feature `concept` itself has is removed.

    Reads the module-level `df_complete` frame.

    Args:
        concept: Value to look up in the 'Concept' column.
        feature: A 'Feature' value belonging to `concept`.

    Returns:
        A list of distinct 'Feature' values in ranked order; may be empty.

    Raises:
        IndexError: if `df_complete` has no row for the (`concept`, `feature`)
            pair (the relation lookup indexes an empty array).
    """
    related_concepts = List_Related_Concepts(concept, feature)
    df_related = df_complete[df_complete['Concept'].isin(related_concepts)]

    # Relation of the given pair; candidates must use this same relation.
    relationship = df_complete[
        (df_complete['Concept'] == concept) & (df_complete['Feature'] == feature)
    ]['Rel'].unique()[0]

    ranked_features = (
        df_related[df_related['Rel'] == relationship]
        .sort_values(by=['categorical_distinctiveness', 'cue_validity'], ascending=False)['Feature']
        .unique()
    )

    # Built once: the previous per-candidate lambda re-filtered the whole frame
    # for every candidate feature, which is loop-invariant work.
    own_features = set(df_complete.loc[df_complete['Concept'] == concept, 'Feature'])

    return [candidate for candidate in ranked_features if candidate not in own_features]

# List_Confused_Feature('安全带','可以-保护人')[0:10]


In [5]:
## Input
# Concepts to include in the SFA training file; one step is generated per word.
word_list = ['轿车',
  '摩托车',
  '消防车',
  '消防栓',
  '水',
  '冰红茶',
  '谷子',
  '薄荷',
  '白菜']
# word_list = cue_words


# Maps a relation (a 'Rel' value of df_complete, which is also the '<Rel>-'
# prefix of the matching 'Feature' strings) to the "mention" key it fills.
_REL_TO_MENTION_KEY = {
    '可以': '用于',
    '需要': '做',
    '有': '有',
    '像': '联想到',
}


def _mention_options(concept, rel):
    """Build the option list for one relation of one concept.

    Picks the concept's feature with the highest 'cue_validity' among its rows
    with 'Rel' == `rel`, prepends up to two features from
    `List_Confused_Feature(concept, <that feature>)`, and strips the
    '<rel>-' prefix text from every option.

    Returns:
        Up to two confusable features followed by the concept's own feature,
        or an empty list when the concept has no row with this relation (the
        direct `.unique()[0]` lookup used to raise IndexError in that case).
    """
    own_rows = df_complete[(df_complete['Concept'] == concept) & (df_complete['Rel'] == rel)]
    ranked = own_rows.sort_values(by=['cue_validity'], ascending=False)['Feature'].unique()
    if len(ranked) == 0:
        return []

    target_feature = ranked[0]
    options = List_Confused_Feature(concept, target_feature)[0:2]
    options.append(target_feature)
    return [option.replace(rel + '-', '') for option in options]


data = {
 "steps": []
}
for word in word_list:
    new_step = {
        'result':'',
        'status':'unchecked',
        "graph": {
            "id": "root",
            "label": word,
            'img':'/test_images/'+word+'.jpeg',
            "children": []
        },
        "mention": {
            "属于":[],
            "用于":[],
            "做":[],
            "有":[],
            "在":[],
            "联想到":[],
        }
    }

    concept = word
    # Raises IndexError if the word has no rows in df_complete.
    maincategory = df_complete[df_complete['Concept'] == concept]['上级类别'].unique()[0]
    # "属于" options: every sub-category found under the concept's top-level category.
    mention_belongto = list(df_complete[df_complete['上级类别'] == maincategory]['下级类别'].unique())
    # rel_list = list(df_complete['Rel'].unique())

    new_step['mention']['属于'] = mention_belongto
    # "在" has no source relation here and keeps its empty default.
    for rel, mention_key in _REL_TO_MENTION_KEY.items():
        new_step['mention'][mention_key] = _mention_options(concept, rel)

    data['steps'].append(new_step)


# ensure_ascii=False writes the Chinese text as raw characters, so the file
# encoding must be pinned; without it open() uses the platform locale encoding,
# which can raise UnicodeEncodeError or produce a non-UTF-8 JSON file.
with open("02_VASystemData/test_SFA.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False)

['食用', '减肥', '提神']