In [3]:
import json
def save_json(data, filepath=r'new_data.json'):
    with open(filepath, 'w', encoding='utf-8') as fp:
        json.dump(data, fp, indent=4, ensure_ascii=False)

In [None]:
connections = json.load(open('v2/backup/nodes/connections.json'))
inner_connections = json.load(open('v2/backup/nodes/inner_connections.json'))
all_connections = connections + inner_connections
# for connection in connections:
#     connection['indicator1'] = connection['indicator1'].lower()[:-1]
#     connection['indicator2'] = connection['indicator2'].lower()[:-1]
save_json(all_connections, 'v2/backup/nodes/connections.json')

In [3]:
var_types = ['driver', 'pressure', 'state', 'impact', 'response']
for var_type in var_types:
    nodes = json.load(open(f'v2/backup/nodes/{var_type}_nodes.json'))
    nodes['variable_type'] = var_type
    save_json(nodes, f'v2/backup/nodes/{var_type}_nodes.json')

In [6]:
import glob
import json
for file in glob.glob('v2/backup/nodes/*.json'):
    data = json.load(open(file))
    save_json(data, file)

In [7]:
def process_interview(filepaths):
    interview_dict = {}
    interviews = []
    data_by_chunk = {}
    for interview_file in filepaths:
        interview_data = json.load(open(interview_file))
        interview_file = interview_file.replace("\\", "/")
        participant = interview_file.split('/')[-1].replace(".json", "")
        interview_dict[participant] = interview_data
        for chunk in interview_data:
            if chunk['topic'] in ['商業', '汙染', '貿易', '農業']:
                chunk['topic'] = '其他'
            data_by_chunk[chunk['id']] = chunk
    interview_dict = dict(sorted(interview_dict.items(), key=lambda x: int(x[0].replace("N", ""))))
    for participant, interview in interview_dict.items():
        interviews.append(
            {
                "file_name": participant,
                "data": interview
            }
        )
    return interviews

def collect_chunks(filepaths):
    interviews = process_interview(filepaths)
    chunks = []
    for interview in interviews:
        for chunk in interview['data']:
            chunks.append(chunk)
    return chunks

In [18]:
import glob
from pprint import pprint
chunks = collect_chunks(glob.glob("v2/backup/chunk/" + f'chunk_summaries_w_ktte/*.json'))
chunk_dict = {chunk['id']: chunk for chunk in chunks}
for var_type in ['driver', 'pressure', 'state', 'impact', 'response']:
    nodes = json.load(open(f'v2/backup/nodes/{var_type}_nodes.json'))
    for var_name, mentions in nodes['variable_mentions'].items():
        mentions = mentions['mentions']
        for index, mention in enumerate(mentions):
            chunk_id = mention['chunk_id']
            chunk_conversation = chunk_dict[chunk_id]['conversation']
            interviewee_message_indexes = [i for i, message in enumerate(chunk_conversation) if message['speaker'] == 0]
            indices_into_interviewee_message_indices = mention['conversation_ids']
            print(indices_into_interviewee_message_indices)
            print(interviewee_message_indexes)
            print(chunk_id, chunk_conversation)
            print(var_name, var_type)

            conversation_indices = [interviewee_message_indexes[i] for i in indices_into_interviewee_message_indices if i < len(interviewee_message_indexes)]
            mention['conversation_ids'] = conversation_indices
            mentions[index] = mention
        mentions = [mention for mention in mentions if len(mention['conversation_ids']) > 0]
        nodes['variable_mentions'][var_name] = {
            "variable_name": var_name,
            "mentions": mentions
        }
    save_json(nodes, f'v2/backup/nodes/{var_type}_nodes.json')


[4]
[1]
N6_5 [{'speaker': 1, 'content': '為什麼？'}, {'speaker': 0, 'content': '因為他們就是覺得那是他們的傳統文化，而不是你們這些人的傳統文化，所以你們不應該擁有，所以他們就是會非常排外，才他們就會講說，就希望你拿下來。然後在這裡的話就比較不會有這個問題，就是這邊的人，基本上也多半數是從外面在移來的，例如像是臺東啊或者是其他地方，所以呃，真的是直接在從這裡根生的比較少就是會，而且他們在這裡土生土長的人比較嚮往去外面，所以他們在後來的時候的歲數的比較大了一點，所以他們就比較不會對這個地方有太多的排外或者是什麼，他通常排外這裡排外的感覺比較像是進民宿業，或者是同樣的工作性質類的競爭，比較不像是那種原住民跟外地人的排外是不太一樣的。'}]
人口 driver
[5]
[1, 3, 5, 6, 7, 8, 9, 11, 12, 14]
N10_3 [{'speaker': 1, 'content': '所以您覺得還是這兩個是在一起？'}, {'speaker': 0, 'content': '我覺得這2個就是蠻相輔相成的部分。因為就是政府的運作，土地這件事情也包含到政府的運作，如果說他可以讓很多閒置閒暇的空間更有效利用的話，或許我們這邊真的也會有適合小孩的補習班。我們這邊沒有補習班，我們這邊沒有托嬰中心，我們只有幼兒園就是要抽籤，然後他必須要2歲以上才可以滿2歲以上才可以去抽。像我們這一屆是22個抽4個。'}, {'speaker': 1, 'content': '太少了吧。'}, {'speaker': 0, 'content': '所以我們就覺得，這是其實在這邊，你要鼓勵很多的生育問題，就是鼓勵大家生育，我生了小孩沒有人幫顧怎麼辦？我們就只有夫妻倆誰幫我顧小孩？'}, {'speaker': 1, 'content': '所以您的[配偶]也不是綠島人，他也是後來過。'}, {'speaker': 0, 'content': '他是臺東人，但因為後來……'}, {'speaker': 0, 'content': '[略]'}, {'speaker': 0, 'content': '我覺得就是，但反正就是他們就認為都是他們自己拼的，所以我們就覺得反正再怎麼講，我們前面就已經傻傻的付出了，那就算了，我們回