In [1]:
## データセットemotional_persuasive_dialogueを整形してデータベースに格納する
## このセルは1回実行しておくだけでOK

import random
import sqlite3
from pathlib import Path
import time

def select_label(mode, label_options):
    """
    Decide the label of one utterance by majority vote.

    Up to three annotator labels are drawn at random from the "-"-separated
    candidates in ``label_options``.  A label is adopted when at least two
    of the drawn labels agree on it.

    Args:
        mode: "system_emotion" or "user_emotion" to vote over the emotion
            labels, "user_acceptance" to vote over the acceptance levels
            "1" to "5".  Any other value never matches and yields "NONE".
        label_options: Raw label field such as "平静-平静-安心-怒り".  It may
            carry surrounding whitespace or a trailing newline.

    Returns:
        The adopted label string, or "NONE" when no label was drawn twice.
    """
    candidates = label_options.split("-")
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the sample size for short label lists.
    sampled = random.sample(candidates, min(3, len(candidates)))
    # Acceptance labels are the last field of a line and keep its newline;
    # strip it, otherwise the occurrence count below would miss the label.
    sampled = [label.strip() for label in sampled]

    # Label vocabulary to vote over for the requested mode.
    if mode in ("system_emotion", "user_emotion"):
        valid_labels = ["平静", "安心", "喜び", "怒り", "悲しみ"]
    elif mode == "user_acceptance":
        valid_labels = [str(i) for i in range(1, 6)]
    else:
        valid_labels = []

    # With at most three drawn labels, at most one label can occur twice.
    for label in valid_labels:
        if sampled.count(label) >= 2:
            return label
    return "NONE"

def data_preperation():
    """
    Register the dialogue dataset in the SQLite database dialogue_corpus.db.

    For each of the 5 scenes and each of its 200 dialogues, one table named
    ``<scene><index>`` is created and every line of the matching
    ``<index>.txt.label`` file is inserted as one row.

    Emotion and acceptance labels are decided by ``select_label``: a label
    shared by at least two of three annotators is adopted, otherwise "NONE"
    is stored.  Rows labelled "NONE" are still registered here; they are
    meant to be removed later, at training time.

    Raises:
        sqlite3.OperationalError: if a table already exists, i.e. the
            function is run a second time against the same database file.
        OSError: if a dataset file cannot be opened.
    """

    origin_path = Path("../emotional_persuasive_dialogue/")
    # Database that stores the dataset.
    db = sqlite3.connect("dialogue_corpus.db")
    try:
        c = db.cursor()

        for scene in ["cleaning", "exercise", "game", "lunch", "sleep"]:
            for i in range(0, 200):
                # 5 scenes * 200 dialogues
                path = origin_path / scene / "{}.txt.label".format(i)

                # Columns: talker | talk_content | emotion | acceptance
                table_name = scene + str(i)
                create_table = "create table " + table_name + " (talker text, talk_content text, emotion text, acceptance text)"
                c.execute(create_table)

                # Values are bound through "?" placeholders instead of
                # being formatted into the SQL text.
                sql = "insert into " + table_name + " (talker, talk_content, emotion, acceptance) values (?, ?, ?, ?)"

                with path.open('r', encoding='utf-8') as file:
                    for line in file:
                        # e.g. ['ロボット1', 'Ａ君、お部屋を掃除をしましょう。', '平静-平静-安心-怒り', '\n']
                        columns = line.split(",")
                        # A line that matches neither speaker is stored
                        # as a row of empty strings.
                        talker, talk_content, emotion, acceptance = "", "", "", ""
                        if columns[0].find("ロボット") != -1:
                            # System utterance: it has no acceptance label.
                            talker = "system"
                            talk_content = columns[1]
                            emotion = select_label("system_emotion", columns[2])
                            acceptance = "-"
                        elif columns[0].find("A君") != -1:
                            # User utterance: emotion and acceptance labels.
                            talker = "user"
                            talk_content = columns[1]
                            emotion = select_label("user_emotion", columns[2])
                            acceptance = select_label("user_acceptance", columns[3])

                        c.execute(sql, (talker, talk_content, emotion, acceptance))

                # One commit per dialogue instead of one per row: fewer
                # disk syncs, and a dialogue is stored completely or not
                # at all.
                db.commit()
                print("Table:{} has been registered.".format(table_name))
            print("Scene:{} has been registered.".format(scene))
    finally:
        # Release the connection even when a file is missing or a
        # statement fails.
        db.close()
    print("dialogue_corpus.db has been structed completely.")

# Run the one-off import and report how long it took (wall-clock time).
before_time = time.time()
data_preperation()
after_time = time.time()
elapsed_seconds = after_time - before_time
print("By time.time(): data_preperation took about {} seconds.".format(elapsed_seconds))

Table:cleaning0 has been registered.
Table:cleaning1 has been registered.
Table:cleaning2 has been registered.
Table:cleaning3 has been registered.
Table:cleaning4 has been registered.
Table:cleaning5 has been registered.
Table:cleaning6 has been registered.
Table:cleaning7 has been registered.
Table:cleaning8 has been registered.
Table:cleaning9 has been registered.
Table:cleaning10 has been registered.
Table:cleaning11 has been registered.
Table:cleaning12 has been registered.
Table:cleaning13 has been registered.
Table:cleaning14 has been registered.
Table:cleaning15 has been registered.
Table:cleaning16 has been registered.
Table:cleaning17 has been registered.
Table:cleaning18 has been registered.
Table:cleaning19 has been registered.
Table:cleaning20 has been registered.
Table:cleaning21 has been registered.
Table:cleaning22 has been registered.
Table:cleaning23 has been registered.
Table:cleaning24 has been registered.
Table:cleaning25 has been registered.
Table:cleaning26 has b

Table:exercise13 has been registered.
Table:exercise14 has been registered.
Table:exercise15 has been registered.
Table:exercise16 has been registered.
Table:exercise17 has been registered.
Table:exercise18 has been registered.
Table:exercise19 has been registered.
Table:exercise20 has been registered.
Table:exercise21 has been registered.
Table:exercise22 has been registered.
Table:exercise23 has been registered.
Table:exercise24 has been registered.
Table:exercise25 has been registered.
Table:exercise26 has been registered.
Table:exercise27 has been registered.
Table:exercise28 has been registered.
Table:exercise29 has been registered.
Table:exercise30 has been registered.
Table:exercise31 has been registered.
Table:exercise32 has been registered.
Table:exercise33 has been registered.
Table:exercise34 has been registered.
Table:exercise35 has been registered.
Table:exercise36 has been registered.
Table:exercise37 has been registered.
Table:exercise38 has been registered.
Table:exerci

Table:game29 has been registered.
Table:game30 has been registered.
Table:game31 has been registered.
Table:game32 has been registered.
Table:game33 has been registered.
Table:game34 has been registered.
Table:game35 has been registered.
Table:game36 has been registered.
Table:game37 has been registered.
Table:game38 has been registered.
Table:game39 has been registered.
Table:game40 has been registered.
Table:game41 has been registered.
Table:game42 has been registered.
Table:game43 has been registered.
Table:game44 has been registered.
Table:game45 has been registered.
Table:game46 has been registered.
Table:game47 has been registered.
Table:game48 has been registered.
Table:game49 has been registered.
Table:game50 has been registered.
Table:game51 has been registered.
Table:game52 has been registered.
Table:game53 has been registered.
Table:game54 has been registered.
Table:game55 has been registered.
Table:game56 has been registered.
Table:game57 has been registered.
Table:game58 h

Table:lunch65 has been registered.
Table:lunch66 has been registered.
Table:lunch67 has been registered.
Table:lunch68 has been registered.
Table:lunch69 has been registered.
Table:lunch70 has been registered.
Table:lunch71 has been registered.
Table:lunch72 has been registered.
Table:lunch73 has been registered.
Table:lunch74 has been registered.
Table:lunch75 has been registered.
Table:lunch76 has been registered.
Table:lunch77 has been registered.
Table:lunch78 has been registered.
Table:lunch79 has been registered.
Table:lunch80 has been registered.
Table:lunch81 has been registered.
Table:lunch82 has been registered.
Table:lunch83 has been registered.
Table:lunch84 has been registered.
Table:lunch85 has been registered.
Table:lunch86 has been registered.
Table:lunch87 has been registered.
Table:lunch88 has been registered.
Table:lunch89 has been registered.
Table:lunch90 has been registered.
Table:lunch91 has been registered.
Table:lunch92 has been registered.
Table:lunch93 has be

Table:sleep96 has been registered.
Table:sleep97 has been registered.
Table:sleep98 has been registered.
Table:sleep99 has been registered.
Table:sleep100 has been registered.
Table:sleep101 has been registered.
Table:sleep102 has been registered.
Table:sleep103 has been registered.
Table:sleep104 has been registered.
Table:sleep105 has been registered.
Table:sleep106 has been registered.
Table:sleep107 has been registered.
Table:sleep108 has been registered.
Table:sleep109 has been registered.
Table:sleep110 has been registered.
Table:sleep111 has been registered.
Table:sleep112 has been registered.
Table:sleep113 has been registered.
Table:sleep114 has been registered.
Table:sleep115 has been registered.
Table:sleep116 has been registered.
Table:sleep117 has been registered.
Table:sleep118 has been registered.
Table:sleep119 has been registered.
Table:sleep120 has been registered.
Table:sleep121 has been registered.
Table:sleep122 has been registered.
Table:sleep123 has been register

In [1]:
import my_data_preparation_kit as my_dpkit

## When the cell below was run, the contents of my_data_preparation_kit were written directly in this file.
## However, calling the function train() froze the PC, possibly because too much output log had piled up.
## -> Gather the data preprocessing functions into a single Python file.
## -> Split the actual processing across several notebooks, each of which imports that Python file.
## This should keep the output log from piling up.

In [2]:
## Build word_table and synonym_table and save them in JSON format.
## This cell only needs to be run once; after that, load the JSON files instead.

my_dpkit.make_and_save_tables()

Dialogue:cleaning0 has gotten from dialogue_corpus.db.
Dialogue:cleaning1 has gotten from dialogue_corpus.db.
Dialogue:cleaning2 has gotten from dialogue_corpus.db.
Dialogue:cleaning3 has gotten from dialogue_corpus.db.
Dialogue:cleaning4 has gotten from dialogue_corpus.db.
Dialogue:cleaning5 has gotten from dialogue_corpus.db.
Dialogue:cleaning6 has gotten from dialogue_corpus.db.
Dialogue:cleaning7 has gotten from dialogue_corpus.db.
Dialogue:cleaning8 has gotten from dialogue_corpus.db.
Dialogue:cleaning9 has gotten from dialogue_corpus.db.
Dialogue:cleaning10 has gotten from dialogue_corpus.db.
Dialogue:cleaning11 has gotten from dialogue_corpus.db.
Dialogue:cleaning12 has gotten from dialogue_corpus.db.
Dialogue:cleaning13 has gotten from dialogue_corpus.db.
Dialogue:cleaning14 has gotten from dialogue_corpus.db.
Dialogue:cleaning15 has gotten from dialogue_corpus.db.
Dialogue:cleaning16 has gotten from dialogue_corpus.db.
Dialogue:cleaning17 has gotten from dialogue_corpus.db.
Di

Dialogue:exercise108 has gotten from dialogue_corpus.db.
Dialogue:exercise109 has gotten from dialogue_corpus.db.
Dialogue:exercise110 has gotten from dialogue_corpus.db.
Dialogue:exercise111 has gotten from dialogue_corpus.db.
Dialogue:exercise112 has gotten from dialogue_corpus.db.
Dialogue:exercise113 has gotten from dialogue_corpus.db.
Dialogue:exercise114 has gotten from dialogue_corpus.db.
Dialogue:exercise115 has gotten from dialogue_corpus.db.
Dialogue:exercise116 has gotten from dialogue_corpus.db.
Dialogue:exercise117 has gotten from dialogue_corpus.db.
Dialogue:exercise118 has gotten from dialogue_corpus.db.
Dialogue:exercise119 has gotten from dialogue_corpus.db.
Dialogue:exercise120 has gotten from dialogue_corpus.db.
Dialogue:exercise121 has gotten from dialogue_corpus.db.
Dialogue:exercise122 has gotten from dialogue_corpus.db.
Dialogue:exercise123 has gotten from dialogue_corpus.db.
Dialogue:exercise124 has gotten from dialogue_corpus.db.
Dialogue:exercise125 has gotten

Dialogue:lunch20 has gotten from dialogue_corpus.db.
Dialogue:lunch21 has gotten from dialogue_corpus.db.
Dialogue:lunch22 has gotten from dialogue_corpus.db.
Dialogue:lunch23 has gotten from dialogue_corpus.db.
Dialogue:lunch24 has gotten from dialogue_corpus.db.
Dialogue:lunch25 has gotten from dialogue_corpus.db.
Dialogue:lunch26 has gotten from dialogue_corpus.db.
Dialogue:lunch27 has gotten from dialogue_corpus.db.
Dialogue:lunch28 has gotten from dialogue_corpus.db.
Dialogue:lunch29 has gotten from dialogue_corpus.db.
Dialogue:lunch30 has gotten from dialogue_corpus.db.
Dialogue:lunch31 has gotten from dialogue_corpus.db.
Dialogue:lunch32 has gotten from dialogue_corpus.db.
Dialogue:lunch33 has gotten from dialogue_corpus.db.
Dialogue:lunch34 has gotten from dialogue_corpus.db.
Dialogue:lunch35 has gotten from dialogue_corpus.db.
Dialogue:lunch36 has gotten from dialogue_corpus.db.
Dialogue:lunch37 has gotten from dialogue_corpus.db.
Dialogue:lunch38 has gotten from dialogue_corp

Dialogue:sleep110 has gotten from dialogue_corpus.db.
Dialogue:sleep111 has gotten from dialogue_corpus.db.
Dialogue:sleep112 has gotten from dialogue_corpus.db.
Dialogue:sleep113 has gotten from dialogue_corpus.db.
Dialogue:sleep114 has gotten from dialogue_corpus.db.
Dialogue:sleep115 has gotten from dialogue_corpus.db.
Dialogue:sleep116 has gotten from dialogue_corpus.db.
Dialogue:sleep117 has gotten from dialogue_corpus.db.
Dialogue:sleep118 has gotten from dialogue_corpus.db.
Dialogue:sleep119 has gotten from dialogue_corpus.db.
Dialogue:sleep120 has gotten from dialogue_corpus.db.
Dialogue:sleep121 has gotten from dialogue_corpus.db.
Dialogue:sleep122 has gotten from dialogue_corpus.db.
Dialogue:sleep123 has gotten from dialogue_corpus.db.
Dialogue:sleep124 has gotten from dialogue_corpus.db.
Dialogue:sleep125 has gotten from dialogue_corpus.db.
Dialogue:sleep126 has gotten from dialogue_corpus.db.
Dialogue:sleep127 has gotten from dialogue_corpus.db.
Dialogue:sleep128 has gotten

96 usertalks have been registered.
97 usertalks have been registered.
98 usertalks have been registered.
99 usertalks have been registered.
100 usertalks have been registered.
101 usertalks have been registered.
102 usertalks have been registered.
103 usertalks have been registered.
104 usertalks have been registered.
105 usertalks have been registered.
106 usertalks have been registered.
107 usertalks have been registered.
108 usertalks have been registered.
109 usertalks have been registered.
110 usertalks have been registered.
111 usertalks have been registered.
112 usertalks have been registered.
113 usertalks have been registered.
114 usertalks have been registered.
115 usertalks have been registered.
116 usertalks have been registered.
117 usertalks have been registered.
118 usertalks have been registered.
119 usertalks have been registered.
120 usertalks have been registered.
121 usertalks have been registered.
122 usertalks have been registered.
123 usertalks have been register

324 usertalks have been registered.
325 usertalks have been registered.
326 usertalks have been registered.
327 usertalks have been registered.
328 usertalks have been registered.
329 usertalks have been registered.
330 usertalks have been registered.
331 usertalks have been registered.
332 usertalks have been registered.
333 usertalks have been registered.
334 usertalks have been registered.
335 usertalks have been registered.
336 usertalks have been registered.
337 usertalks have been registered.
338 usertalks have been registered.
339 usertalks have been registered.
340 usertalks have been registered.
341 usertalks have been registered.
342 usertalks have been registered.
343 usertalks have been registered.
344 usertalks have been registered.
345 usertalks have been registered.
346 usertalks have been registered.
347 usertalks have been registered.
348 usertalks have been registered.
349 usertalks have been registered.
350 usertalks have been registered.
351 usertalks have been regi

552 usertalks have been registered.
553 usertalks have been registered.
554 usertalks have been registered.
555 usertalks have been registered.
556 usertalks have been registered.
557 usertalks have been registered.
558 usertalks have been registered.
559 usertalks have been registered.
560 usertalks have been registered.
561 usertalks have been registered.
562 usertalks have been registered.
563 usertalks have been registered.
564 usertalks have been registered.
565 usertalks have been registered.
566 usertalks have been registered.
567 usertalks have been registered.
568 usertalks have been registered.
569 usertalks have been registered.
570 usertalks have been registered.
571 usertalks have been registered.
572 usertalks have been registered.
573 usertalks have been registered.
574 usertalks have been registered.
575 usertalks have been registered.
576 usertalks have been registered.
577 usertalks have been registered.
578 usertalks have been registered.
579 usertalks have been regi

780 usertalks have been registered.
781 usertalks have been registered.
782 usertalks have been registered.
783 usertalks have been registered.
784 usertalks have been registered.
785 usertalks have been registered.
786 usertalks have been registered.
787 usertalks have been registered.
788 usertalks have been registered.
789 usertalks have been registered.
790 usertalks have been registered.
791 usertalks have been registered.
792 usertalks have been registered.
793 usertalks have been registered.
794 usertalks have been registered.
795 usertalks have been registered.
796 usertalks have been registered.
797 usertalks have been registered.
798 usertalks have been registered.
799 usertalks have been registered.
800 usertalks have been registered.
801 usertalks have been registered.
802 usertalks have been registered.
803 usertalks have been registered.
804 usertalks have been registered.
805 usertalks have been registered.
806 usertalks have been registered.
807 usertalks have been regi