# DiscordBot x AI智能
此處為Discord Bot x AI客服機器人的程式演示
由於部分原始碼遺失，僅展示訓練的部分


## 1.設定/訓練模組


In [None]:
from ckiptagger import data_utils, construct_dictionary, WS, POS, NER
import json
import numpy as np
import torch
import random
import torch.nn as nn
import os
from torch.utils.data import Dataset, DataLoader

In [None]:
# Download (if needed) and load the CKIP models.
# data.zip is used as the marker that the model archive has already been
# fetched; when it is absent the data is downloaded into the working directory.
if os.path.isfile('data.zip'):
    print("成功讀取 data.zip 檔案")
else:
    print("未讀取到 data.zip 檔案，正在重新下載...")
    # A failed download now propagates its own error instead of being
    # followed by an attempt to load models that are not there.
    data_utils.download_data_gdown("./")

# Load the word-segmentation, part-of-speech and named-entity models.
wsmodel = WS("./data")
posmodel = POS("./data")
nermodel = NER("./data")
print("設定 ws,pos,ner 成功")

In [None]:

# Word segmentation + part-of-speech tagging
def cutwords(sentence_list):
    """Segment sentences into words and tag each word with its part of speech.

    Uses the module-level ``wsmodel``, ``posmodel`` and ``nermodel`` objects.

    Args:
        sentence_list: A list of sentences. A single ``str`` is also accepted
            and is treated as one sentence.

    Returns:
        A flat list of ``"word(POS)"`` strings covering every sentence in
        order.
    """
    # Treat a bare string as one sentence; otherwise it would be handled as a
    # sequence of single characters (len() and iteration are per character).
    if isinstance(sentence_list, str):
        sentence_list = [sentence_list]

    # Word segmentation
    word_sentence_list = wsmodel(
        sentence_list,
        sentence_segmentation = True, # To consider delimiters
        segment_delimiter_set = {",", "。", ":", "?", "!", ";", "，","「","」"} # This is the default set of delimiters
        # recommend_dictionary = dictionary1, # words in this dictionary are encouraged
        # coerce_dictionary = dictionary2, # words in this dictionary are forced
    )
    # Part-of-speech tag for every segmented word
    pos_sentence_list = posmodel(word_sentence_list)
    # Named entities detected in the sentences (not used for the output yet)
    entity_sentence_list = nermodel(word_sentence_list, pos_sentence_list)

    # Internal invariant: one POS sequence per segmented sentence.
    assert len(word_sentence_list) == len(pos_sentence_list)

    # Distinct loop names keep the POS tag from shadowing the POS model.
    return [
        f"{word}({pos_tag})"
        for words, pos_tags in zip(word_sentence_list, pos_sentence_list)
        for word, pos_tag in zip(words, pos_tags)
    ]

# Bag-of-words encoder
def bag_of_words(tokenized_sentence, all_words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: Collection of tokens found in the sentence.
        all_words: The vocabulary; position ``i`` of the result corresponds
            to ``all_words[i]``.

    Returns:
        A 1-D ``float32`` NumPy array with 1.0 where the vocabulary word
        occurs in ``tokenized_sentence`` and 0.0 elsewhere.
    """
    presence = [
        1.0 if vocab_word in tokenized_sentence else 0.0
        for vocab_word in all_words
    ]
    return np.array(presence, dtype=np.float32)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Open the intent definition file.
try:
    # Explicit UTF-8 so the Chinese text decodes the same on every platform.
    with open('intents.json', 'r', encoding='utf-8') as f:
        intents_box = json.load(f)
except (OSError, ValueError) as err:
    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
    # Keep an intents_box that is already defined (e.g. from an earlier
    # notebook cell); fail with a clear message when there is none.
    if 'intents_box' not in globals():
        raise RuntimeError("無法讀取 intents.json，且沒有可用的預設資料") from err

# Load the previously trained model.
# NOTE: this requires data.pth to exist already.
FILE = 'data.pth'
# map_location lets a checkpoint saved from a CUDA device load on a CPU-only
# machine.
data = torch.load(FILE, map_location=device)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

In [None]:
# Read the intent definitions
try:
    # Explicit UTF-8 so the Chinese text decodes the same on every platform.
    with open('intents.json', 'r', encoding='utf-8') as f:
        intents_box = json.load(f)
except (OSError, ValueError):
    # Only I/O and parse/decode errors are handled here (ValueError covers
    # json.JSONDecodeError and UnicodeDecodeError); anything else propagates.
    # An intents_box defined earlier is kept as the fallback.
    print("讀取intents.json失敗，將使用預設測試資料")

# Build the vocabulary, the tag list and the (tokens, tag) training pairs
all_words = []
tags = []
xy = []

for intent in intents_box['intents_list']:
    tag = intent['tag']
    tags.append(tag)

    for sentence in intent['patterns']:
        # cutwords takes a list of sentences, so wrap the single pattern.
        tokens = cutwords([str(sentence)])
        all_words.extend(tokens)
        xy.append((tokens, tag))

# Deduplicate and sort so vector positions and label indices are stable.
all_words = sorted(set(all_words))
tags = sorted(set(tags))

In [None]:
# Prepare the training arrays from the (tokens, tag) pairs in xy.
# X_train: one bag-of-words vector per pattern (model input).
# y_train: index of the pattern's tag in `tags` (class label).
X_train = np.array([bag_of_words(tokens, all_words) for tokens, _ in xy])
y_train = np.array([tags.index(pattern_tag) for _, pattern_tag in xy])

In [None]:
# PyTorch network settings (hyperparameters)

# Layer widths: input and output sizes follow from the training data.
input_size = len(X_train[0])
output_size = len(tags)
hidden_size = 6

# Optimisation settings.
batch_size = 8
learning_rate = 0.01
num_epochs = 987

# PyTorch dataset wrapping the training arrays
class ChatDataset(Dataset):
    """Dataset of (input vector, label) pairs.

    Args:
        x_data: Indexable collection of input vectors. Defaults to the
            module-level ``X_train``.
        y_data: Indexable collection of labels aligned with ``x_data``.
            Defaults to the module-level ``y_train``.
    """

    def __init__(self, x_data=None, y_data=None):
        # Fall back to the module-level arrays so ChatDataset() keeps working.
        self.x_data = X_train if x_data is None else x_data
        self.y_data = y_train if y_data is None else y_data
        self.n_samples = len(self.x_data)

    def __getitem__(self, index):
        """Return the (input, label) pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of training samples."""
        return self.n_samples
class NeuralNet(nn.Module):
    """Feed-forward classifier: two hidden ReLU layers and a linear output.

    Args:
        input_size: Width of the input vectors.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output scores.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # The attribute names l1/l2/l3 determine the state_dict keys, so they
        # must stay as they are for saved checkpoints to load.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        hidden = x
        for layer in (self.l1, self.l2):
            hidden = self.relu(layer(hidden))
        # No activation on the last layer: callers apply softmax or a loss.
        return self.l3(hidden)

In [None]:
# PyTorch network training
def main():
    """Train the intent classifier and save the result to ``data.pth``.

    Reads the module-level hyperparameters (``batch_size``, ``input_size``,
    ``hidden_size``, ``output_size``, ``learning_rate``, ``num_epochs``) and
    the module-level ``all_words`` / ``tags`` lists. The saved checkpoint
    holds the model weights, the layer sizes, the vocabulary and the tags.

    The loss printed during and after training is the loss of the last
    mini-batch processed, not an epoch average.

    Raises:
        ValueError: If ``num_epochs`` is smaller than 1.
    """
    if num_epochs < 1:
        # Without at least one epoch there is no loss to report and the
        # saved model would be untrained.
        raise ValueError("num_epochs must be at least 1")

    # Wire up dataset, loader, device and model
    dataset = ChatDataset()
    train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = NeuralNet(input_size, hidden_size, output_size).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Report progress about ten times; max(1, ...) avoids a modulo by zero
    # when num_epochs is below 10.
    log_every = max(1, num_epochs // 10)

    for epoch in range(num_epochs):
        for (sentence, tag) in train_loader:
            # Reset the gradients
            optimizer.zero_grad()

            sentence = sentence.to(device)
            # CrossEntropyLoss expects integer (long) class indices
            tag = tag.to(dtype=torch.long).to(device)

            # Forward propagation
            outputs = model(sentence)
            loss = criterion(outputs, tag)

            # Backward propagation
            loss.backward()

            # Update all parameters
            optimizer.step()

        if (epoch+1) % log_every == 0:
            print(f'訓練次數[{epoch+1}/{num_epochs}], 損失函數:{loss.item():.8f}')

    print(f'訓練完畢，損失函數為 {loss.item():.4f}')

    # Save the trained weights and everything needed to rebuild the
    # classifier into data.pth
    data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
    }
    FILE = "data.pth"
    torch.save(data, FILE)

    print(f'以儲存訓練結果檔案至{FILE}')

if __name__=="__main__":
    # Train when there is no saved model, or when the user asks to retrain.
    checkpoint_path = "data.pth"
    needs_training = True
    if os.path.isfile(checkpoint_path):
        tinput = input("已讀取到先前使用過的模型，是否要重新訓練(是/否)?  ")
        # Only the exact answer "是" triggers a retrain.
        needs_training = tinput == "是"
        if needs_training:
            print("重新訓練新模型")
            os.remove(checkpoint_path)
    if needs_training:
        main()
else:
    print("__name__ != __main__ !!")

## 2.對話

In [None]:
# Rebuild the classifier from the checkpoint on disk rather than from values
# loaded earlier: after a retrain the earlier model_state is stale, and the
# vocabulary/tags must be the ones the saved weights were trained with.
data = torch.load('data.pth', map_location=device)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
# Switch the model from training mode to evaluation mode
model.eval()

#-- Chat loop --#

# Bot name and start-up message
bot_name = "中文聊天機器人"
print("機器人已準備就緒，輸入於終端機即可開始聊天，輸入\"quit\"結束程式")

while True:
    sentence = input("You: ")
    if sentence == "quit":
        break

    # Tokenize the input the same way the training patterns were tokenized:
    # cutwords takes a list of sentences, so wrap the single input line.
    sentence = cutwords([sentence])
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)

    # Run the prediction; no gradients are needed for inference
    with torch.no_grad():
        output = model(X)
    max_value, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]

    # Turn the scores into probabilities and take the predicted tag's one
    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]

    # Above 85% confidence, reply with a random response for that tag
    if prob.item() > 0.85:
        for intent in intents_box['intents_list']:
            if tag == intent["tag"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
    else:
        print(f"{bot_name}:我不知道你在說甚麼")
    print(f"\n最有可能的標籤是 : {tag}， 預測的可能性為: {prob.item()}")
