In [3]:

import re
import pymysql
import pandas as pd
import numpy as np
import datetime
import argparse
import csv
import logging
import os
from tqdm import tqdm,tqdm_notebook, trange
from sklearn.model_selection import train_test_split
from sklearn.metrics import matthews_corrcoef, confusion_matrix, multilabel_confusion_matrix
from matplotlib import pyplot as plt 


In [4]:

def show_map(train_results,dev_results, indicator, data_class=None):
    """Plot one metric for the train and dev runs against the result index.

    Args:
        train_results: Sequence of per-step result mappings; each one is
            indexed with ``indicator``. Its length defines the x axis.
        dev_results: Sequence of result mappings for the dev set, indexed
            the same way (presumably one entry per epoch, like the train
            sequence -- confirm against the caller).
        indicator: Key looked up in every result; also used for the figure
            title and the y-axis label.
        data_class: Optional object exposing ``t_all_output`` and
            ``d_all_output``. When it is truthy and ``indicator`` is
            ``'acc'``, thin horizontal lines are drawn at the largest class
            ratio of each attribute (red for train, blue for dev).

    The figure is drawn on the current pyplot figure and shown immediately;
    nothing is returned.
    """
    epochs = np.arange(len(train_results))

    train_curve = []
    for entry in train_results:
        train_curve.append(entry[indicator])
    dev_curve = []
    for entry in dev_results:
        dev_curve.append(entry[indicator])

    plt.title("%s trend"%indicator)
    plt.xlabel("Epoch")
    plt.ylabel(indicator)

    if indicator == 'acc':
        # Accuracy is shown on a fixed 0..1 scale.
        plt.ylim((0,1.0))
        if data_class:
            train_baseline = get_max_class_ratio(data_class.t_all_output)
            dev_baseline = get_max_class_ratio(data_class.d_all_output)
            for level, colour in ((train_baseline, 'r'), (dev_baseline, 'b')):
                plt.axhline(y=level, color=colour, linewidth=0.5)

    for curve, colour, tag in ((train_curve, 'red', 'train'),
                               (dev_curve, 'blue', 'dev')):
        plt.plot(epochs, curve, color=colour, label=tag)

    plt.legend()
    plt.show()

def get_precision(np):
    """Compute precision, ``tp / (tp + fp)``, from four confusion counts.

    Args:
        np: Iterable of exactly four counts, unpacked in the order
            ``(tn, fp, fn, tp)``. The parameter name shadows the module-level
            numpy alias inside this function; it is kept unchanged so that
            existing callers are not broken.

    Returns:
        The precision, or ``0.0`` when there are no positive predictions
        (``tp + fp == 0``) and the ratio would otherwise be 0/0.

    Raises:
        ValueError: If ``np`` does not unpack into exactly four values.
    """
    tn, fp, fn, tp = np
    predicted_positive = tp + fp
    # Without this guard plain ints raise ZeroDivisionError and numpy ints
    # produce NaN plus a RuntimeWarning; 0.0 mirrors the usual convention
    # for an undefined precision.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive

def get_recall(np):
    """Compute recall, ``tp / (tp + fn)``, from four confusion counts.

    Args:
        np: Iterable of exactly four counts, unpacked in the order
            ``(tn, fp, fn, tp)``. The parameter name shadows the module-level
            numpy alias inside this function; it is kept unchanged so that
            existing callers are not broken.

    Returns:
        The recall, or ``0.0`` when there are no actual positives
        (``tp + fn == 0``) and the ratio would otherwise be 0/0.

    Raises:
        ValueError: If ``np`` does not unpack into exactly four values.
    """
    tn, fp, fn, tp = np
    actual_positive = tp + fn
    # Without this guard plain ints raise ZeroDivisionError and numpy ints
    # produce NaN plus a RuntimeWarning; 0.0 mirrors the usual convention
    # for an undefined recall.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive

def get_F1_score(np):
    """Compute the F1 score (harmonic mean of precision and recall).

    Args:
        np: Iterable of exactly four counts, unpacked in the order
            ``(tn, fp, fn, tp)``. The parameter name shadows the module-level
            numpy alias inside this function; it is kept unchanged so that
            existing callers are not broken.

    Returns:
        ``2 * P * R / (P + R)`` where ``P = tp / (tp + fp)`` and
        ``R = tp / (tp + fn)``. Returns ``0.0`` whenever one of the
        denominators is zero (no predicted positives, no actual positives,
        or both precision and recall equal to zero) instead of failing or
        producing NaN.

    Raises:
        ValueError: If ``np`` does not unpack into exactly four values.
    """
    tn, fp, fn, tp = np
    predicted_positive = tp + fp
    actual_positive = tp + fn
    # Either ratio being undefined makes the score undefined; report 0.0.
    if predicted_positive == 0 or actual_positive == 0:
        return 0.0
    P_precision = tp / predicted_positive
    P_recall = tp / actual_positive
    harmonic_denominator = P_precision + P_recall
    # tp == 0 gives precision == recall == 0, i.e. another 0/0.
    if harmonic_denominator == 0:
        return 0.0
    return 2*(P_precision*P_recall)/harmonic_denominator

# Registry of the per-class metrics, keyed by display name. Each function
# takes the four unpacked confusion counts; plt_class iterates over this
# mapping in insertion order and draws one figure per entry.
eval_functions = dict([
    ('precision', get_precision),
    ('recall', get_recall),
    ('F1_score', get_F1_score),
])

def plt_class(train_results,dev_results, data_class, k):
    """Draw one figure per registered metric for a single class ``k``.

    For every ``(name, function)`` pair in ``eval_functions`` the metric is
    evaluated on ``res['mcm'][k].ravel()`` of each train and dev result and
    plotted against the result index on a fixed 0..1 y axis.

    Args:
        train_results: Sequence of result mappings with an ``'mcm'`` entry
            (presumably the output of sklearn's
            ``multilabel_confusion_matrix`` -- confirm against the caller).
            Its length defines the x axis.
        dev_results: Sequence of result mappings for the dev set, accessed
            the same way.
        data_class: Object exposing ``t_all_output`` and ``d_all_output``;
            only read while drawing the ``'precision'`` figure.
        k: Class index/label used to select the per-class matrix and the
            matching entry of the class-ratio series.

    Each figure is shown immediately; nothing is returned.
    """
    epochs = np.arange(len(train_results))

    for metric_name, metric_fn in eval_functions.items():
        train_curve = []
        for res in train_results:
            train_curve.append(metric_fn(res['mcm'][k].ravel()))
        dev_curve = []
        for res in dev_results:
            dev_curve.append(metric_fn(res['mcm'][k].ravel()))

        plt.title(f"CLASS {k} {metric_name}")
        plt.xlabel("Epoch")
        plt.ylabel(f'{metric_name}')
        plt.ylim((0,1.0))

        for curve, colour, tag in ((train_curve, 'red', 'train'),
                                   (dev_curve, 'blue', 'dev')):
            plt.plot(epochs, curve, color=colour, label=tag)

        if metric_name == 'precision':
            # Horizontal reference lines at the share of class k in the
            # train / dev outputs (presumably a baseline for comparison).
            train_share = get_class_ratio(data_class.t_all_output)[k]
            dev_share = get_class_ratio(data_class.d_all_output)[k]
            for level, colour in ((train_share, 'r'), (dev_share, 'b')):
                plt.axhline(y=level, color=colour, linewidth=1)

        plt.show()

def get_class_ratio(all_output):
    """Return the relative frequency of every distinct value in column 0.

    Args:
        all_output: Data accepted by ``pd.DataFrame``; only the column
            labelled ``0`` of the resulting frame is used.

    Returns:
        A pandas Series of normalized value counts (fractions summing to 1),
        indexed by the distinct values and sorted by that index.
    """
    first_column = pd.DataFrame(all_output)[0]
    shares = first_column.value_counts(normalize=True, sort=False)
    return shares.sort_index()

def get_max_class_ratio(all_output):
    """Return the relative frequency of the most common value in column 0.

    Args:
        all_output: Data accepted by ``pd.DataFrame``; only the column
            labelled ``0`` of the resulting frame is used.

    Returns:
        The largest normalized value count, i.e. the fraction of rows taken
        up by the most frequent value.
    """
    labels = pd.DataFrame(all_output)[0]
    return (
        labels
        .value_counts(normalize=True, sort=False)
        .sort_index()
        .max()
    )


