In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertModel
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score
#from log import init_loger
from datetime import timedelta
import time 
import re
import nltk
from nltk.corpus import stopwords
#stop_words = stopwords.words('english')
from nltk.stem import PorterStemmer 
#st=PorterStemmer()
from textblob import TextBlob 
from textblob import Word
import matplotlib.pyplot as plt
from tensorboardX import SummaryWriter
import random
import os
import tqdm
import math

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import logging
import os
def init_loger(Config):
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")

    if not os.path.exists('./log/'):
        os.mkdir('./log/')
    fh = logging.FileHandler('./log/log-' + 'model' +Config.model_name+ '.log', mode='a',
                             encoding='utf-8')
    fh.setFormatter(formatter)

    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s: %(message)s"))
    console.setLevel(logging.INFO)

    logger.addHandler(fh)
    logger.addHandler(console)
    return logger

In [3]:
data = pd.read_csv('Data.csv')

In [4]:
data['date'][0].split('T')[0].split('-')

['2022', '06', '23']

In [5]:
data_name=[]
time_id=[]
for num in range(len(data)):
    name = data['date'][num].split('T')[0]
    if name not in data_name:
        data_name.append(name)
    time_id.append(name)

In [6]:
data['time']=time_id

In [7]:
data.to_csv('time_data.csv')

In [8]:
class mymodel(nn.Module):
    def __init__(self,config):
        super().__init__()
        self.bert =BertModel.from_pretrained("bert-base-uncased")
        self.linear = nn.Sequential(nn.Linear(768,500),
                                    nn.ReLU(),
                                    nn.Linear(500,500),
                                    nn.ReLU(),
                                    nn.Linear(500,500),
                                    nn.ReLU(),
                                    nn.Linear(500,2000),
                                    nn.ReLU(),
                                    nn.Linear(2000,2))
        self.linear1 = nn.Linear(768,2)
        self.linear2 = nn.Sequential(nn.Linear(2,2000),
                                    nn.ReLU(),
                                    nn.Linear(2000,500),
                                    nn.ReLU(),
                                    nn.Linear(500,500),
                                    nn.ReLU(),
                                    nn.Linear(500,500),
                                    nn.ReLU(),
                                    nn.Linear(500,768))
        w = torch.randn((1,2))
        self.w = nn.parameter.Parameter(w)
    def forward(self,x):
        output_bert = self.bert(**x)
        out = output_bert.pooler_output
        output_auto=self.linear(out)
        output = self.linear1(out)
        output_decoder = self.linear2(output_auto)
        return output,output_auto,out,output_decoder

In [9]:
class k_cluser(nn.Module):
    def __init__(self,config):
        super().__init__()
        self.k = config.k
        w = np.random.random((1,2))
        self.w = torch.from_numpy(w).to(config.device)
        alpha = torch.tensor(config.alpha)
        self.softmax=nn.Softmax(dim=1)
    def forward(self,x):
        output = torch.sum(torch.square((self.w-x))*self.softmax(self.k*(self.w-x)))
        return output

In [10]:
def predict(model,data_x,data_y, device,config):
    model.eval()
    inputs = {}
    input_id = input_id.to(device)
    mask = mask.to(device)
    label = label.to(device)
    inputs['input_ids']=input_id
    inputs['attention_mask'] = mask
    #tgt_mask = subsequent_mask(caps.shape[-1])
    output,outputauto,out,output_decoder=model(inputs)
    y_c = torch.argmax(output,dim=1)
    acc = accuracy_score(label.cpu(),y_c.detach().cpu())#,f1_score
    f1 = f1_score(label.cpu(),y_c.detach().cpu(),average='micro')
    pre = precision_score(label.cpu(),y_c.detach().cpu(),average='micro')
    recall = recall_score(label.cpu(),y_c.detach().cpu(),average='micro')
    k=model.w
    distance=torch.square(outputauto-k).detach().cpu()
    y_k=torch.amin(distance, 1)
    return acc,f1,pre,recall,y_k


In [11]:
def evaluate(model,data_loader, device):
    model.eval()
    total = len(data_loader)
    acc_c=[]
    f1_c = []
    pre_c=[]
    recall_c = []
    res_y_k=[]
    idxx=[]
    y_k=[]
    res_pos=[]
    res_neg=[]
    pres_pos=[]
    pres_neg=[]
    pos=0
    ppos=0
    with tqdm.tqdm(total=total) as pbar:
        for idx,date,input_id,mask,label in data_loader:
            #print(images.shape)
            inputs = {}
            input_id = input_id.to(device)
            mask = mask.to(device)
            pos += label.item()
            
            label = label.to(device)
            inputs['input_ids']=input_id
            inputs['attention_mask'] = mask
            #tgt_mask = subsequent_mask(caps.shape[-1])
            output,outputauto,out,output_decoder=model(inputs) 

            y_c = torch.argmax(output,dim=-1)
            ppos+=y_c.sum().detach().cpu()
            acc = accuracy_score(label.cpu(),y_c.detach().cpu())#,f1_score
            acc_c.append(acc)
            f1 = f1_score(label.cpu(),y_c.detach().cpu(),average='micro')
            f1_c.append(f1)
            pre = precision_score(label.cpu(),y_c.detach().cpu(),average='micro')
            pre_c.append(pre)
            recall = recall_score(label.cpu(),y_c.detach().cpu(),average='micro')
            recall_c.append(recall)
            pbar.update(1)
            k=model.w
            distance=torch.square(outputauto-k).detach().cpu()
            y=torch.amin(distance,dim=1).cpu()
            y_k.append(float(y.detach().cpu()))
            idxx.append(idx)
        pos = int(pos) 
        ppos = int(ppos)
        neg = total-pos
        pneg = total - ppos
        res_y_k.append(y_k)
        res_pos.append(pos)
        res_neg.append(neg)
        pres_pos.append(ppos)
        pres_neg.append(pneg)
    return idxx,np.mean(acc_c),np.mean(f1_c),np.mean(pre_c),np.mean(recall_c),res_y_k,res_neg,res_pos,pres_pos,pres_neg

In [12]:
class Data_loader(Dataset):
    def __init__(self,config,name,mode='training'):
        self.root = config.root
        if mode ==' training':
            self.filename = config.training_filename
        else:
            self.filename = config.val_filename
        self.mode = mode
        self.max_length = config.max_length + 1
        self.ans2label = {'Negative':0,'Positive':1}
        self.label2ans = {0:'Negative',1:'Positive'}
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower=True)
        data = pd.read_csv(os.path.join(self.root,self.filename))
        data1 = data[data['time']==name]
        length = len(data1)//10
        print(length)
        self.data = data1.iloc[:length,:]
        print(self.data.shape)
                                
    def __len__(self):
        return len(self.data)
                                
    def __getitem__(self, idx):
        data = self.data.iloc[idx]
        label = data['label']
        sent = data['content']
        date = data['date']
        sent_encoded = self.tokenizer.encode_plus(
            sent, max_length=self.max_length, padding='max_length', return_attention_mask=True, return_token_type_ids=False, truncation=True)
        sentence = np.array(sent_encoded['input_ids'])
        sentence_mask = (
            1 - np.array(sent_encoded['attention_mask'])).astype(bool)
        target = self.ans2label[label]
        sentence = torch.from_numpy(sentence)
        sentence_mask = torch.from_numpy(sentence_mask)
        return idx,date,sentence, sentence_mask,target

In [16]:
class Config:
    model_name = 'fix5'
    d_model = 768
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    root = ''
    training_filename ='train_data.csv'
    val_filename ='time_data.csv'
    max_length=20
    limit=10000
    lr_drop = 100#
    checkpoint = 'modelbert11.ckptfix6143.ckpt'
    seed = 123
    lr = 0.0001#学习率
    weight_decay = 0.0002
    batch_size=256
    num_workers=0
    start_epoch = 0 
    epochs=200
    clip_max_norm= 10
    weight = 100
    hidden_dim=768
    layer_norm_eps = 0.001
    pad_token_id = 1
    vocab_size=12609
    max_position_embeddings = 128
    dropout = 0.2
    logdir = 'saved_models/'
    alpha=1000.0
    k=2
    factor=0.5
    patience =10
    mode ='no_bert'
    r = 0.5

AttributeError: module 'torch.cpu' has no attribute 'is_available'

In [14]:
def org_predict(val_name):
    res_acc=[]
    res_f1=[]
    res_recall=[]
    res_pre=[]
    res_y_k=[]
    res_neg=[]
    res_pos=[]
    ppres_neg=[]
    ppres_pos=[]
    config=Config()
    config.val_filename=val_name
    logger = init_loger(config)
    model = mymodel(config)
    model.to(config.device)

    if os.path.exists(config.checkpoint):
        print("Loading Checkpoint...")
        checkpoint = torch.load(config.checkpoint, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        #optimizer.load_state_dict(checkpoint['optimizer'])
        #lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        #config.start_epoch = checkpoint['epoch'] + 1
        #criterion_fr.load_state_dict(checkpoint['criterion_fr'])
    for name in data_name:
        data_val = Data_loader(config,name,mode='val')
        config.batch_size = 1
        data_loader_val = DataLoader(data_val, config.batch_size,
                                     drop_last=False, num_workers=config.num_workers)
        idx,acc,f1,pre,recall,y_k,neg,pos,pres_pos,pres_neg=evaluate(model,data_loader_val,device=config.device)
        res_acc.append(acc)
        res_f1.append(f1)
        res_recall.append(recall)
        res_pre.append(pre)
        res_y_k.append(y_k)
        res_neg.append(neg)
        res_pos.append(pos)
        ppres_pos.append(pres_pos)
        ppres_neg.append(pres_neg)
    return res_acc,res_f1,res_recall,res_pre,res_y_k,res_neg,res_pos,ppres_pos,ppres_neg

In [15]:
def compute_ar(acc,recall,pos,neg):
    out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
    return out
def compute_ap(acc,recall,pos,neg):
    out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
    return out

In [60]:
res_acc,res_f1,res_recall,res_pre,res_y_k,res_neg,res_pos,ppres_pos,ppres_neg=org_predict('time_data.csv')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Loading Checkpoint...
61
(61, 14)


100%|██████████| 61/61 [00:05<00:00, 10.82it/s]


891
(891, 14)


100%|██████████| 891/891 [01:20<00:00, 11.13it/s]


949
(949, 14)


100%|██████████| 949/949 [01:27<00:00, 10.79it/s]


832
(832, 14)


100%|██████████| 832/832 [01:17<00:00, 10.76it/s]


836
(836, 14)


100%|██████████| 836/836 [01:17<00:00, 10.78it/s]


809
(809, 14)


100%|██████████| 809/809 [01:15<00:00, 10.72it/s]


906
(906, 14)


100%|██████████| 906/906 [01:26<00:00, 10.49it/s]


811
(811, 14)


100%|██████████| 811/811 [01:15<00:00, 10.67it/s]


2067
(2067, 14)


100%|██████████| 2067/2067 [03:15<00:00, 10.60it/s]


3942
(3942, 14)


100%|██████████| 3942/3942 [06:35<00:00,  9.97it/s]


961
(961, 14)


100%|██████████| 961/961 [01:34<00:00, 10.22it/s]


897
(897, 14)


100%|██████████| 897/897 [01:36<00:00,  9.25it/s]


884
(884, 14)


100%|██████████| 884/884 [01:35<00:00,  9.24it/s]


1068
(1068, 14)


100%|██████████| 1068/1068 [01:52<00:00,  9.52it/s]


880
(880, 14)


100%|██████████| 880/880 [01:33<00:00,  9.40it/s]


310
(310, 14)


100%|██████████| 310/310 [00:32<00:00,  9.56it/s]


248
(248, 14)


100%|██████████| 248/248 [00:25<00:00,  9.55it/s]


909
(909, 14)


100%|██████████| 909/909 [01:35<00:00,  9.54it/s]


848
(848, 14)


100%|██████████| 848/848 [01:29<00:00,  9.52it/s]


1029
(1029, 14)


100%|██████████| 1029/1029 [01:42<00:00, 10.01it/s]


1150
(1150, 14)


100%|██████████| 1150/1150 [01:52<00:00, 10.24it/s]


822
(822, 14)


100%|██████████| 822/822 [01:20<00:00, 10.22it/s]


247
(247, 14)


100%|██████████| 247/247 [00:23<00:00, 10.30it/s]


1149
(1149, 14)


100%|██████████| 1149/1149 [01:52<00:00, 10.22it/s]


1201
(1201, 14)


100%|██████████| 1201/1201 [01:58<00:00, 10.11it/s]


652
(652, 14)


100%|██████████| 652/652 [01:02<00:00, 10.41it/s]


802
(802, 14)


100%|██████████| 802/802 [01:17<00:00, 10.37it/s]


971
(971, 14)


100%|██████████| 971/971 [01:32<00:00, 10.50it/s]


325
(325, 14)


100%|██████████| 325/325 [00:31<00:00, 10.44it/s]


1227
(1227, 14)


100%|██████████| 1227/1227 [01:58<00:00, 10.39it/s]


448
(448, 14)


100%|██████████| 448/448 [00:43<00:00, 10.29it/s]


336
(336, 14)


100%|██████████| 336/336 [00:32<00:00, 10.29it/s]


1326
(1326, 14)


100%|██████████| 1326/1326 [02:08<00:00, 10.28it/s]


344
(344, 14)


100%|██████████| 344/344 [00:34<00:00, 10.00it/s]


740
(740, 14)


100%|██████████| 740/740 [01:11<00:00, 10.38it/s]


268
(268, 14)


100%|██████████| 268/268 [00:25<00:00, 10.37it/s]


375
(375, 14)


100%|██████████| 375/375 [00:36<00:00, 10.14it/s]


1792
(1792, 14)


100%|██████████| 1792/1792 [02:12<00:00, 13.57it/s]


759
(759, 14)


100%|██████████| 759/759 [00:53<00:00, 14.06it/s]


932
(932, 14)


100%|██████████| 932/932 [01:05<00:00, 14.21it/s]


874
(874, 14)


100%|██████████| 874/874 [01:01<00:00, 14.24it/s]


1182
(1182, 14)


100%|██████████| 1182/1182 [01:23<00:00, 14.24it/s]


911
(911, 14)


100%|██████████| 911/911 [01:03<00:00, 14.29it/s]


1396
(1396, 14)


100%|██████████| 1396/1396 [01:37<00:00, 14.26it/s]


2116
(2116, 14)


100%|██████████| 2116/2116 [02:28<00:00, 14.24it/s]


811
(811, 14)


100%|██████████| 811/811 [00:55<00:00, 14.56it/s]


200
(200, 14)


100%|██████████| 200/200 [00:13<00:00, 14.81it/s]


In [61]:
res_acc,res_f1,res_recall,res_pre,res_y_k,res_neg,res_pos,ppres_pos,ppres_neg

([0.4262295081967213,
  0.39169472502805835,
  0.38883034773445735,
  0.41466346153846156,
  0.35167464114832536,
  0.43757725587144625,
  0.38962472406181015,
  0.36744759556103573,
  0.4223512336719884,
  0.38711314053779805,
  0.3610822060353798,
  0.4035674470457079,
  0.4095022624434389,
  0.40730337078651685,
  0.4011363636363636,
  0.3774193548387097,
  0.36693548387096775,
  0.39933993399339934,
  0.392688679245283,
  0.4878522837706511,
  0.4156521739130435,
  0.3978102189781022,
  0.3684210526315789,
  0.402088772845953,
  0.37885095753538717,
  0.3895705521472393,
  0.41895261845386533,
  0.39752832131822863,
  0.37846153846153846,
  0.3643031784841076,
  0.40625,
  0.37797619047619047,
  0.39215686274509803,
  0.36046511627906974,
  0.35135135135135137,
  0.376865671641791,
  0.44,
  0.4029017857142857,
  0.4018445322793149,
  0.3798283261802575,
  0.40160183066361554,
  0.40947546531302875,
  0.3677277716794731,
  0.4040114613180516,
  0.391304347826087,
  0.39210850801479

In [62]:
k_data = pd.DataFrame(data=None,columns=['content','label','time','distance'])

In [63]:
for i in range(1,len(data_name)+1):
    name = data_name[i-1]
    data1 = data[data['time']==name]
    length = len(data1)//10
    res_data = data1.iloc[:length,:]
    for num in range(length):
        k_data.loc[i*num]=[res_data.iloc[num,2],res_data.iloc[num,11],res_data.iloc[num,12],float(res_y_k[i-1][0][num])]

In [64]:
k_data.to_csv('distance_data.csv')

In [65]:
sc_ar_no=[]
sc_ap_no=[]
for i in range(len(data_name)):
    sc_ar=compute_ar(res_acc[i],res_recall[i],float(res_pos[i][0]),float(res_neg[i][0]))
    sc_ap=compute_ap(res_acc[i],res_pre[i],float(res_pos[i][0]),float(res_neg[i][0]))
    sc_ar_no.append(sc_ar)
    sc_ap_no.append(sc_ap)

In [66]:
result_org={'res_acc':res_acc,
                   'res_f1':res_f1,
                   'res_recall':res_recall,
                   'res_pre':res_pre,
                   'res_y_k':res_y_k,
                   'res_neg':res_neg,
                   'res_pos':res_pos,
                   'ppres_pos':ppres_pos,
                   'ppres_neg':ppres_neg}

In [67]:
class Data_loaders(Dataset):
    def __init__(self,config,name,mode='training'):
        self.root = config.root
        if mode ==' training':
            self.filename = config.training_filename
        else:
            self.filename = config.val_filename
        self.mode = mode
        self.max_length = config.max_length + 1
        self.ans2label = {'Negative':0,'Positive':1}
        self.label2ans = {0:'Negative',1:'Positive'}
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower=True)
        data = pd.read_csv(os.path.join(self.root,self.filename))
        #print('*****')
        #print(data)
        data1 = data[data['time']==name]
        #print(data1)
        length = int(len(data1)*config.r)
        if length<10:
            length = len(data1)
        print(length)
        data1.sort_values(by='distance',inplace=True,ascending=True)
        self.data = data1.iloc[:length,:]
        print(self.data.shape)
                                
    def __len__(self):
        return len(self.data)
                                
    def __getitem__(self, idx):
        data = self.data.iloc[idx]
        label = data['label']
        sent = data['content']
        date = data['time']
        sent_encoded = self.tokenizer.encode_plus(
            sent, max_length=self.max_length, padding='max_length', return_attention_mask=True, return_token_type_ids=False, truncation=True)
        sentence = np.array(sent_encoded['input_ids'])
        sentence_mask = (
            1 - np.array(sent_encoded['attention_mask'])).astype(bool)
        target = self.ans2label[label]
        sentence = torch.from_numpy(sentence)
        sentence_mask = torch.from_numpy(sentence_mask)
        return idx,date,sentence, sentence_mask,target

In [68]:
def pre_predict(val_name,data_names,r):
    data_names=data_names
    res_acc=[]
    res_f1=[]
    res_recall=[]
    res_pre=[]
    res_y_k=[]
    res_neg=[]
    res_pos=[]
    ppres_neg=[]
    ppres_pos=[]
    config=Config()
    config.val_filename=val_name
    config.r=r
    logger = init_loger(config)
    model = mymodel(config)
    if os.path.exists(config.checkpoint):
        print("Loading Checkpoint...")
        checkpoint = torch.load(config.checkpoint, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        #optimizer.load_state_dict(checkpoint['optimizer'])
        #lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        #config.start_epoch = checkpoint['epoch'] + 1
        #criterion_fr.load_state_dict(checkpoint['criterion_fr'])
    model.to(config.device)
    #print(data_names)
    for i in range(len(data_names)):
        name = data_names[i]
        #print(name)
        data_val = Data_loaders(config,name,mode='val')
        config.batch_size = 1
        data_loader_val = DataLoader(data_val, config.batch_size,
                                     drop_last=False, num_workers=config.num_workers)
        idx,acc,f1,pre,recall,y_k,neg,pos,pres_pos,pres_neg=evaluate(model,data_loader_val,device='cpu')
        res_acc.append(acc)
        res_f1.append(f1)
        res_recall.append(recall)
        res_pre.append(pre)
        res_y_k.append(y_k)
        res_neg.append(neg)
        res_pos.append(pos)
        ppres_pos.append(pres_pos)
        ppres_neg.append(pres_neg)
    return res_acc,res_f1,res_recall,res_pre,res_y_k,res_neg,res_pos,ppres_pos,ppres_neg

In [69]:
def compute_result(file_name,data_name):
    data_name=data_name
    file_name=file_name
    result={}
    for r in [0.2,0.4,0.6,0.8]:
        res_acc,res_f1,res_recall,res_pre,res_y_k,res_neg,res_pos,ppres_pos,ppres_neg=pre_predict(file_name,data_name,r)
        result[r]={'res_acc':res_acc,
                   'res_f1':res_f1,
                   'res_recall':res_recall,
                   'res_pre':res_pre,
                   'res_y_k':res_y_k,
                   'res_neg':res_neg,
                   'res_pos':res_pos,
                   'ppres_pos':ppres_pos,
                   'ppres_neg':ppres_neg}
    return result
    

In [71]:
result = compute_result('distance_data.csv',data_name)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Loading Checkpoint...


RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [72]:
result[0.2]

NameError: name 'result' is not defined

In [None]:
def compute_evalue(para_org,para,data_name):
    result_error=pd.DataFrame(data=None,columns=['name','time','dc','dp','score'])
    data_name=data_name
    j=0
    for num in list(para.keys()):#res_acc,res_f1,res_recall,res_pre,res_y_k,res_neg,res_pos,ppres_pos,ppres_neg
        for i in range(len(data_name)):
            name=1.0
            time_id = data_name[i]
            score = np.mean(para_org['res_y_k'][i])
            sg = para_org['res_pos'][i][0]-para_org['res_neg'][i][0]
            sp = para_org['ppres_pos'][i][0]-para_org['ppres_neg'][i][0]
            dp =sg-sp
            dp = abs(dp)
            sc = compute_ap(para_org['res_acc'][i],para_org['res_recall'][i],float(para_org['res_pos'][i][0]),float(para_org['res_neg'][i][0]))
            dc = sc-sg
            dc = abs(dc)
            result_error.loc[j]=[name,time_id,dc,dp,score]
            j+=1
    for name in list(para.keys()):
        for num in list(para[name].keys()):#res_acc,res_f1,res_recall,res_pre,res_y_k,res_neg,res_pos,ppres_pos,ppres_neg
                for i in range(len(data_name)):
                    time_id = data_name[i]
                    score = np.mean(para[name]['res_y_k'][i])
                    sg = para[name]['res_pos'][i][0]-para[name]['res_neg'][i][0]
                    sp = para[name]['ppres_pos'][i][0]-para[name]['ppres_neg'][i][0]
                    dp =sg-sp
                    dp = abs(dp)
                    sc = compute_ap(para[name]['res_acc'][i],para[name]['res_recall'][i],float(para[name]['res_pos'][i][0]),float(para[name]['res_neg'][i][0]))
                    dc = sc-sg
                    dc = abs(dc)
                    result_error.loc[j]=[name,time_id,dc,dp,score]
                    j+=1
    return result_error

In [None]:
result_=compute_evalue(result_org,result,data_name)

  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*acc)/(2*recall-1)+1)+neg*((0-2*acc)/(2*recall-1)+1)
  out = pos*((2-2*ac

In [None]:
result_org[0]

KeyError: 0

In [None]:
result_

In [None]:
import matplotlib.pyplot as plt

In [None]:
def vis(data,name,v='dp'):
    name=name
    data_x = data[data['name']==name]
    if v =='dp':
        plt.plot(data_x.iloc[:,3])
    else:
        plt.plot(data_x.iloc[:,2])
    plt.title('sample-frequency : {:.2f}'.format(name)+'  evaluation :'+v)
    plt.show()
    

In [None]:
for r in [0.2,0.4,0.6,0.8,1.0]:
    vis(result_,r,'dp')

In [None]:
for r in [0.2,0.4,0.6,0.8,1.0]:
    vis(result_,r,'dc')

In [None]:
result_.to_csv('data_evaluation.csv')

In [None]:
def vis_score(data,name):
    name=name
    data_x = data[data['name']==name]
    plt.plot(data_x.iloc[:,-1])
    plt.title('sample-frequency : {:.2f}'.format(name))
    plt.show()

In [None]:
def np_move_avg(a,n,mode="same"):
    return(np.convolve(a, np.ones((n,))/n, mode=mode))

In [None]:
for r in [0.2,0.4,0.6,0.8,1.0]:
    vis_score(result_,r)
    

In [None]:
result_=pd.read_csv('data_evaluation.csv',index_col=False)
for r in [0.2,0.4,0.6,0.8,1.0]:
    data_=result_[result_['name']==r]
    data_x = np.array(data_.iloc[:,-1]*1000000).astype(np.int32)
    cc = np_move_avg(data_x,10)
    plt.plot(cc)
    plt.title('sample-frequency : {:.2f}'.format(r))
    plt.show()
                

In [None]:
data_=result_[result_['name']==0.2]
data_x = data_.iloc[:,-1]*1000000

In [None]:
data_.iloc[:,-1]