In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys
sys.path.append("/content/drive/MyDrive/Colab Notebooks/function/PML-MD")

In [None]:
import torch
import time
import os
import torch.optim as optim
import numpy as np
from model import LinearNet
from dataset import get_loader
from scipy.io import savemat
from criteria import hLoss, rLoss, oError, Conv, avgPre

device = ('cuda' if torch.cuda.is_available() else 'cpu')

import matplotlib.pyplot as plt
import pandas as pd
def TablePlot(df,w,h,outputPath):
    fig, ax = plt.subplots(figsize=(w,h))
    ax.axis('off')
    ax.table(
        df.values,
        #colLabels = df.columns,
        loc = 'center',
        bbox=[0,0,1,1],
        fontsize = 100
    )
    plt.savefig(outputPath)
    plt.show()
def criterion(predictions, confidence):

    assert predictions.shape == confidence.shape

    N, C = confidence.shape

    loss_sum = torch.zeros(N, 1, dtype=torch.float, device=device)

    ones = torch.ones_like(confidence, dtype=torch.float, device=device)
    zeros = torch.zeros_like(confidence, dtype=torch.float, device=device)

    for i in range(C):
        confidence_i = confidence[:, i].view(N, -1)
        prediction_i = predictions[:, i].view(N, -1)

        loss = torch.max(zeros, confidence_i - confidence) * torch.max(zeros, ones - (prediction_i - predictions))
        loss_sum += torch.sum(loss, dim=-1, keepdim=True) / ((C - 1) * 1.0)

    return loss_sum.mean()

def test(net, test_loader):

    ######################################
    #信頼度のしきい値
    threshold = 0.8
    ######################################

    net.eval()

    hLoss_list = []
    rLoss_list = []
    oError_list = []
    conv_list = []
    avgPre_list = []

    for itr, (inputs, labels) in enumerate(test_loader):

        inputs = inputs.to(device)
        outputs = net(inputs)
        prelabel = (torch.sigmoid(outputs) > threshold).float()

        hLoss_list.append(hLoss(outputs, labels))
        rLoss_list.append(rLoss(outputs, labels))
        oError_list.append(oError(outputs, labels))
        conv_list.append(Conv(outputs, labels))
        avgPre_list.append(avgPre(outputs, labels))

    hamming_loss = np.mean(hLoss_list)
    ranking_loss = np.mean(rLoss_list)
    one_error = np.mean(oError_list)
    coverage = np.mean(conv_list)
    avg_precision = np.mean(avgPre_list)

    return hamming_loss, ranking_loss, one_error, coverage, avg_precision, outputs.cpu().detach().numpy() ,prelabel.cpu().numpy()

def train_common(net, optimizer, train_loader):
    train_loss = 0

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        net.train()
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    return train_loss

def train_meta(net, optimizer, train_loader, meta_loader):
    train_loss = 0.0
    meta_loss = 0.0
    clean_iter = iter(meta_loader)
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        meta_net = LinearNet(num_inputs=features_num, num_outputs=labels_num)
        meta_net.load_state_dict(net.state_dict())
        meta_net.to(device)
        epsilon = torch.zeros_like(labels, requires_grad=True)

        y_f_hat = meta_net(inputs)
        l_f_meta = criterion(y_f_hat, epsilon)
        meta_net.zero_grad()

        grads = torch.autograd.grad(l_f_meta, (meta_net.params()), create_graph=True)
        meta_net.update_params(lr, source_params=grads)

        try:
            val_data, val_labels = next(clean_iter)
        except StopIteration:
            clean_iter = iter(meta_loader)
            val_data, val_labels = next(clean_iter)

        val_data, val_labels = val_data.to(device), val_labels.to(device)

        y_g_hat = meta_net(val_data)
        l_g_meta = criterion(y_g_hat, val_labels)

        grad_eps = torch.autograd.grad(l_g_meta, epsilon, only_inputs=True)[0]

        # computing and normalizing the confidence matrix P
        # p_tilde = torch.clamp(epsilon - grad_eps, min=0)
        p_tilde = torch.clamp(-grad_eps, min=0)
        p_tilde *= labels

        max_row = torch.max(p_tilde, dim=1, keepdim=True)[0]
        ones = torch.ones_like(max_row)
        max_row = torch.where(max_row == 0, ones, max_row)
        p = p_tilde / max_row

        y_f_hat = net(inputs)
        l_f = criterion(y_f_hat, p)

        optimizer.zero_grad()
        l_f.backward()
        optimizer.step()

        meta_loss += l_g_meta.item()
        train_loss += l_f.item()

    return meta_loss, train_loss


def baseline(train_loader, test_loader, meta_loader, num_epoch=500, clean=True):
    # method = baseline for baseline with meta data, else for baseline without meta data and ground-truth

    net = LinearNet(num_inputs=features_num, num_outputs=labels_num)
    net = net.to(device)
    optimizer = optim.SGD(params=net.params(), lr=lr, momentum=momentum, weight_decay=weight_decay)

    hLoss_list = []
    rLoss_list = []
    oError_list = []
    conv_list = []
    avgPre_list = []

    for epoch in range(num_epoch):
        loss = 0.0
        # adjust_learning_rate(optimizer, epoch, lr)
        if clean:
            loss += train_common(net, optimizer, meta_loader)
            loss += train_common(net, optimizer, train_loader)
            loss /= (len(meta_loader) + len(train_loader))
        else:
            loss += train_common(net, optimizer, train_loader)
            loss /= len(train_loader)

        hamming_loss, ranking_loss, one_error, coverage, avg_precision, outputs, p_labels = test(net, test_loader)

        print('Batch: [{:0>4}/{:0>4}] '.format(epoch + 1, num_epoch),
              'training loss: {:.4f} '.format(loss),
              'hLoss: {:.4f} '.format(hamming_loss),
              'rLoss: {:.4f} '.format(ranking_loss),
              'oError: {:.4f} '.format(one_error),
              'conv: {:.4f} '.format(coverage),
              'avgPre: {:.4f}'.format(avg_precision))

        """hLoss_list.append(hamming_loss)
        rLoss_list.append(ranking_loss)
        oError_list.append(one_error)
        conv_list.append(coverage)
        avgPre_list.append(avg_precision)"""
    #全エポックの中で最良の結果を返しているがそれはダメなので消した    
    #return np.min(hLoss_list), np.min(rLoss_list), np.min(oError_list), np.min(conv_list), np.max(avgPre_list)
    #最終的な結果と出力と予測ラベルを返す
    return hamming_loss, ranking_loss, one_error, coverage, avg_precision, outputs, p_labels

def metalearning(train_loader, test_loader, meta_loader, num_epoch=500, clean=True):

    net = LinearNet(num_inputs=features_num, num_outputs=labels_num)
    net = net.to(device)
    optimizer = optim.SGD(params=net.params(), lr=lr, momentum=momentum, weight_decay=weight_decay)

    hLoss_list = []
    rLoss_list = []
    oError_list = []
    conv_list = []
    avgPre_list = []

    for epoch in range(num_epoch):
        train_loss = 0.0
        meta_loss = 0.0

        if clean:
            train_loss += train_common(net, optimizer, train_loader)
            train_l, meta_l = train_meta(net, optimizer, train_loader, meta_loader)
            train_loss += train_l
            meta_loss += meta_l

            train_loss /= (len(train_loader) + len(meta_loader))
            meta_loss /= len(train_loader)
        else:
            train_l, meta_l = train_meta(net, optimizer, train_loader, meta_loader)
            train_loss += train_l
            meta_loss += meta_l

            train_loss /= len(train_loader)
            meta_loss /= len(train_loader)

        hamming_loss, ranking_loss, one_error, coverage, avg_precision ,outputs, p_labels= test(net, test_loader)

        print('Batch: [{:0>4}/{:0>4}] '.format(epoch + 1, num_epoch),
              'training loss: {:.4f} '.format(train_loss),
              'meta_loss: {:.4f} '.format(meta_loss),
              'hLoss: {:.4f} '.format(hamming_loss),
              'rLoss: {:.4f} '.format(ranking_loss),
              'oError: {:.4f} '.format(one_error),
              'conv: {:.4f} '.format(coverage),
              'avgPre: {:.4f}'.format(avg_precision))

        """hLoss_list.append(hamming_loss)
        rLoss_list.append(ranking_loss)
        oError_list.append(one_error)
        conv_list.append(coverage)
        avgPre_list.append(avg_precision)"""

    #全エポックの中で最良の結果を返しているがそれはダメなので消した    
    #return np.min(hLoss_list), np.min(rLoss_list), np.min(oError_list), np.min(conv_list), np.max(avgPre_list)
    #最終的な結果と出力と予測ラベルを返す
    return hamming_loss, ranking_loss, one_error, coverage, avg_precision, outputs, p_labels


def save_result(data, method, learning_rate, result):
    file = './result/LinearNet/' + data
    if not os.path.exists(file):
        os.mkdir(file)
    filename = './result/LinearNet/' + data + '/' + method + '_' + str(learning_rate) + '.mat'
    content = {}

    content['result'] = result
    savemat(filename, content)

def save_log(content, learning_rate):
    filename = './result/LinearNet/' + str(learning_rate) + '_log.txt'
    file = open(filename, 'a')
    file.write(content)
    file.close()

def adjust_learning_rate(optimizer, epochs, learning_rate):
    lr = learning_rate * ((0.1 ** int(epochs >= 80)) * (0.1 ** int(epochs >= 100)))  # For WRN-28-10
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

dir = "/content/drive/MyDrive/Colab Notebooks"
batch_size = 100
meta_size = 5
#lr_list = [0.01, 0.001]
lr_list = [0.01]
momentum = 0.9
weight_decay = 1e-4
methods = ['meta', 'meat_clean', 'baseline', 'baseline_clean', 'ground_truth']
datasets = ['music_emotion', 'music_style','mirflickr','emotions','enron','CAL500','scene','genbase']
repeat = 1
#noise_list_m = [50,100,150]
noise_list_m = [50]
#true_list = [1,3,5,7,9]
true_list = [1]
if __name__ == '__main__':

    #for data in datasets:
    for data in ["music_emotion"]:
        print('\n')

        if data in ['music_emotion', 'music_style']:
            batch_size = 200
        elif data in ['mirflickr', 'tmc', 'mediamill']:
            batch_size = 500
        elif data in ['CAL500', 'emotions','genbase']:
            batch_size = 50

        if data in ['music_emotion', 'music_style','YeastBP']:
          noise_list = [0]
        elif data in ['emotions','enron','CAL500','scene','genbase']:
          noise_list = noise_list_m.copy()

        for lr in lr_list:
            #for method in methods:
            #print('Dataname: {}\t Methods: {}\t Learning rate: {}'.format(data, method, lr))
            #result = np.empty((repeat, 5), dtype=np.float)
            for i in range(repeat):
                for p_noise in noise_list:
                  result = [[""]*6 for l in range(31)]
                  result[0][0] = "p_true"
                  result[1][0] = "hamming_loss"
                  result[7][0] = "ranking_loss"
                  result[13][0] = "one_error"
                  result[19][0] = "coverage"
                  result[25][0] = "average_precision"
                  result[0][1] = 1
                  result[0][2] = 3
                  result[0][3] = 5
                  result[0][4] = 7
                  result[0][5] = 9
                  """df = pd.DataFrame(result)
                  #df=df.style.hide_index()
                  outputpath = "/content/drive/MyDrive/Colab Notebooks/result/PML_MD/"+str(p_noise)+".jpg"
                  TablePlot(df,len(result[0]),len(result)+1,outputpath)"""
                  for m in range(1):
                      method = methods[m]
                      result[2+m][0]=method
                      result[8+m][0]=method
                      result[14+m][0]=method
                      result[20+m][0]=method
                      result[26+m][0]=method
                      for tru in range(len(true_list)):
                      #for p_true in true_list:
                          p_true = true_list[tru]
                          cv_num = i
                          print(f'data={data}/cv={str(i)}/p_noise={str(p_noise)}/p_true={str(p_true)}/method={method}\n')
                          train_loader, test_loader, meta_loader, clean_loader, \
                          noisy_loader, features_num, labels_num = get_loader(data, batch_size, cv_num,p_noise,p_true,meta_size=meta_size)

                          if method == 'meta':
                              hamming_loss, ranking_loss, one_error, coverage, avg_precision ,outputs, p_labels = metalearning(train_loader,
                                                                                                      test_loader,
                                                                                                      meta_loader,
                                                                                                      clean=False)
                          elif method == 'meta_clean':
                              hamming_loss, ranking_loss, one_error, coverage, avg_precision ,outputs, p_labels = metalearning(train_loader,
                                                                                                      test_loader,
                                                                                                      meta_loader,
                                                                                                      clean=True)
                          elif method == 'baseline':
                              hamming_loss, ranking_loss, one_error, coverage, avg_precision ,outputs, p_labels = baseline(noisy_loader,
                                                                                                  test_loader, meta_loader,
                                                                                                  clean=False)
                          elif method == 'baseline_clean':
                             hamming_loss, ranking_loss, one_error, coverage, avg_precision ,outputs, p_labels = baseline(train_loader,
                                                                                                  test_loader, meta_loader,
                                                                                                  clean=True)

                          elif method == 'ground_truth':           # ground truth
                              hamming_loss, ranking_loss, one_error, coverage, avg_precision ,outputs, p_labels = baseline(clean_loader,
                                                                                                  test_loader, meta_loader,
                                                                                                  clean=False)

                          np.savetxt(f"{dir}/result/PML_MD/{method}/{data}/{str(p_noise)}/true/{str(p_true)}/creds_{str(i)}.csv",outputs,delimiter =',')
                          np.savetxt(f"{dir}/result/PML_MD/{method}/{data}/{str(p_noise)}/true/{str(p_true)}/predict_labels_{str(i)}.csv",p_labels,fmt='%d',delimiter=',')

                          result[2+m][1+tru]=format(hamming_loss,".4f")
                          result[8+m][1+tru]=format(ranking_loss,".4f")
                          result[14+m][1+tru]=format(one_error,".4f")
                          result[20+m][1+tru]=format(coverage,".4f")
                          result[26+m][1+tru]=format(avg_precision,".4f")


                          print()
                          print('Test results of the last :\t',
                              'Best hLoss: {:.4f} '.format(hamming_loss),
                              'Best rLoss: {:.4f} '.format(ranking_loss),
                              'Best oError: {:.4f} '.format(one_error),
                              'Best conv: {:.4f} '.format(coverage),
                              'Best avgPre: {:.4f} '.format(avg_precision))

                          """result_mean = np.mean(result, axis=0)
                          result_std = np.std(result, axis=0)

                          content = time.strftime('%Y-%m-%d %H:%M:%S   ', time.localtime()) + \
                                                  'Dataset: {:15}  '.format(data) + 'Method: {:15}  '.format(method) + \
                                                  'Learning Rate: {:.4f} '.format(lr) + \
                                                  'p_noise: {:.d}'.format(p_noise) + \
                                                  'p_true:{.d}'.format(p_true) + \
                                                  'hLoss: {:.4f}/{:.4f}  '.format(result_mean[0], result_std[0]) + \
                                                  'rLoss: {:.4f}/{:.4f}  '.format(result_mean[1], result_std[1]) + \
                                                  'oError: {:.4f}/{:.4f}  '.format(result_mean[2], result_std[2]) + \
                                                  'conv: {:.4f}/{:.4f}  '.format(result_mean[3], result_std[3]) + \
                                                  'avgPre: {:.4f}/{:.4f}\n'.format(result_mean[4], result_std[4])
                          #print()
                          #print(content)"""

                          #save_result(data, method, lr, result)
                          #save_log(content, lr)
                  
                  #save_fname = str(p_noise)+".csv"
                  #df = pd.DataFrame(result)
                  #df=df.style.hide_index()
                  #outputpath = "/content/drive/MyDrive/Colab Notebooks/result/PML_MD/"+str(p_noise)+".jpg"
                  #TablePlot(df,20,10,outputpath)
                  #import csv
                  #with open("/content/drive/MyDrive/Colab Notebooks/result/PML_MD/"+str(p_noise)+".csv", 'w', newline='') as fl:
                  #    writer = csv.writer(fl)
                  #    writer.writerows(result)



data=music_emotion/cv=0/p_noise=0/p_true=1/method=meta

Batch: [0001/0500]  training loss: 1.5177  meta_loss: 1.9891  hLoss: 0.2305  rLoss: 0.3171  oError: 0.5991  conv: 0.4941  avgPre: 0.5189
Batch: [0002/0500]  training loss: 1.2481  meta_loss: 1.3391  hLoss: 0.2119  rLoss: 0.2937  oError: 0.5516  conv: 0.4605  avgPre: 0.5579
Batch: [0003/0500]  training loss: 1.1506  meta_loss: 1.2233  hLoss: 0.2152  rLoss: 0.2790  oError: 0.5077  conv: 0.4454  avgPre: 0.5837
Batch: [0004/0500]  training loss: 1.1052  meta_loss: 1.2149  hLoss: 0.2057  rLoss: 0.2735  oError: 0.5128  conv: 0.4375  avgPre: 0.5872
Batch: [0005/0500]  training loss: 1.0901  meta_loss: 1.3240  hLoss: 0.2101  rLoss: 0.2766  oError: 0.5106  conv: 0.4452  avgPre: 0.5771
Batch: [0006/0500]  training loss: 1.0690  meta_loss: 1.3412  hLoss: 0.2110  rLoss: 0.2707  oError: 0.4952  conv: 0.4396  avgPre: 0.5982
Batch: [0007/0500]  training loss: 1.0573  meta_loss: 1.2653  hLoss: 0.2084  rLoss: 0.2714  oError: 0.5011  conv: 0.4403

FileNotFoundError: ignored

In [None]:
np.savetxt(f"{dir}/result/PML_MD/{method}/{data}/{str(p_noise)}/true/{str(p_true)}/creds_{str(i)}.csv",outputs,delimiter =',')
np.savetxt(f"{dir}/result/PML_MD/{method}/{data}/{str(p_noise)}/true/{str(p_true)}/predict_labels_{str(i)}.csv",p_labels,fmt='%d',delimiter=',')

In [None]:
dir = "/content/drive/MyDrive/Colab Notebooks"

In [None]:
methods = ['meta', 'meat_clean', 'baseline', 'baseline_clean', 'ground_truth']
datasetss = ['music_emotion', 'music_style','mirflickr','YeastBP','emotions','enron','CAL500','scene','genbase']
repeat = 1
noise_list = [50,100,150]
#noise_list = [50]
true_list = [1,3,5,7,9]
for method in methods:
  %mkdir {method}
  %cd {method}
  for data in datasetss:
    if data in ['music_emotion', 'music_style','mirflickr','YeastBP']:
      %mkdir {data}
      %cd {data}
      for n in [0]:
        %mkdir {str(n)}
        %cd {str(n)}
        %mkdir true
        %cd true
        for t in true_list:
          %mkdir {str(t)}
        %cd ..
        %cd ..
    else:
      %mkdir {data}
      %cd {data}
      for n in noise_list:
        %mkdir {str(n)}
        %cd {str(n)}
        %mkdir true
        %cd true
        for t in true_list:
          %mkdir {str(t)}
        %cd ..
        %cd ..
    %cd ..
  %cd ..

/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_emotion
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_emotion/0
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_emotion/0/true
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_emotion/0
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_emotion
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_style
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_style/0
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_style/0/true
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_style/0
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/music_style
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta
/content/drive/MyDrive/Colab Notebooks/result/PML_MD/meta/mirflickr
/content/drive/MyDrive/Col

In [None]:
from scipy.sparse import csr_matrix
from sklearn.metrics import hamming_loss, coverage_error, label_ranking_average_precision_score, label_ranking_loss
import numpy as np

In [None]:
d_list=["emotions","enron","CAL500","scene","genbase","mirflickr","music_style","music_emotion"]
for i in range(len(d_list)):
  dataname=d_list[i]
  ath = f"/content/drive/MyDrive/Colab Notebooks/new_data2/" + dataname + "/"
  labels = np.loadtxt(ath+"target.csv", delimiter=',',dtype =float)
  if dataname in ["mirflickr","music_style","music_emotion"]:
    plabels = np.loadtxt(ath+"cand/0.csv", delimiter=',',dtype =float)
  else:
    plabels = np.loadtxt(ath+"cand/50.csv", delimiter=',',dtype =float)
  a,b=np.shape(labels)
  z=np.zeros((a,b))
  o=np.ones((a,b))
  zl=coverage_error(labels,z)
  ol=coverage_error(labels,o)
  print(dataname)
  print("zl="+str(zl))
  print("ol="+str(ol))

emotions
zl=6.0
ol=6.0
enron
zl=53.0
ol=53.0
CAL500
zl=174.0
ol=174.0
scene
zl=6.0
ol=6.0
genbase
zl=27.0
ol=27.0
mirflickr
zl=7.0
ol=7.0
music_style
zl=10.0
ol=10.0
music_emotion
zl=11.0
ol=11.0


In [None]:
print(zl)
print(ol)

0.22032116866011203
0.779678831339888
