In [79]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3,0" #list the gpu cores in the order you want to use e.g "0,1,2,3"
cpu = torch.device('cpu')#"cuda:0" if torch.cuda.is_available() else "cpu")
dv=torch.device("cuda")

import os
import torch.tensor as tensor
os.chdir("/yourpath/ACS_PUMS")


## Loading Train and Validation data

In [105]:
import folktables
from folktables import ACSDataSource

data_source = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')
acs_data = data_source.get_data( download=True)

In [4]:
features=[
        'AGEP',
        'SCHL',
        'MAR',
        #'RELSHIPP',#'RELP',
        'DIS',
        'ESP',
        'CIT',
        'MIG',
        'MIL',
        'ANC',
        'NATIVITY',
        'DEAR',
        'DEYE',
        'DREM',
        'SEX',
        'RAC1P',
        'PUMA',
        'ST',
        'OCCP',
        'JWTR',#use 'JWTRNS' for testing (2019) data for training (2018) data the feature is 'JWTR',#
        'POWPUMA',
    ]

In [5]:
Employment = folktables.BasicProblem(
     features=features,
    target='ESR',
    target_transform=lambda x: x == 1,
    group='SEX',
    preprocess=folktables.acs.adult_filter,
    postprocess=lambda x: np.nan_to_num(x, -1),
)

In [6]:
Income = folktables.BasicProblem(
     features=features,
    target='PINCP',
    target_transform=lambda x: x > 50000,
    group='SEX',
    preprocess=folktables.acs.adult_filter,
    postprocess=lambda x: np.nan_to_num(x, -1),
)

In [7]:
HealthInsurance = folktables.BasicProblem(
     features=features,
    target='HINS2',
    target_transform=lambda x: x == 1,
    group='SEX',
    preprocess=folktables.acs.adult_filter,
    postprocess=lambda x: np.nan_to_num(x, -1),
)

In [8]:
TravelTime = folktables.BasicProblem(
     features=features,
    target="JWMNP",
    target_transform=lambda x: x > 20,
    group='SEX',
    preprocess=folktables.acs.adult_filter,
    postprocess=lambda x: np.nan_to_num(x, -1),
)

In [9]:
IncomePovertyRatio = folktables.BasicProblem(
    features=features,
    target='POVPIP',
    target_transform=lambda x: x < 250,
    group='SEX',
    preprocess=folktables.acs.adult_filter,
    postprocess=lambda x: np.nan_to_num(x, -1),
)

In [10]:
f, l1, g = Employment.df_to_numpy(acs_data)
f, l2, g = Income.df_to_numpy(acs_data)

f, l3, g = HealthInsurance.df_to_numpy(acs_data)
f, l4, g = TravelTime.df_to_numpy(acs_data)
f, l5, g = IncomePovertyRatio.df_to_numpy(acs_data)

In [11]:
y=np.array([[0 if v==False else 1 for v in l1],[0 if v==False else 1 for v in l2],[0 if v==False else 1 for v in l3],\
           [0 if v==False else 1 for v in l4],[0 if v==False else 1 for v in l5]])

In [34]:
ids=np.arange(len(f))
X_train, X_val,in_tr,in_val  = train_test_split(f,ids, test_size=0.3,random_state=9)

In [37]:
y_train,y_v=[y[i][in_tr] for i in range(len(y))],[y[i][in_val] for i in range(len(y))]
g_train=g[in_tr]
g_val=g[in_val]
N_tasks=len(y)

5

In [38]:
y_train=[torch.tensor(y_train[i]) for i in range(N_tasks)]

## Build L2T-FMT model

In [None]:
class MTL(nn.Module):

    def __init__(self,d_in=50,tasks=2):
        super(MTL, self).__init__()
        self.tasks=tasks
        self.fc1 = nn.Linear(d_in, 1024)  
        self.bn1= nn.BatchNorm1d(1024)
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 128)
        
        self.bn3 = nn.BatchNorm1d(1024)
        self.fc4 = nn.Linear(1024,1024)
        self.tasks_out=nn.ModuleDict({str(t):nn.Linear(128,2) for t in range(self.tasks)})

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        
        x = F.relu(self.bn3(self.fc4(x)))
        
        x = F.relu(self.bn2(self.fc2(x)))        
        x = F.relu(self.fc3(x))

        t=[self.tasks_out[str(i)](x) for i in range(self.tasks)]
        
        return t


In [26]:
def fair_loss(output,target,x_control):
    prot_att=x_control
    index_prot=torch.squeeze(torch.nonzero(prot_att[:] != 1.))
    target_prot=torch.index_select(target, 0, index=index_prot)
    index_prot_pos=torch.squeeze(torch.nonzero(target_prot[:] == 1. ))
    index_prot_neg=torch.squeeze(torch.nonzero(target_prot[:] == 0. ))

    index_non_prot=torch.squeeze(torch.nonzero(prot_att[:] == 1.))
    target_non_prot=torch.index_select(target, 0, index=index_non_prot)
    index_non_prot_pos=torch.squeeze(torch.nonzero(target_non_prot[:] == 1. ))
    index_non_prot_neg=torch.squeeze(torch.nonzero(target_non_prot[:] == 0. ))

    l_prot_pos=F.cross_entropy(torch.index_select(output, 0, index=index_prot_pos),torch.index_select(target, 0, index=index_prot_pos))    
    l_non_prot_pos=F.cross_entropy(torch.index_select(output, 0, index=index_non_prot_pos),torch.index_select(target, 0, index=index_non_prot_pos))    
    l_non_prot_neg=F.cross_entropy(torch.index_select(output, 0, index=index_non_prot_neg),torch.index_select(target, 0, index=index_non_prot_neg))
    l_prot_neg=F.cross_entropy(torch.index_select(output, 0, index=index_prot_neg),torch.index_select(target, 0, index=index_prot_neg))    

    for l in [l_prot_pos,l_non_prot_pos,l_prot_neg,l_non_prot_neg]:
        if torch.isinf(l)==True:
            l=torch.zeros_like(l,requires_grad=True)
    dl_pos=torch.max(l_prot_pos,l_non_prot_pos)
    dl_neg=torch.max(l_prot_neg,l_non_prot_neg)
    L=dl_pos+dl_neg
    
    return L

In [31]:
import torchmetrics
acc = torchmetrics.Accuracy()
def DM_rate(output,target,x_control):
    prot_att=x_control
    index_prot=torch.squeeze(torch.nonzero(prot_att[:] != 1.))
    target_prot=torch.index_select(target, 0, index=index_prot)
    index_prot_pos=torch.squeeze(torch.nonzero(target_prot[:] == 1. ))
    index_prot_neg=torch.squeeze(torch.nonzero(target_prot[:] == 0. ))

    index_non_prot=torch.squeeze(torch.nonzero(prot_att[:] == 1.))
    target_non_prot=torch.index_select(target, 0, index=index_non_prot)
    index_non_prot_pos=torch.squeeze(torch.nonzero(target_non_prot[:] == 1. ))
    index_non_prot_neg=torch.squeeze(torch.nonzero(target_non_prot[:] == 0. ))

    if index_prot_pos.shape==torch.Size([]) or index_prot_pos.shape==torch.Size([0])\
        or index_non_prot_pos.shape==torch.Size([]) or index_non_prot_pos.shape==torch.Size([0]):
            l_prot_pos=torch.tensor(0.0001)
            l_non_prot_pos=torch.tensor(0.0001)
    else:        
            l_prot_pos=acc(torch.index_select(output, 0, index=index_prot_pos),torch.index_select(target, 0, index=index_prot_pos))    
            l_non_prot_pos=acc(torch.index_select(output, 0, index=index_non_prot_pos),torch.index_select(target, 0, index=index_non_prot_pos))    
    
    if index_prot_neg.shape==torch.Size([]) or index_prot_neg.shape==torch.Size([0])\
        or index_non_prot_neg.shape==torch.Size([]) or index_non_prot_neg.shape==torch.Size([0]):
            l_prot_neg=torch.tensor(0.0001)
            l_non_prot_neg=torch.tensor(0.0001)
    else:        
            l_prot_neg=acc(torch.index_select(output, 0, index=index_prot_neg),torch.index_select(target, 0, index=index_prot_neg))    
            l_non_prot_neg=acc(torch.index_select(output, 0, index=index_non_prot_neg),torch.index_select(target, 0, index=index_non_prot_neg))  
            
    dl_pos=torch.abs(l_prot_pos-l_non_prot_pos)
    dl_neg=torch.abs(l_prot_neg-l_non_prot_neg)
    DM=dl_pos+dl_neg
    
    return DM

In [41]:
class DeepQNetwork(nn.Module):
    def __init__(self, input_dims, fc1_dims, fc2_dims,fc3_dims=256,
                 n_actions=2,n_tasks=2):
        super(DeepQNetwork, self).__init__()
        self.tasks=n_tasks
        self.input_dims = input_dims
        self.fc1_dims = fc1_dims
        self.bn1= nn.LayerNorm(fc1_dims)
        self.fc2_dims = fc2_dims
        self.bn2= nn.LayerNorm(fc2_dims)
        self.fc3_dims = fc3_dims
        self.n_actions = n_actions
        self.fc1 = nn.Linear(self.input_dims, self.fc1_dims)
        self.fc2 = nn.Linear(self.fc1_dims, self.fc2_dims)
        self.fc3 = nn.Linear(self.fc2_dims, self.fc3_dims)
        self.fc_out=nn.ModuleDict({str(t):nn.Linear(self.fc3_dims, self.n_actions) for t in range(self.tasks)})

    def forward(self, state):
        x = F.relu(self.bn1(self.fc1(state)))
        x = F.relu(self.bn2(self.fc2(x)))
        x = F.relu(self.fc3(x))

        Q_sa=[self.fc_out[str(i)](x) for i in range(self.tasks)]

        return Q_sa#Q1_sa,Q2_sa#,Q3_sa

In [42]:

gamma=0.9
omega_T=torch.tensor([1/N_tasks for t in range(N_tasks)]).to(dv)
init_loss_T=[None for t in range(N_tasks)]
criteria_t=nn.MSELoss()
def Teacher(state,state_a,state_b,R_a,R_b,DqN,opti_T):
    grads,G_n,loss_ratio=[],[],[]
    global init_loss_T
    global omega_T
    opti_T.zero_grad()
    for t in range(N_tasks):        
        Q_S=DqN(state[t])[t]
        target=[]
        with torch.no_grad():
            Q_sa=R_a[t]+gamma*torch.max(DqN(state_a[t])[t],1)[0]
            Q_sb=R_b[t]+gamma*torch.max(DqN(state_b[t])[t],1)[0]
        target=torch.tensor([[Q_sa[i],Q_sb[i]] for i in range(len(Q_sa))]).to(dv)
        teach_loss=criteria_t(Q_S,target)
        teach_loss.backward()
        if init_loss_T[t]== None:
            init_loss_T[t]=teach_loss.item()
        loss_ratio.append(teach_loss.item()/init_loss_T[t])
        grads_sh={}
        for n,p in DqN.named_parameters():
            if p.data.shape[0]!=2 and p.grad!=None:
                grads_sh[n] = p.grad 
                
        grads.append(grads_sh)
        G_n.append(torch.linalg.norm(torch.stack([torch.linalg.norm(grads_sh[g]) for g in grads_sh])))
    G_n = torch.stack(G_n)
    E_t = sum(loss_ratio)/len(loss_ratio)
    r_t=[loss/E_t for loss in loss_ratio] #relative inverse trainin g rate of teacher
    omega_T,DqN = Update_model(DqN,grads,omega_T,G_n,r_t,opti_T)
    torch.save(omega_T,'/yourpath/ACS-PUMS/omega_T.pt') # 
                   
    return DqN

In [43]:
lr=0.001
def Update_model(model,grads_sh,omega,G_n,r_t,opti):
    loss_gn=[(G_n[t]-torch.mean(G_n)*r_t[t]) for t in range(len(G_n))]
    for i in range(len(G_n)):
        d_l=0
        if loss_gn[i]>0:
            d_l+=(len(G_n)-1)/len(G_n)*G_n[i]
        elif loss_gn[i]<0:
            d_l-=(len(G_n)-1)/len(G_n)*G_n[i]
        for j in range(len(G_n)):
            if j!=i:
                if loss_gn[j]>0:
                    d_l-=(G_n[i]/len(G_n))
                elif loss_gn[j]<0:
                    d_l+=(G_n[i]/len(G_n))
        
        omega[i]-=lr*d_l
        omega[i]=1
    for t in range(len(G_n)):
        for n,p in model.named_parameters():
            if p.data.shape[0]!=2 and p.grad!=None:
                if t==0:
                    p.grad=omega[t]*grads_sh[t][n]
                else:
                    p.grad+=omega[t]*grads_sh[t][n]
                    
    opti.step() 
    for i in range(len(omega)):
        if omega[i]<0:
            omega[i]=-omega[i]
    return omega,model

In [44]:
net=MTL(model_pre.to(dv),tasks=N_tasks)
net=nn.DataParallel(net)
net.to(dv)
opti_S=optim.AdamW(params=net.parameters())

flat_w=torch.flatten(net.module.tasks_out['0'].weight)
dqn=DeepQNetwork(len(flat_w),512,512,n_actions=2,n_tasks=N_tasks).to(dv)
opti_T = optim.AdamW(dqn.parameters())

In [46]:
def Student(stud,path,action,X,y,t=0,xc=None):
    stud.load_state_dict(torch.load(path))
    opti = optim.Adam(stud.parameters())
    opti.zero_grad()
    out=stud(X)[t]
    if xc!=None:
        loss=action(out,y,xc)        
    else:
        loss=action(out,y)
    #print(loss)
    loss.backward()
    opti.step()
    return stud

## Train Model

In [None]:
omega_S=torch.tensor([1/N_tasks for t in range(N_tasks)]).to(dv)
init_loss_S=[None for t in range(N_tasks)]
m_acc,flag=0.0,1
criteria = nn.CrossEntropyLoss()
best_S=[[0,0] for t in range(N_tasks)]
optim_path,optim_path_disc,optim_clas=[],[],[]
path='/yourpath/ACS-PUMS/Model_l2tfmt.pt' #path to save your model
path_t='/yourpath/ACS-PUMS/dqn_n.pt' ##path to save your teacher model
torch.save(net.state_dict(),path)
action_choices=[]
stud=nn.DataParallel(MTL(model_pre,tasks=N_tasks)).to(dv)
dt=[dataloader1,dataloader2,dataloader3,dataloader4]
yt=[y_tr1,y_tr2,y_tr3,y_tr4]
gt=[g_tr1,g_tr2,g_tr3,g_tr4]
for epoch in range(20):  # loop over each NN multiple times
    loss_pointer=[]
    i,batch=0,8192
    j=0
    state_net,state_a,state_f=[[] for t in range(N_tasks)],[[] for t in range(N_tasks)],[[] for t in range(N_tasks)]
    R_A,R_F=[[] for t in range(N_tasks)],[[] for t in range(N_tasks)]
    while(i<len(X_train)):
        # get the inputs; data is a list of [inputs, labels]
        if (i+batch)<len(X_train):
            inputs, in_t = torch.tensor(X_train[i:i+batch]),in_tr[i:i+batch]
            labels=[y_train[t][i:i+batch] for t in range(N_tasks)]#,y3_train[i:i+batch]]
            #if epoch<pretrn:
            xc=xg[i:i+batch]
            i=i+batch 
        else:
            inputs,in_t = torch.tensor(X_train[i:]),in_tr[i:]
            labels=[y_train[t][i:] for t in range(N_tasks)]#,y2_train[i:]]#,y3_train[i:]]
            #if epoch<pretrn:
            xc=xg[i:]
            i=len(X_train)
        
        # zero the parameter gradients
        net.load_state_dict(torch.load(path))
        net.to(dv)
        fg=[0 for i in range(N_tasks)]
        with torch.no_grad():
            for t in range(N_tasks):
                flat=torch.flatten(net.module.tasks_out[str(t)].weight)
                fg[t]=torch.argmax(dqn(flat)[t])
                       
        opti_S.zero_grad()
        outputs = net(inputs.to(dv).float())
        lp,grads,G_n,loss_ratio=[0 for t in range(N_tasks)],[],[],[]
        for t in range(N_tasks):
            
            #MTL starts
            loss_a=criteria(outputs[t], labels[t].to(dv))
            loss_f=fair_loss(outputs[t], labels[t].to(dv),xc.to(dv))
            if fg[t]==0:
                loss_t=loss_a
                lp[t]=0
            else:
                loss_t=loss_f
                lp[t]=1
            loss_t.backward(retain_graph=True)
            
            if init_loss_S[t]== None:
                init_loss_S[t]=loss_t.item()
            loss_ratio.append(loss_t.item()/init_loss_S[t])
            grads_sh={}
            for n,p in net.named_parameters():
                if p.data.shape[0]!=2 and p.grad!=None:
                    grads_sh[n] = p.grad
                    p.grad=None
            grads.append(grads_sh)
            G_n.append(torch.linalg.norm(torch.stack([torch.linalg.norm(grads_sh[g]) for g in grads_sh])))
        loss_pointer.append(lp)
        G_n = torch.stack(G_n)
        E_t = sum(loss_ratio)/len(loss_ratio)
        r_t=[loss/E_t for loss in loss_ratio] #relative inverse trainin g rate of student
        omega_S,net = Update_model(net,grads,omega_S,G_n,r_t,opti_S) 
        torch.save(net.state_dict(),path)
        torch.save(omega_S,'/yourpath/ACS-PUMS/omega_S.pt')
        for t in range(N_tasks):
            state_net[t].append(torch.unsqueeze(torch.flatten(net.module.tasks_out[str(t)].weight),dim=0))

            accuracy=acc(outputs[t].to(cpu),labels[t].to(cpu))
            fairness=DM_rate(outputs[t].to(cpu), labels[t].to(cpu),xc.to(cpu))

            learn_a=Student(stud,path,criteria,inputs.to(dv).float(),labels[t].to(dv),t=t)

            learn_f=Student(stud,path,fair_loss,inputs.to(dv).float(),labels[t].to(dv),t=t,xc=xc.to(dv))


            state_a[t].append(torch.unsqueeze(torch.flatten(learn_a.module.tasks_out[str(t)].weight),dim=0))            
            state_f[t].append(torch.unsqueeze(torch.flatten(learn_f.module.tasks_out[str(t)].weight),dim=0))
        
            with torch.no_grad():
                out_A=learn_a(inputs.to(dv).float())[t]
                out_F=learn_f(inputs.to(dv).float())[t]
                acc_a=acc(out_A.to(cpu),labels[t].to(cpu))
                DM_a=DM_rate(out_A.to(cpu),labels[t].to(cpu),xc.to(cpu))
                acc_f=acc(out_F.to(cpu),labels[t].to(cpu))
                DM_f=DM_rate(out_F.to(cpu),labels[t].to(cpu),xc.to(cpu))
        
            if best_S[t][0]==0:
                best_S[t][0]=accuracy.item()
                best_S[t][1]=fairness
            # the reward functions
            R_A[t].append(torch.min((acc_a-best_S[t][0])/best_S[t][0],(1-DM_a-best_S[t][1])/best_S[t][1]))
            R_F[t].append(torch.min((acc_f-best_S[t][0])/best_S[t][0],(1-DM_f-best_S[t][1])/best_S[t][1]))
        del inputs; del labels; del xc 
    for t in range(N_tasks):
        state_net[t]=torch.cat(state_net[t],0).to(dv)
        state_a[t]=torch.cat(state_a[t],0).to(dv)
        state_f[t]=torch.cat(state_f[t],0).to(dv)
        R_A[t]=torch.tensor(R_A[t]).to(dv)
        R_F[t]=torch.tensor(R_F[t]).to(dv)
    dqn=Teacher(state_net,state_a,state_f,R_A,R_F,dqn,opti_T)
    torch.save(dqn.state_dict(),path_t)
    del state_net; del state_a; del state_f; del R_A; del R_F
    torch.cuda.empty_cache()
    with torch.no_grad():
        pred0=net(torch.tensor(X_val).to(dv).float())
        print('Epoch: ',epoch, 'Loss pointers',loss_pointer)
        action_choices.append(loss_pointer)
        np.save(file="/yourpath/ACS-PUMS/loss_pointers_age_n.npy",arr=np.array(action_choices)) 
        for t in range(N_tasks): 
            accuracy=acc(pred0[t].to(cpu),torch.tensor(y_v[t]))
            EO=DM_rate(pred0[t].to(cpu),torch.tensor(y_v[t]),torch.tensor(g_val))
            
            if accuracy>best_S[t][0]:
                best_S[t][0]=accuracy
            if 1-EO>best_S[t][1]:
                best_S[t][1]=1-EO
        

## Loading Test Data

In [5]:
features=[
        'AGEP','SCHL','MAR','DIS','ESP','CIT','MIG', 'MIL',
        'ANC','NATIVITY', 'DEAR', 'DEYE',
        'DREM', 'SEX', 'RAC1P','PUMA','ST','OCCP',
        'JWTRNS',#use 'JWTRNS' for testing (2019) data for training (2018) data the feature is 'JWTR',#
        'POWPUMA',
    ]

In [15]:
import folktables
from folktables import ACSDataSource

data_source = ACSDataSource(survey_year='2019', horizon='1-Year', survey='person')
acs_data = data_source.get_data( download=True)
f, l1, g = Employment.df_to_numpy(acs_data)
f, l2, g = Income.df_to_numpy(acs_data)
''
f, l3, g = HealthInsurance.df_to_numpy(acs_data)
f, l4, g = TravelTime.df_to_numpy(acs_data)
f, l5, g = IncomePovertyRatio.df_to_numpy(acs_data)
y_test=np.array([[0 if v==False else 1 for v in l1],[0 if v==False else 1 for v in l2],[0 if v==False else 1 for v in l3],\
           [0 if v==False else 1 for v in l4],[0 if v==False else 1 for v in l5]])
N_tasks=len(y_test)

In [None]:
class STL(nn.Module):

    def __init__(self,d_in=50):
        super(STL, self).__init__()
        self.fc1 = nn.Linear(d_in, 1024)  
        self.bn1= nn.BatchNorm1d(1024)
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 128)
        
        self.bn3 = nn.BatchNorm1d(1024)
        self.fc4 = nn.Linear(1024,1024)        
        self.task = nn.Linear(128,2)


    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        
        x = F.relu(self.bn3(self.fc4(x)))
        
        x = F.relu(self.bn2(self.fc2(x)))        
        x = F.relu(self.fc3(x))
        t = self.task(x)
       
        return t

In [None]:
spaths={'path'+str(t):'/yourpath/ACS_PUMS/model/Model_stl'+str(t)+'.pt' for t in range(N_tasks)}

In [None]:
net=nn.DataParallel(STL(d_in=X_train.shape[1]))
net.to(dv)
Bests=[]
for t in range(N_tasks):  
    net.load_state_dict(torch.load(spaths['path'+str(t)]))
    pred0=net(torch.tensor(X_test).float())
    accuracy=acc(pred0.to(cpu),torch.tensor(y_test[t]).to(cpu))
    DM=DM_rate(pred0.to(cpu),torch.tensor(y_test[t]).to(cpu),torch.tensor(g_test).to(cpu))
    Bests.append([accuracy,DM])

In [38]:
net=MTL(d_in=f.shape[1],tasks=N_tasks)
net=nn.DataParallel(net)
net.to(dv)
l2t=[]
net.load_state_dict(torch.load(path))
pred0=net(torch.tensor(f).float())
ra,rf=0,0
for t in range(N_tasks):            
    accuracy=acc(pred0[t],torch.tensor(y_test[t]))
    DM=DM_rate(pred0[t],torch.tensor(y_test[t]),torch.tensor(g))
    l2t.append([accuracy,DM])
    ra+=accuracy/Bests[t][0]
    rf+=DM/Bests[t][1]
    print('Task',t,'  Acc:',accuracy, 'EOs:',DM)
    print('Relative:',ra/(t+1), rf/(t+1))

Task 0   Acc: tensor(0.9688) EOs: tensor(0.0096)
Relative: tensor(0.9878) tensor(1.9025)
Task 1   Acc: tensor(0.8262) EOs: tensor(0.0118)
Relative: tensor(1.0279) tensor(1.8006)
Task 2   Acc: tensor(0.8714) EOs: tensor(0.0009)
Relative: tensor(1.0186) tensor(1.5337)
Task 3   Acc: tensor(0.7437) EOs: tensor(0.0038)
Relative: tensor(1.0327) tensor(1.4626)
Task 4   Acc: tensor(0.7711) EOs: tensor(0.0002)
Relative: tensor(1.0241) tensor(1.1760)
