In [134]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

from itertools import chain

def normialise(tem_dist):
    return [tem_dist[i]/sum(tem_dist) for i in range(len(tem_dist))]

def tmp_generator(gamma_dict,num,q_dict,q_num,L):
    bin=gamma_dict[0].shape[0]
    if q_num<=0:
        q=np.matrix(np.ones((bin,bin)))
    else:
        q=q_dict[q_num]
    tmp_gamma=np.zeros((bin,bin))
    tmp_q=np.zeros((bin,bin))
    for i in range(bin):
        for j in range(bin):
            tmp_gamma[i,j]=q.item(i,j)*gamma_dict[num-1].item(i,j)*gamma_dict[num-L-1].item(i,j)/gamma_dict[num-L].item(i,j)
            tmp_q[i,j]=q.item(i,j)*gamma_dict[num-L-1].item(i,j)/gamma_dict[num-L].item(i,j)
    return np.matrix(tmp_gamma),np.matrix(tmp_q)     

def assess(bin,f,g,C,V,output):
    output=output.A1.reshape((bin,bin))
    print('sum of violation of f:',sum(abs(np.sum(output,1)-f)))
    print('sum of violation of g:',sum(abs(np.sum(output,0)-g)))
    print('total cost:',sum(sum(output*C)))
    print('entropy:',sum(sum(-output*np.log(output+0.1**3))))
    print('tr violation:',sum(abs(output.T@V)))
    print('============================================')

def plots(x_range,g,f,output):
    fig = plt.figure(figsize=(3,3))
    gs = fig.add_gridspec(2, 2, width_ratios=(4,1), height_ratios=(1,4),left=0.1,right=0.9,bottom=0.1, top=0.9,wspace=0,hspace=0)
    # Create the Axes.
    ax = fig.add_subplot(gs[1, 0])
    ax.pcolormesh(x_range, x_range, output, cmap='Blues')
    ax.set_xlabel(r'supp($X$)',fontsize=10)
    ax.set_ylabel(r'supp($\tilde{X}$)',fontsize=10)#
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax_histx = fig.add_subplot(gs[0, 0], sharex=ax) 
    ax_histy = fig.add_subplot(gs[1, 1], sharey=ax)
    #ax_histx.set_title(r'$Pr[x]$',rotation='horizontal')
    #ax_histy.set_title(r'$Pr[\tilde{x}]$')
    ax_histx.tick_params(axis="x", labelbottom=False)
    ax_histx.tick_params(axis="y", labelleft=False)
    ax_histy.tick_params(axis="x", labelbottom=False)
    ax_histy.tick_params(axis="y", labelleft=False)
    ax_histx.plot(x_range,g,color='tab:blue')
    ax_histy.plot(f,x_range,color='tab:green') 
    return fig

def v_value(x0,x1,x):
    if x != 0:
        return (x0-x1)/x
    else:
        return 0
    
def newton(fun,dfun,a, stepmax, tol):
    if abs(fun(a))<=tol: return a
    for step in range(1, stepmax+1):
        b=a-fun(a)/dfun(a)
        if abs(fun(b))<=tol:
            return b
        else:
            a = b
    return b 

# simplist
def baseline(C,e,px,ptx,V,K):
    bin=len(px)
    bbm1=np.matrix(np.ones(bin)).T
    #I=np.where(~(V==0))[0].tolist()
    xi=np.exp(-C/e)
    gamma_classic=dict()
    gamma_classic[0]=np.matrix(xi+1.0e-9)
    for repeat in range(K):
        gamma_classic[1+2*repeat]=np.matrix(np.diag((px/(gamma_classic[2*repeat] @ bbm1)).A1))@gamma_classic[2*repeat] #np.diag(dist['x']/sum(gamma_classic.T))@gamma_classic
        gamma_classic[2+2*repeat]=gamma_classic[1+2*repeat]@np.matrix(np.diag((ptx/(gamma_classic[1+2*repeat].T @ bbm1)).A1))

    assess(bin,dist['x'],dist['t_x'],C,V,gamma_classic[2*K])
    return gamma_classic

# our method | total repair
def total_repair(C,e,px,ptx,V,K):
    bin=len(px)
    bbm1=np.matrix(np.ones(bin)).T
    I=np.where(~(V==0))[0].tolist()
    xi=np.exp(-C/e)
    gamma_dict=dict()
    gamma_dict[0]=np.matrix(xi+1.0e-9)
    gamma_dict[1]=np.matrix(np.diag((px/(gamma_dict[0] @ bbm1)).A1))@gamma_dict[0]
    gamma_dict[2]=gamma_dict[1]@np.matrix(np.diag((ptx/(gamma_dict[1].T @ bbm1)).A1))
    # step 3
    J=np.where(~((gamma_dict[2].T @ V).A1 ==0))[0].tolist()
    nu=np.zeros(bin)
    gamma_dict[3]=np.copy(gamma_dict[2])
    for j in J:
        fun = lambda z: sum(gamma_dict[2].item(i,j)*V.item(i)*np.exp(z*V.item(i)) for i in I)
        dfun = lambda z: sum(gamma_dict[2].item(i,j)*(V.item(i))**2*np.exp(z*V.item(i)) for i in I)
        nu = newton(fun,dfun,0.5,stepmax = 25,tol = 1.0e-3) #bisection(fun, -50,50, stepmax = 25, tol = 1.0e-3)
        for i in I:
            gamma_dict[3][i,j]=np.exp(nu*V.item(i))*gamma_dict[2].item(i,j)
    gamma_dict[3]=np.matrix(gamma_dict[3])

    #=========================
    L=3
    q_dict=dict()
    for loop in range(1,K):
        tmp,q_dict[(loop-1)*L+1]=tmp_generator(gamma_dict,loop*L+1,q_dict,(loop-2)*L+1,L) #np.matrix(gamma_dict[3].A1*gamma_dict[0].A1/gamma_dict[1].A1)
        gamma_dict[loop*L+1]=np.matrix(np.diag((px/(tmp @ bbm1)).A1))@tmp

        tmp,q_dict[(loop-1)*L+2]=tmp_generator(gamma_dict,loop*L+2,q_dict,(loop-2)*L+2,L)  #np.matrix(gamma_dict[4].A1*gamma_dict[1].A1/gamma_dict[2].A1)
        gamma_dict[loop*L+2]=tmp@np.matrix(np.diag((ptx/(tmp.T @ bbm1)).A1))

        # step 3
        tmp,q_dict[(loop-1)*L+3]=tmp_generator(gamma_dict,loop*L+3,q_dict,(loop-2)*L+3,L)  #np.matrix(gamma_dict[5].A1*gamma_dict[2].A1/gamma_dict[3].A1)
        J=np.where(~((abs(np.matrix(tmp).T @ V).A1)<=1.0e-5))[0].tolist()
        gamma_dict[loop*L+3]=np.copy(tmp)
        for j in J:
            fun = lambda z: sum(tmp.item(i,j)*V.item(i)*np.exp(z*V.item(i)) for i in I)
            dfun = lambda z: sum(tmp.item(i,j)*(V.item(i))**2*np.exp(z*V.item(i)) for i in I)
            nu = newton(fun,dfun,0.5,stepmax = 25,tol = 1.0e-5) 
            for i in I:
                gamma_dict[loop*L+3][i,j]=np.exp(nu*V.item(i))*tmp.item(i,j)
        gamma_dict[loop*L+3]=np.matrix(gamma_dict[loop*L+3])

    assess(bin,dist['x'],dist['t_x'],C,V,gamma_dict[K*L])
    return gamma_dict

# our method | partial repair
def partial_repair(C,e,px,ptx,V,theta_scale,K):
    bin=len(px)
    bbm1=np.matrix(np.ones(bin)).T
    I=np.where(~(V==0))[0].tolist()
    xi=np.exp(-C/e)
    theta=bbm1*theta_scale
    gamma_dict=dict()
    gamma_dict[0]=np.matrix(xi+1.0e-9)
    gamma_dict[1]=np.matrix(np.diag((px/(gamma_dict[0] @ bbm1)).A1))@gamma_dict[0]
    gamma_dict[2]=gamma_dict[1]@np.matrix(np.diag((ptx/(gamma_dict[1].T @ bbm1)).A1))
    # step 3
    Jplus=np.where(~((gamma_dict[2].T @ V).A1 <=theta.A1))[0].tolist()
    Jminus=np.where(~((gamma_dict[2].T @ V).A1>=-theta.A1))[0].tolist()
    gamma_dict[3]=np.copy(gamma_dict[2])
    for j in Jplus:
        fun = lambda z: sum(gamma_dict[2].item(i,j)*V.item(i)*np.exp(-z*V.item(i)) for i in I)-theta.item(j)
        dfun = lambda z: -sum(gamma_dict[2].item(i,j)*(V.item(i))**2*np.exp(-z*V.item(i)) for i in I)
        nu = newton(fun,dfun,0.5,stepmax = 25,tol = 1.0e-3) #bisection(fun, -50,50, stepmax = 25, tol = 1.0e-3)
        for i in I:
            gamma_dict[3][i,j]=np.exp(-nu*V.item(i))*gamma_dict[2].item(i,j)
    for j in Jminus:
        fun = lambda z: sum(gamma_dict[2].item(i,j)*V.item(i)*np.exp(-z*V.item(i)) for i in I)+theta.item(j)
        dfun = lambda z: -sum(gamma_dict[2].item(i,j)*(V.item(i))**2*np.exp(-z*V.item(i)) for i in I)
        nu = newton(fun,dfun,0.5,stepmax = 25,tol = 1.0e-3) #bisection(fun, -50,50, stepmax = 25, tol = 1.0e-3)
        for i in I:
            gamma_dict[3][i,j]=np.exp(-nu*V.item(i))*gamma_dict[2].item(i,j)
    gamma_dict[3]=np.matrix(gamma_dict[3])

    #=========================
    L=3
    q_dict=dict()
    for loop in range(1,K):
        tmp,q_dict[(loop-1)*L+1]=tmp_generator(gamma_dict,loop*L+1,q_dict,(loop-2)*L+1,L) #np.matrix(gamma_dict[3].A1*gamma_dict[0].A1/gamma_dict[1].A1)
        gamma_dict[loop*L+1]=np.matrix(np.diag((px/(tmp @ bbm1)).A1))@tmp

        tmp,q_dict[(loop-1)*L+2]=tmp_generator(gamma_dict,loop*L+2,q_dict,(loop-2)*L+2,L)  #np.matrix(gamma_dict[4].A1*gamma_dict[1].A1/gamma_dict[2].A1)
        gamma_dict[loop*L+2]=tmp@np.matrix(np.diag((ptx/(tmp.T @ bbm1)).A1))

        # step 3
        tmp,q_dict[(loop-1)*L+3]=tmp_generator(gamma_dict,loop*L+3,q_dict,(loop-2)*L+3,L)  #np.matrix(gamma_dict[5].A1*gamma_dict[2].A1/gamma_dict[3].A1)
        Jplus=np.where(~((np.matrix(tmp).T @ V).A1 <=theta.A1))[0].tolist()
        Jminus=np.where(~((np.matrix(tmp).T @ V).A1>=-theta.A1))[0].tolist()
        gamma_dict[loop*L+3]=np.copy(tmp)
        for j in Jplus:
            fun = lambda z: sum(tmp.item(i,j)*V.item(i)*np.exp(-z*V.item(i)) for i in I)-theta.item(j)
            dfun = lambda z: -sum(tmp.item(i,j)*(V.item(i))**2*np.exp(-z*V.item(i)) for i in I)
            nu = newton(fun,dfun,0.5,stepmax = 25,tol = 1.0e-5) 
            for i in I:
                gamma_dict[loop*L+3][i,j]=np.exp(-nu*V.item(i))*tmp.item(i,j)
        for j in Jminus:
            fun = lambda z: sum(tmp.item(i,j)*V.item(i)*np.exp(-z*V.item(i)) for i in I)+theta.item(j)
            dfun = lambda z: -sum(tmp.item(i,j)*(V.item(i))**2*np.exp(-z*V.item(i)) for i in I)
            nu = newton(fun,dfun,0.5,stepmax = 25,tol = 1.0e-5) 
            for i in I:
                gamma_dict[loop*L+3][i,j]=np.exp(-nu*V.item(i))*tmp.item(i,j)
        gamma_dict[loop*L+3]=np.matrix(gamma_dict[loop*L+3])

    assess(bin,dist['x'],dist['t_x'],C,V,gamma_dict[L*K])
    return gamma_dict

def empirical_distribution(sub,x_range):
    bin=len(x_range)
    distrition=np.zeros(bin)
    for i in range(bin):
        subset=sub[sub['X']==x_range[i]] #bin_value=x_range[i] #sub[(sub['X']>=bin_value)&(sub['X']<bin_value+width)]
        if subset.shape[0]>0:
            distrition[i]=sum(subset['W'])
    if sum(distrition)>0:
        return distrition/sum(distrition)
    else:
        return distrition

def plot_rdist(rdist,x_range):
    plt.plot(x_range,rdist['x'],label=r'$Pr[x]$',color='tab:blue')
    plt.plot(x_range,rdist['x_0'],label=r'$Pr[x|s_0]$',alpha=0.3,color='tab:orange')
    plt.plot(x_range,rdist['x_1'],label=r'$Pr[x|s_1]$',alpha=0.3,color='#9f86c0')
    plt.ylabel('PMF',fontsize=14)
    plt.xlabel(r'$supp(X)=supp(\tilde{X})$',fontsize=20)
    plt.legend()
    return plt

def DisparateImpact(X_test,y_pred):
    dim=X_test.shape[1]-2
    df_test=pd.DataFrame(np.concatenate((X_test,y_pred.reshape(-1,1)), axis=1),columns=[*range(dim)]+['S','W','f'])
    numerator=sum(df_test[(df_test['S']==0)&(df_test['f']==1)]['W'])/sum(df_test[df_test['S']==0]['W'])
    denominator=sum(df_test[(df_test['S']==1)&(df_test['f']==1)]['W'])/sum(df_test[df_test['S']==1]['W'])
    return numerator/denominator
    
def rdata_analysis(rdata,x_range):
    rdist=dict()
    pivot=pd.pivot_table(rdata,index=list(chain(*[x_list])),values=['W'],aggfunc=[np.sum])[('sum','W')]
    pivot0=pd.pivot_table(rdata[rdata['S']==0],index=list(chain(*[x_list])),values=['W'],aggfunc=[np.sum])[('sum','W')]
    pivot1=pd.pivot_table(rdata[rdata['S']==1],index=list(chain(*[x_list])),values=['W'],aggfunc=[np.sum])[('sum','W')]
    rdist['x']= np.array([pivot[i] for i in x_range])/sum([pivot[i] for i in x_range]) #empirical_distribution(rdata,x_range)
    rdist['x_0']=np.array([pivot0[i] if i in list(pivot0.index) else 0 for i in x_range])/sum([pivot0[i] if i in list(pivot0.index) else 0 for i in x_range]) #empirical_distribution(rdata[rdata['S']==0],x_range)
    rdist['x_1']=np.array([pivot1[i] if i in list(pivot1.index) else 0 for i in x_range])/sum([pivot1[i] if i in list(pivot1.index) else 0 for i in x_range]) #empirical_distribution(rdata[rdata['S']==1],x_range)
    return rdist

def c_generate_higher(x_range):
    bin=len(x_range)
    dim=len(x_range[0])
    C=np.random.random((bin,bin))
    for i in range(bin):
        for j in range(bin):
            C[i,j]=sum(abs(x_range[i][d]-x_range[j][d]) for d in range(dim))
    return C

def c_generate(x_range):
    bin=len(x_range)
    C=np.random.random((bin,bin))
    for i in range(bin):
        for j in range(bin):
            C[i,j]=abs(x_range[i]-x_range[j]) 
    return C

def projection(df,coupling_matrix,x_range,x_list):
    bin=len(x_range)
    dim=len(x_list)
    data=df.groupby(by=list(chain(*[x_list,'S','Y'])),as_index=False).sum()# data column: x_list,S,Y,W
    data=data[x_list+['S','W','Y']]
    coupling=coupling_matrix.A1.reshape((bin,bin))
    df_t=pd.DataFrame(columns=['X','S','W','Y'])
    for i in range(data.shape[0]):
        orig=data.iloc[i]
        loc=np.where([x_range[i]==orig[0:dim].values for i in range(bin)])[0][0]
        rows=np.nonzero(coupling[loc,:])[0]
        sub_dict={'X':[x_range[r] for r in rows],'W':list(coupling[loc,rows]/(sum(coupling[loc,rows]))*orig[dim+1])}
        sub=pd.DataFrame(data=sub_dict, index=rows)
        sub['W']=coupling[loc,rows]/(sum(coupling[loc,rows]))*orig[dim+1]
        sub['S']=orig[dim]
        sub['Y']=orig[dim+2]
        df_t=pd.concat([df_t,sub],ignore_index=True)#pd.concat([df_t,samples_groupby(sub,x_list)], ignore_index=True)
    if dim>1:
        for d in range(dim):
            df_t[x_list[d]]=[df_t['X'][r][d] for r in range(df_t.shape[0])] #df_t['X'][:][d]
    else:
        df_t[x_list[0]]=df_t['X']

    return df_t[x_list+['S','W','Y']]

In [110]:
pd.read_csv('C:/Users/zhouq/Documents/optimal_transport/adult_csv.csv').head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capitalgain,capitalloss,hoursperweek,native-country,class
0,2,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,1,0,2,United-States,<=50K
1,3,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,0,United-States,<=50K
2,2,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,2,United-States,<=50K
3,3,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,2,United-States,<=50K
4,1,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,2,Cuba,<=50K


In [116]:
coupling_matrix=baseline(C,e,px,ptx,V,K)[K*2]

sum of violation of f: 0.005185398987376815
sum of violation of g: 2.764817183791707e-16
total cost: 0.3991361376558401
entropy: 3.7109604430563325
tr violation: [[0.27053001]]


In [149]:
x_list=['education-num','sex'] #
dim=len(x_list)
messydata=pd.read_csv('C:/Users/zhouq/Documents/optimal_transport/adult_csv.csv',usecols=x_list+['race','class'])
messydata=messydata[(messydata['race']=='White')|(messydata['race']=='Black')]
for col in ['race','class']+x_list:
    messydata[col]=messydata[col].astype('category')
cat_columns = messydata.select_dtypes(['category']).columns
messydata[cat_columns]=messydata[cat_columns].apply(lambda x: x.cat.codes)
messydata=messydata.rename(columns={'race':'S','class':'Y'}) #'education-num':'X1','hoursperweek':'X2',
messydata['W']=1
#x_range_full=dict()
#for name in x_list:
#    x_range_full[name]=np.arange(min(df[name]),max(df[name])+1,1)
X=messydata[list(chain(*[x_list,'S','W']))].to_numpy()
y=messydata['Y'].to_numpy()
e=0.01
K=200
report=pd.DataFrame(columns=['DI','f1 macro','f1 micro','f1 weighted','method'])
for ignore in range(3):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    df=pd.DataFrame(np.concatenate((X_train,y_train.reshape(-1,1)), axis=1),columns=list(chain(*[x_list,'S','W','Y'])))
    
    x_range=list(pd.pivot_table(df,index=list(chain(*[x_list])),values=['W'],aggfunc=[np.sum])[('sum','W')].index) #[(i,j) for i in x_range_full[x_list[0]] for j in x_range_full[x_list[1]]]
    dist=rdata_analysis(df,x_range)
    bin=len(x_range)
    if dim>1:
        C=c_generate_higher(x_range)
    else:
        C=c_generate(x_range)
    dist['t_x']=dist['x_0']*0.5+dist['x_1']*0.5
    dist['v']=[v_value(dist['x_0'][i],dist['x_1'][i],dist['x'][i]) for i in range(bin)]
    px=np.matrix(dist['x']).T
    ptx=np.matrix(dist['t_x']).T
    V=np.matrix(dist['v']).T

    rdata_base=projection(df,baseline(C,e,px,ptx,V,K)[K*2],x_range,x_list)
    rdata_part2=projection(df,partial_repair(C,e,px,ptx,V,1.0e-2,K)[K*3],x_range,x_list)

    clf=RandomForestClassifier(max_depth=4).fit(X_train[:,0:dim],y_train,sample_weight=X_train[:,dim+1])
    y_pred=clf.predict(X_test[:,0:dim])
    new_row = pd.Series({'DI':DisparateImpact(X_test,y_pred),
                        'f1 macro':f1_score(y_test, y_pred, average='macro'),
                        'f1 micro':f1_score(y_test, y_pred, average='micro'),
                        'f1 weighted':f1_score(y_test, y_pred, average='weighted'),
                        'method':'origin'})
    X_train_base=rdata_base[x_list+['S','W']].to_numpy()
    y_train_base=rdata_base['Y'].to_numpy().astype('int8')
    clf_base=RandomForestClassifier(max_depth=4).fit(X_train_base[:,0:dim],y_train_base,sample_weight=X_train_base[:,dim+1])
    y_pred_base=clf_base.predict(X_test[:,0:dim])
    new_row_base = pd.Series({'DI':DisparateImpact(X_test,y_pred_base),
                    'f1 macro':f1_score(y_test, y_pred_base, average='macro'),
                    'f1 micro':f1_score(y_test, y_pred_base, average='micro'),
                    'f1 weighted':f1_score(y_test, y_pred_base, average='weighted'),
                    'method':'baseline'})
    X_train_part=rdata_part2[x_list+['S','W']].to_numpy()
    y_train_part=rdata_part2['Y'].to_numpy().astype('int8')
    clf_part= RandomForestClassifier(max_depth=4).fit(X_train_part[:,0:dim],y_train_part,sample_weight=X_train_part[:,dim+1])
    y_pred_part=clf_part.predict(X_test[:,0:dim])
    new_row_part = pd.Series({'DI':DisparateImpact(X_test,y_pred_part),
                    'f1 macro':f1_score(y_test, y_pred_part, average='macro'),
                    'f1 micro':f1_score(y_test, y_pred_part, average='micro'),
                    'f1 weighted':f1_score(y_test, y_pred_part, average='weighted'),
                    'method':'partial repair'})
    report = pd.concat([report,new_row.to_frame().T,new_row_base.to_frame().T,new_row_part.to_frame().T], ignore_index=True)

sum of violation of f: 0.0036467884335666923
sum of violation of g: 2.0415526907902048e-16
total cost: 0.3659321530724333
entropy: 2.9225917543694826
tr violation: [[0.31370058]]
sum of violation of f: 0.003075720232100548
sum of violation of g: 0.00035140190723314537
total cost: 0.8463600221109463
entropy: 3.217463869557872
tr violation: [[0.15994331]]


ZeroDivisionError: float division by zero

In [137]:
rdata_base

Unnamed: 0,education-num,age,S,W,Y
0,0,0,0,9.999978e-01,0
1,0,1,0,6.726550e-09,0
2,0,2,0,7.319469e-09,0
3,0,3,0,4.356417e-09,0
4,0,4,0,2.770531e-09,0
...,...,...,...,...,...
21115,15,0,1,5.000077e-14,1
21116,15,1,1,7.268475e-14,1
21117,15,2,1,1.164588e-13,1
21118,15,3,1,8.623069e-14,1


In [None]:
#plt.plot(range(bin),dist['x'])
plt.plot(range(bin),dist['x_0'])
plt.plot(range(bin),dist['x_1'])

In [113]:
report

Unnamed: 0,DI,f1 macro,f1 micro,f1 weighted,method
0,0.439913,0.590745,0.777545,0.73333,origin
1,0.439913,0.590745,0.777545,0.73333,baseline
2,0.131852,0.510866,0.770655,0.694695,partial repair
3,0.436802,0.596207,0.781366,0.738333,origin
4,0.436802,0.596207,0.781366,0.738333,baseline
5,0.236555,0.516974,0.774961,0.700461,partial repair
6,0.441021,0.583762,0.775661,0.729416,origin
7,0.441021,0.583762,0.775661,0.729416,baseline
8,0.316652,0.506475,0.770224,0.692412,partial repair
