# Tort Law Experiment

In [1]:
from tort_dataset import *
from neural_networks import *

from tqdm import tqdm
from joblib import Parallel, delayed

%load_ext autoreload
%autoreload 2

# Increase resolution of plots
plt.rcParams['figure.dpi'] = 150

# Show more columns
pd.set_option('display.max_column',None)

### Initialize global parameters

In [2]:
# Datasets parameters
db_size = 5000        # size passed to generate_dataset for the regular training and test sets
db_size_small = 500   # size passed to generate_dataset for the smaller training set

# Network parameters
# One entry per network; each entry is a tuple of hidden-layer sizes.
# Mind the trailing comma in (12,): a bare (12) is just the integer 12.
hidden_layers = [(12,), (24, 6), (24, 10, 3)]
activation = 'logistic'
max_iter = 50000
learning_rate_init = 0.001
solver = 'adam'
batch_size = 50

### Initialize Regular Datasets

In [3]:
def _generate_split(size):
    '''Generate a dataset of the requested size and preprocess it with return_df=True.'''
    frame = generate_dataset(size)
    features, labels = preprocess(frame, return_df=True)
    return frame, features, labels

# The three splits are generated in the same order as before:
# regular training set, smaller training set, regular test set.
train_df, X_train, y_train = _generate_split(db_size)
small_train_df, X_train_small, y_train_small = _generate_split(db_size_small)
test_df, X_test, y_test = _generate_split(db_size)

### Initialize Rationale Evaluation Datasets

In [4]:
def _generate_rationale_split(generator):
    '''Call a dataset generator and preprocess its result; returns (df, X, y).'''
    frame = generator()
    features, labels = preprocess(frame)
    return frame, features, labels

# One dedicated evaluation set per condition, generated in the original order.
unl_df, X_unl, y_unl = _generate_rationale_split(generate_unlawful_dataset)          # Unlawfulness
imp_df, X_imp, y_imp = _generate_rationale_split(generate_imputability_dataset)      # Imputability
cau_df, X_cau, y_cau = _generate_rationale_split(generate_cause_dataset)             # Caused
dmg_df, X_dmg, y_dmg = _generate_rationale_split(generate_damages_dataset)           # Damages
vp_df, X_vp, y_vp = _generate_rationale_split(generate_violationpurpose_dataset)     # Violation-Purpose

### Train networks

In [5]:
def create_network(layers, random_state=None):
    '''
    Build an untrained MLPClassifier from the notebook's global network parameters.

    Args:
        layers: hidden layer sizes, forwarded as `hidden_layer_sizes`
            (e.g. (24, 6) for two hidden layers of 24 and 6 units).
        random_state: optional seed forwarded to the classifier so a run can be
            reproduced. The default None keeps the original, unseeded behaviour.

    Returns:
        A new, unfitted MLPClassifier.
    '''
    return MLPClassifier(activation=activation, hidden_layer_sizes=layers,
                         max_iter=max_iter, learning_rate_init=learning_rate_init,
                         solver=solver, batch_size=batch_size,
                         random_state=random_state)

def train_networks(nn, X, y):
    '''
    Fit the estimator `nn` on (X, y) and return that same estimator object.

    Kept as a plain module-level function so it can be wrapped with
    joblib's `delayed`.
    '''
    estimator = nn
    # The return value of fit() is deliberately ignored; the object passed in is returned.
    estimator.fit(X, y)
    return estimator

In [6]:
def _fit_network_family(X, y):
    '''
    Create one network per entry in `hidden_layers` and fit them all on (X, y)
    in parallel; returns the list of networks in `hidden_layers` order.
    '''
    untrained = [create_network(sizes) for sizes in hidden_layers]
    # NOTE(review): tqdm wraps the job generator, so the bar advances as jobs are
    # handed to joblib, not when the individual fits finish.
    jobs = (delayed(train_networks)(net, X, y) for net in tqdm(untrained))
    return Parallel(n_jobs=-1)(jobs)

# Networks trained on the regular training set
neural_nets = _fit_network_family(X_train, y_train)

# Networks trained on the smaller training set
neural_nets_small = _fit_network_family(X_train_small, y_train_small)

100%|██████████| 3/3 [00:00<00:00, 499.86it/s]
100%|██████████| 3/3 [00:00<00:00, 3000.93it/s]


## Test performance on regular dataset and rationale evaluation datasets

In [7]:
# Networks trained on the regular training set
# Evaluation sets, listed in the column order of the resulting table
eval_sets = {
    'regular': (X_test, y_test),
    'unl': (X_unl, y_unl),
    'imp': (X_imp, y_imp),
    'cau': (X_cau, y_cau),
    'dmg': (X_dmg, y_dmg),
    'vp': (X_vp, y_vp),
}

# One row per network; each cell is the accuracy in percent, rounded to two decimals
accs = pd.DataFrame.from_dict({
    f'regular {position} layers': {
        set_name: round(100*accuracy_score(y_true, net.predict(X_eval)), 2)
        for set_name, (X_eval, y_true) in eval_sets.items()
    }
    for position, net in enumerate(neural_nets, start=1)
}, orient='index')
accs.to_csv('results/accuracies/accuracies_tort.csv')
accs

Unnamed: 0,regular,unl,imp,cau,dmg,vp
regular 1 layers,100.0,100.0,100.0,100.0,100.0,100.0
regular 2 layers,100.0,100.0,100.0,100.0,100.0,100.0
regular 3 layers,100.0,100.0,100.0,100.0,100.0,100.0


In [8]:
# Networks trained on the smaller training set
# Evaluation sets, listed in the column order of the resulting table
eval_sets = {
    'regular': (X_test, y_test),
    'unl': (X_unl, y_unl),
    'imp': (X_imp, y_imp),
    'cau': (X_cau, y_cau),
    'dmg': (X_dmg, y_dmg),
    'vp': (X_vp, y_vp),
}

# One row per network; each cell is the accuracy in percent, rounded to two decimals
accs_smaller = pd.DataFrame.from_dict({
    f'smaller {position} layers': {
        set_name: round(100*accuracy_score(y_true, net.predict(X_eval)), 2)
        for set_name, (X_eval, y_true) in eval_sets.items()
    }
    for position, net in enumerate(neural_nets_small, start=1)
}, orient='index')
accs_smaller.to_csv('results/accuracies/accuracies_tort_smaller.csv')
accs_smaller

Unnamed: 0,regular,unl,imp,cau,dmg,vp
smaller 1 layers,98.28,93.45,92.97,96.88,97.77,99.35
smaller 2 layers,98.92,97.02,94.53,98.66,98.66,98.7
smaller 3 layers,98.16,96.43,91.41,100.0,100.0,90.91


## Run the experiment multiple times

In [None]:
import copy 

def convert_accs(accs):
    '''
    Summarise a list of accuracies as the string "<mean> ± <std>",
    with both numbers rounded to two decimals.
    '''
    mean_text = str(round(np.mean(accs), 2))
    std_text = str(round(np.std(accs), 2))
    return ' ± '.join((mean_text, std_text))

def merge_accuracies(accuracies):
    '''
    Merge per-run accuracies into one table of "mean ± std" strings.

    Args:
        accuracies: list with one dict per run, each shaped like
            {train_name: {test_name: [accuracy, ...]}} (the structure
            returned by run_experiment). The input is not modified.

    Returns:
        pandas DataFrame with one row per train_name and one column per
        test_name; each cell is convert_accs() applied to the values pooled
        over all runs. A single run is summarised too (std 0.0), and an
        empty list yields an empty DataFrame.
    '''
    # Pool the values of every run first, so the summary no longer depends
    # on how many runs there are.
    pooled = {}
    for run in accuracies:
        for train_name, train_data in run.items():
            per_test = pooled.setdefault(train_name, {})
            for test_name, test_data in train_data.items():
                # extend() copies the values into a fresh list, leaving the caller's lists untouched
                per_test.setdefault(test_name, []).extend(test_data)

    results = {
        train_name: {test_name: convert_accs(values) for test_name, values in per_test.items()}
        for train_name, per_test in pooled.items()
    }
    # from_dict puts train names in the columns; transpose to get one row per network
    return pd.DataFrame.from_dict(results).T

def run_experiment():
    '''
    Run one full train/evaluate cycle and return the raw accuracies.

    Fresh training sets ('regular' with `db_size`, 'smaller' with
    `db_size_small`) and fresh test sets are generated on every call. For each
    training set, one network per entry in `hidden_layers` is trained and then
    scored on every test set.

    Returns:
        dict mapping '<train_name>_<k>' (k = 1-based position of the network in
        `hidden_layers`) to {test_name: [accuracy in percent]}. Every list
        holds exactly one value; merge_accuracies() combines these dicts
        across runs.
    '''
    accuracies = {}

    # Training datasets, sized by the notebook's global parameters
    datasets_train = {
        'regular' : generate_dataset(db_size),
        'smaller' : generate_dataset(db_size_small),
    }
    preprocessed_train = {name: preprocess(df) for name, df in datasets_train.items()}

    # Test datasets
    datasets_test = {
        'regular' : generate_dataset(db_size),
        'unique' : generate_unique_dataset(),
        'unl' : generate_unlawful_dataset(),
        # Same generator as the imputability evaluation set built earlier in the notebook
        'imp' : generate_imputability_dataset(),
        'cau' : generate_cause_dataset(),
        'dmg' : generate_damages_dataset(),
        # NOTE(review): the single-run accuracy tables label this set 'vp';
        # the key stays 'bp' here so the CSV column name is unchanged.
        'bp' : generate_violationpurpose_dataset(),
    }
    preprocessed_test = {name: preprocess(df) for name, df in datasets_test.items()}

    for train_name, (X_fit, y_fit) in preprocessed_train.items():
        # Create and train one network per architecture on this training set.
        # n_jobs=1 trains them one after another; presumably because whole
        # runs are already parallelised by the caller.
        neural_nets = [create_network(hls) for hls in hidden_layers]
        neural_nets = Parallel(n_jobs=1)(delayed(train_networks)(nn, X_fit, y_fit)
                                         for nn in neural_nets)

        # Score every trained network on every test set (accuracy in percent)
        for idx, nn in enumerate(neural_nets):
            accuracies[train_name+'_'+str(idx+1)] = {
                test_name: [100*accuracy_score(y_eval, nn.predict(X_eval))]
                for test_name, (X_eval, y_eval) in preprocessed_test.items()
            }

    return accuracies

In [None]:
# Number of runs
num_runs = 50

# Each run is independent, so the runs are spread over all available cores.
# NOTE(review): tqdm wraps the job generator, so the bar advances as runs are
# handed to joblib, not when they finish.
accuracies = Parallel(n_jobs=-1)(delayed(run_experiment)() for _ in tqdm(range(num_runs)))
results = merge_accuracies(accuracies)

# Derive the file name from num_runs so it always matches the number of runs performed
results.to_csv(f'results/accuracies/{num_runs}_runs_tort.csv')
results