In [1]:
import sys
sys.path.append('../Util')
import pickle
import os

In [2]:
from IPython.display import Image

In [3]:
from evaluation import correlation, lr, perceptron, get_anova_dims, get_mi_dims
from preparation import prepare_dataset, read_datasets

In [4]:
from we import get_we, initiate_model

In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
from tqdm import tqdm

In [7]:
from scipy.stats import f_oneway

In [8]:
from sklearn.metrics import mean_absolute_error, accuracy_score
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer

In [9]:
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# so anything raised through the `warnings` machinery later in the notebook
# is hidden. Consider filtering specific categories only.
warnings.filterwarnings('ignore')

In [10]:
# Language models compared in this notebook, as (name, label) pairs.
# `name` looks like a HuggingFace checkpoint id (TODO confirm); `label` is
# the short tag used to index results and to name per-model entries.
models = [
    {'name': model_name, 'label': model_label}
    for model_name, model_label in (
        ('flaubert/flaubert_small_cased', 'flau_small_c'),
        ('flaubert/flaubert_base_uncased', 'flau_base_u'),
        ('flaubert/flaubert_base_cased', 'flau_base_c'),
        ('flaubert/flaubert_large_cased', 'flau_large_c'),
        ('camembert/camembert-base', 'cam_base'),
        ('xlm-roberta-large', 'xlm_large'),
        ('xlm-roberta-base', 'xlm_base'),
        ('bert-base-multilingual-uncased', 'bert_base_u'),
        ('distilbert-base-multilingual-cased', 'distilbert_base'),
        ('bert-base-multilingual-cased', 'bert_base_c'),
    )
]

In [11]:
# Short model labels, in the same order as `models`; used below as the row
# index of the accuracy tables and as the lookup key in `get_stable_dims`.
labels = [m['label'] for m in models]

In [12]:
def get_stable_dims(label, task, folds=None, results_dir='../Data/best_results'):
    """Return the dimensions that were among the best dimensions in every fold.

    For each fold, the entry of ``accs[label]`` with the highest accuracy on
    that fold is looked up, and the dimensions stored for that entry and fold
    in ``dims[label]`` are collected. A dimension is "stable" when it was
    collected for all folds.

    Parameters
    ----------
    label : str
        Model label used as the top-level key of the pickled dictionaries.
    task : str
        Task name; selects ``{task}_dims.pickle`` and ``{task}_accs.pickle``.
    folds : int, optional
        Number of folds to scan. Defaults to the notebook-level ``n_folds``,
        so existing two-argument calls behave as before.
    results_dir : str, optional
        Directory that holds the pickled results.

    Returns
    -------
    list
        Stable dimensions in ascending order (numpy scalars, as produced by
        ``np.unique``). Empty when no dimension is shared by all folds.

    Notes
    -----
    The ``{task}_medians.pickle`` file was previously loaded but never used;
    it is no longer read.
    """
    # Resolve the fold count lazily so the global is only needed when the
    # caller does not pass one explicitly.
    if folds is None:
        folds = n_folds

    # NOTE(review): pickle.load can execute arbitrary code; only point
    # `results_dir` at files produced by this project.
    with open(f'{results_dir}/{task}_dims.pickle', 'rb') as f:
        dims = pickle.load(f)

    with open(f'{results_dir}/{task}_accs.pickle', 'rb') as f:
        accs = pickle.load(f)

    all_best_dims = []
    for i in range(folds):
        # Accuracy of every entry on fold i; keep the dims of the best one.
        fold_accs = {k: v[i] for k, v in accs[label].items()}
        best_key = max(fold_accs, key=fold_accs.get)
        # De-duplicate within the fold so that a count equal to `folds`
        # really means "present in every fold".
        all_best_dims.extend(np.unique(dims[label][best_key][i]))

    d, c = np.unique(all_best_dims, return_counts=True)

    return [dim for dim, count in zip(d, c) if count == folds]

In [13]:
# Probing tasks, grouped by feature. Later cells address them by position
# (tasks[0] = 'gender_noun', tasks[1] = 'gender_adj', ...), so the order
# must not change.
tasks = (
    ['gender_noun', 'gender_adj', 'gender_na']
    + ['number_noun', 'number_adj', 'number_na']
    + ['pos_noun', 'pos_adj', 'pos_verb']
    + ['semantic_person', 'semantic_act']
)

In [14]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [15]:
# Classifier classes (not instances): the evaluation loops below call
# `clfs[j]()` to build a fresh, default-configured estimator for every fold.
clfs = [KNeighborsClassifier, GaussianNB, LogisticRegression, RandomForestClassifier, DecisionTreeClassifier]

In [16]:
# Empty tasks x (classifier names + 'Mean') table for the all-dimension runs.
# NOTE(review): not filled in the part of the notebook visible here.
all_dims_accs = pd.DataFrame(columns=[c.__name__ for c in clfs] + ['Mean'], index=tasks)

In [17]:
# Empty tasks x (classifier names + 'Mean') table for the stable-dimension runs.
# NOTE(review): not filled in the part of the notebook visible here.
stable_dims_accs = pd.DataFrame(columns=[c.__name__ for c in clfs] + ['Mean'], index=tasks)

# Gender_Nouns

In [18]:
# Noun embedding tables; the fold-building cells iterate over this object
# one entry at a time (presumably one entry per label, in `labels` order -
# TODO confirm against read_datasets).
noun_we_with_features = read_datasets(
    path='../Data',
    model_labels=labels,
    file_name='all_nouns_we.csv',
)

In [19]:
# There are 4 extra features in addition to the embedding dimensions in the file:
# number, gender, lemma, semantic info
feature_col_count = 4

# Feature to investigate in this section
feature = 'Gender'

In [20]:
# NOTE(review): `pos` is not read by the noun/adjective cells below; it is
# reassigned before its first visible use (the POS filter in Gender_NA).
pos = ['NOUN']

Split each model's embeddings into train and test sets using k-fold cross-validation:

In [21]:
# Per-model containers for the noun folds: entry [i][k] is fold k of model i.
X_noun_train, y_noun_train = [], []
X_noun_test, y_noun_test = [], []

In [22]:
# Number of cross-validation folds. Also read as a global by
# `get_stable_dims`, so this cell must run before that function is called.
n_folds = 5

In [23]:
# Build the cross-validation splits of the noun data for every model.
for we in noun_we_with_features:
    # Drop rows whose gender or number is marked 'invariable'.
    variable_rows = (we.Gender != 'invariable') & (we.Number != 'invariable')
    X, y = prepare_dataset(
        dataset=we[variable_rows],
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='feminine',
        split=False,
        balance=True,
    )

    # Contiguous folds in row order (no shuffling happens in this cell).
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains = [], []
    X_tests, y_tests = [], []

    for i in range(n_folds):
        # Training part: every fold except fold i. The concatenated array is
        # re-wrapped in a DataFrame, which gets default integer column labels.
        X_rest = X_folds[:i] + X_folds[i+1:]
        y_rest = y_folds[:i] + y_folds[i+1:]
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Held-out part: cast its column labels to int so that integer
        # dimension ids select columns in train and test alike.
        held_out = X_folds[i]
        held_out.columns = held_out.columns.map(int)
        X_tests.append(held_out)
        y_tests.append(y_folds[i])

    X_noun_train.append(X_trains)
    y_noun_train.append(y_trains)
    X_noun_test.append(X_tests)
    y_noun_test.append(y_tests)

In [24]:
# One dict per model (classifier name -> mean fold accuracy), filled by the next cell.
accs = []

In [25]:
# For all dims: score every classifier on every model using the complete
# embedding, averaging accuracy over the folds.
for i in range(len(models)):
    model_accs = {}
    for j, clf_cls in enumerate(clfs):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            # Fresh, default-configured estimator for each fold.
            clf = clf_cls()
            clf.fit(X=X_noun_train[i][k], y=y_noun_train[i][k])
            y_pred = clf.predict(X_noun_test[i][k])
            clf_accs.append(accuracy_score(y_true=y_noun_test[i][k], y_pred=y_pred))
        # Keyed by the classifier's class name.
        model_accs[clf_cls.__name__] = np.average(clf_accs)
    accs.append(model_accs)

100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.21it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 23.14it/s]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  1.80it/s]
100%|█████████████████████████████████████████████| 5/5 [01:00<00:00, 12.08s/it]
100%|█████████████████████████████████████████████| 5/5 [00:21<00:00,  4.29s/it]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.53it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 13.20it/s]
100%|█████████████████████████████████████████████| 5/5 [00:04<00:00,  1.15it/s]
100%|█████████████████████████████████████████████| 5/5 [01:40<00:00, 20.07s/it]
100%|█████████████████████████████████████████████| 5/5 [00:41<00:00,  8.34s/it]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.04it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 16.80it/s]
100%|███████████████████████

In [26]:
# Rows: model labels; columns: classifier names; values: mean fold accuracy (all dims).
acc_df = pd.DataFrame(data=accs, index=labels)

In [27]:
# Display the all-dimension accuracy table.
acc_df

Unnamed: 0,KNeighborsClassifier,GaussianNB,LogisticRegression,RandomForestClassifier,DecisionTreeClassifier
flau_small_c,0.697564,0.7341,0.955937,0.804633,0.621193
flau_base_u,0.56175,0.588379,0.637636,0.587949,0.539907
flau_base_c,0.584745,0.638534,0.759558,0.663397,0.55286
flau_large_c,0.77867,0.886585,0.934794,0.88075,0.699002
cam_base,0.518037,0.516642,0.545351,0.524768,0.509022
xlm_large,0.559876,0.547626,0.615811,0.568617,0.531479
xlm_base,0.538449,0.493427,0.642481,0.562059,0.529278
bert_base_u,0.529163,0.514017,0.590641,0.567134,0.523284
distilbert_base,0.567314,0.555663,0.705502,0.608738,0.526861
bert_base_c,0.539159,0.498058,0.52945,0.60356,0.543689


In [28]:
# One dict per model (classifier name -> mean fold accuracy on stable dims), filled by the next cell.
accs_stable = []

In [29]:
# For stable dims: repeat the evaluation using only the dimensions that
# `get_stable_dims` reports for this model on the gender_noun task.
for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[0])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                clf.fit(X=X_noun_train[i][k][stable_dims], y=y_noun_train[i][k])
                y_pred = clf.predict(X_noun_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_noun_test[i][k])
            except Exception as err:
                # Best effort: a failing fold (e.g. no stable dimension to
                # fit on) still counts as accuracy 0, but it is reported
                # instead of being swallowed. `except Exception` also lets
                # KeyboardInterrupt stop the run.
                tqdm.write(f'{labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{err!r} -> accuracy recorded as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.58it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 95.90it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.77it/s]
100%|█████████████████████████████████████████████| 5/5 [00:26<00:00,  5.28s/it]
100%|█████████████████████████████████████████████| 5/5 [00:04<00:00,  1.25it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.75it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 160.37it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 10.82it/s]
100%|█████████████████████████████████████████████| 5/5 [00:21<00:00,  4.37s/it]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.19it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.15it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 67.42it/s]
100%|███████████████████████

In [33]:
# Rows: model labels; columns: classifier names; values: mean fold accuracy (stable dims).
stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

In [39]:
# Write the stable-dims table as LaTeX; the 'Classification_Accuracies'
# directory must already exist.
stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[0]}_stable')

In [40]:
# Stable minus all-dims accuracy: positive values mean the stable subset scored higher.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[0]}_diff')

# Gender_Adjectives

In [41]:
# Adjective embedding tables; iterated one entry at a time by the
# fold-building cells (presumably one entry per label - TODO confirm).
adj_we_with_features = read_datasets(
    path='../Data',
    model_labels=labels,
    file_name='all_adjs_we.csv',
)

In [42]:
# Value passed to prepare_dataset for the adjective file - presumably its
# number of non-embedding columns (one fewer than for nouns); TODO confirm.
feature_col_count = 3

In [43]:
# Per-model containers for the adjective folds: entry [i][k] is fold k of model i.
X_adj_train, y_adj_train = [], []
X_adj_test, y_adj_test = [], []

In [44]:
# Build the cross-validation splits of the adjective data for every model.
for we in adj_we_with_features:
    # Drop rows whose gender or number is marked 'invariable'.
    variable_rows = (we.Gender != 'invariable') & (we.Number != 'invariable')
    X, y = prepare_dataset(
        dataset=we[variable_rows],
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='feminine',
        split=False,
        balance=True,
    )

    # Contiguous folds in row order (no shuffling happens in this cell).
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains = [], []
    X_tests, y_tests = [], []

    for i in range(n_folds):
        # Training part: every fold except fold i, re-wrapped in a DataFrame
        # with default integer column labels.
        X_rest = X_folds[:i] + X_folds[i+1:]
        y_rest = y_folds[:i] + y_folds[i+1:]
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Held-out part: integer column labels, matching the training frames.
        held_out = X_folds[i]
        held_out.columns = held_out.columns.map(int)
        X_tests.append(held_out)
        y_tests.append(y_folds[i])

    X_adj_train.append(X_trains)
    y_adj_train.append(y_trains)
    X_adj_test.append(X_tests)
    y_adj_test.append(y_tests)

In [45]:
# All-dimension baseline on the adjective splits: one dict per model,
# mapping classifier name to mean fold accuracy.
accs = []

for i in range(len(models)):
    model_accs = {}
    for j, clf_cls in enumerate(clfs):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            # Fresh, default-configured estimator for each fold.
            clf = clf_cls()
            clf.fit(X=X_adj_train[i][k], y=y_adj_train[i][k])
            y_pred = clf.predict(X_adj_test[i][k])
            clf_accs.append(accuracy_score(y_true=y_adj_test[i][k], y_pred=y_pred))
        model_accs[clf_cls.__name__] = np.average(clf_accs)
    accs.append(model_accs)

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  7.82it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 42.46it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.63it/s]
100%|█████████████████████████████████████████████| 5/5 [00:22<00:00,  4.52s/it]
100%|█████████████████████████████████████████████| 5/5 [00:09<00:00,  1.83s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  7.93it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 31.12it/s]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.36it/s]
100%|█████████████████████████████████████████████| 5/5 [00:37<00:00,  7.54s/it]
100%|█████████████████████████████████████████████| 5/5 [00:16<00:00,  3.21s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.49it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 25.16it/s]
100%|███████████████████████

In [46]:
# Rows: model labels; columns: classifier names; values: mean fold accuracy (all dims).
acc_df = pd.DataFrame(data=accs, index=labels)

In [47]:
accs_stable = []

# For stable dims: evaluate on the ADJECTIVE splits using only the
# dimensions reported as stable for the gender_adj task. (This cell
# previously trained and tested on the noun splits, so its numbers were not
# comparable with the adjective `acc_df` they are subtracted from.)
for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[1])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                clf.fit(X=X_adj_train[i][k][stable_dims], y=y_adj_train[i][k])
                y_pred = clf.predict(X_adj_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_adj_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as accuracy 0,
                # but it is reported instead of being swallowed, and
                # KeyboardInterrupt is no longer caught.
                tqdm.write(f'{labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{err!r} -> accuracy recorded as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.41it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 27.56it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.87it/s]
100%|█████████████████████████████████████████████| 5/5 [00:37<00:00,  7.60s/it]
100%|█████████████████████████████████████████████| 5/5 [00:07<00:00,  1.49s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  9.19it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 200.11it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 16.11it/s]
100%|█████████████████████████████████████████████| 5/5 [00:19<00:00,  3.98s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.59it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 13.11it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 311.00it/s]
100%|███████████████████████

In [48]:
# Tabulate the stable-dims accuracies (rows: model labels, columns: classifier names).
stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

# Write the stable-dims table as LaTeX; the output directory must already exist.
stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[1]}_stable')

# Stable minus all-dims accuracy: positive values mean the stable subset scored higher.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[1]}_diff')

# Gender_NA

In [49]:
# Value passed to prepare_dataset for 'all_unique_pos_we.csv' - presumably
# its number of non-embedding columns; TODO confirm.
feature_col_count = 6


In [50]:
# Embedding tables covering several parts of speech (they carry a POS
# column, which the fold-building cells filter on).
all_we_with_features = read_datasets(
    path='../Data',
    model_labels=labels,
    file_name='all_unique_pos_we.csv',
)

In [51]:
# Parts of speech kept by the `we.POS.isin(pos)` filter in the NA sections.
pos = ['NOUN', 'ADJ']

In [52]:
# Per-model containers for the noun+adjective folds: entry [i][k] is fold k of model i.
X_na_train, y_na_train = [], []
X_na_test, y_na_test = [], []

In [53]:
# Build the cross-validation splits of the noun+adjective data for every model.
for we in all_we_with_features:
    # Keep rows with a marked gender and number whose POS is listed in `pos`.
    selected_rows = (
        (we.Gender != 'invariable')
        & (we.Number != 'invariable')
        & (we.POS.isin(pos))
    )
    X, y = prepare_dataset(
        dataset=we[selected_rows],
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='feminine',
        split=False,
        balance=True,
    )

    # Contiguous folds in row order (no shuffling happens in this cell).
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains = [], []
    X_tests, y_tests = [], []

    for i in range(n_folds):
        # Training part: every fold except fold i, re-wrapped in a DataFrame
        # with default integer column labels.
        X_rest = X_folds[:i] + X_folds[i+1:]
        y_rest = y_folds[:i] + y_folds[i+1:]
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Held-out part: integer column labels, matching the training frames.
        held_out = X_folds[i]
        held_out.columns = held_out.columns.map(int)
        X_tests.append(held_out)
        y_tests.append(y_folds[i])

    X_na_train.append(X_trains)
    y_na_train.append(y_trains)
    X_na_test.append(X_tests)
    y_na_test.append(y_tests)

In [54]:
# All-dimension baseline on the noun+adjective splits: one dict per model,
# mapping classifier name to mean fold accuracy.
accs = []

for i in range(len(models)):
    model_accs = {}
    for j, clf_cls in enumerate(clfs):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            # Fresh, default-configured estimator for each fold.
            clf = clf_cls()
            clf.fit(X=X_na_train[i][k], y=y_na_train[i][k])
            y_pred = clf.predict(X_na_test[i][k])
            clf_accs.append(accuracy_score(y_true=y_na_test[i][k], y_pred=y_pred))
        model_accs[clf_cls.__name__] = np.average(clf_accs)
    accs.append(model_accs)

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  6.48it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 27.28it/s]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.57it/s]
100%|█████████████████████████████████████████████| 5/5 [00:44<00:00,  8.94s/it]
100%|█████████████████████████████████████████████| 5/5 [00:16<00:00,  3.30s/it]
100%|█████████████████████████████████████████████| 5/5 [00:04<00:00,  1.09it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.91it/s]
100%|█████████████████████████████████████████████| 5/5 [00:05<00:00,  1.19s/it]
100%|█████████████████████████████████████████████| 5/5 [01:19<00:00, 15.93s/it]
100%|█████████████████████████████████████████████| 5/5 [00:35<00:00,  7.01s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.74it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 21.72it/s]
100%|███████████████████████

In [55]:
# Rows: model labels; columns: classifier names; values: mean fold accuracy (all dims).
acc_df = pd.DataFrame(data=accs, index=labels)

In [62]:
accs_stable = []

# For stable dims: evaluate on the NOUN+ADJECTIVE splits using only the
# dimensions reported as stable for the gender_na task. (This cell
# previously trained and tested on the noun-only splits, so its numbers were
# not comparable with the NA `acc_df` they are subtracted from.)
for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[2])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                clf.fit(X=X_na_train[i][k][stable_dims], y=y_na_train[i][k])
                y_pred = clf.predict(X_na_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_na_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as accuracy 0,
                # but it is reported instead of being swallowed, and
                # KeyboardInterrupt is no longer caught.
                tqdm.write(f'{labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{err!r} -> accuracy recorded as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 18.15it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 232.54it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 11.22it/s]
100%|█████████████████████████████████████████████| 5/5 [00:14<00:00,  2.83s/it]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.18it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 16.29it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 228.19it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 18.24it/s]
100%|█████████████████████████████████████████████| 5/5 [00:15<00:00,  3.14s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.21it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 15.54it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 182.32it/s]
100%|███████████████████████

In [63]:
# Tabulate the stable-dims accuracies (rows: model labels, columns: classifier names).
stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

# Write the stable-dims table as LaTeX; the output directory must already exist.
stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[2]}_stable')

# Stable minus all-dims accuracy: positive values mean the stable subset scored higher.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[2]}_diff')

# Number_Noun

In [64]:
# There are 4 extra features in addition to the embedding dimensions in the file:
# number, gender, lemma, semantic info
feature_col_count = 4

# Feature to investigate from here on
feature = 'Number'

In [65]:
# Start over with empty noun containers so the Number folds do not pile up
# behind the Gender folds built earlier.
X_noun_train, y_noun_train = [], []
X_noun_test, y_noun_test = [], []

In [66]:
# Build the cross-validation splits of the noun data for every model.
for we in noun_we_with_features:
    # Drop rows whose gender or number is marked 'invariable'.
    variable_rows = (we.Gender != 'invariable') & (we.Number != 'invariable')
    X, y = prepare_dataset(
        dataset=we[variable_rows],
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='plural',
        split=False,
        balance=True,
    )

    # Contiguous folds in row order (no shuffling happens in this cell).
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains = [], []
    X_tests, y_tests = [], []

    for i in range(n_folds):
        # Training part: every fold except fold i, re-wrapped in a DataFrame
        # with default integer column labels.
        X_rest = X_folds[:i] + X_folds[i+1:]
        y_rest = y_folds[:i] + y_folds[i+1:]
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Held-out part: integer column labels, matching the training frames.
        held_out = X_folds[i]
        held_out.columns = held_out.columns.map(int)
        X_tests.append(held_out)
        y_tests.append(y_folds[i])

    X_noun_train.append(X_trains)
    y_noun_train.append(y_trains)
    X_noun_test.append(X_tests)
    y_noun_test.append(y_tests)

In [67]:
# All-dimension baseline on the noun splits: one dict per model, mapping
# classifier name to mean fold accuracy.
accs = []

for i in range(len(models)):
    model_accs = {}
    for j, clf_cls in enumerate(clfs):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            # Fresh, default-configured estimator for each fold.
            clf = clf_cls()
            clf.fit(X=X_noun_train[i][k], y=y_noun_train[i][k])
            y_pred = clf.predict(X_noun_test[i][k])
            clf_accs.append(accuracy_score(y_true=y_noun_test[i][k], y_pred=y_pred))
        model_accs[clf_cls.__name__] = np.average(clf_accs)
    accs.append(model_accs)

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  6.97it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 27.13it/s]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.29it/s]
100%|█████████████████████████████████████████████| 5/5 [00:38<00:00,  7.79s/it]
100%|█████████████████████████████████████████████| 5/5 [00:18<00:00,  3.79s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.93it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 18.37it/s]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.32it/s]
100%|█████████████████████████████████████████████| 5/5 [01:01<00:00, 12.39s/it]
100%|█████████████████████████████████████████████| 5/5 [00:26<00:00,  5.28s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.65it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 17.91it/s]
100%|███████████████████████

In [68]:
# Rows: model labels; columns: classifier names; values: mean fold accuracy (all dims).
acc_df = pd.DataFrame(data=accs, index=labels)

In [69]:
accs_stable = []

# For stable dims: evaluate on the noun splits using only the dimensions
# reported as stable for the number_noun task.
for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[3])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                clf.fit(X=X_noun_train[i][k][stable_dims], y=y_noun_train[i][k])
                y_pred = clf.predict(X_noun_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_noun_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as accuracy 0,
                # but it is reported instead of being swallowed, and
                # KeyboardInterrupt is no longer caught.
                tqdm.write(f'{labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{err!r} -> accuracy recorded as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

# Tabulate (rows: model labels, columns: classifier names) and export.
stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[3]}_stable')

# Stable minus all-dims accuracy: positive values mean the stable subset scored higher.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[3]}_diff')

100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.44it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 19.89it/s]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.31it/s]
100%|█████████████████████████████████████████████| 5/5 [00:36<00:00,  7.22s/it]
100%|█████████████████████████████████████████████| 5/5 [00:14<00:00,  2.85s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.21it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 24.46it/s]
100%|█████████████████████████████████████████████| 5/5 [00:04<00:00,  1.14it/s]
100%|█████████████████████████████████████████████| 5/5 [00:33<00:00,  6.63s/it]
100%|█████████████████████████████████████████████| 5/5 [00:05<00:00,  1.04s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 13.13it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 162.73it/s]
100%|███████████████████████

# Number_Adj

In [70]:
# Value passed to prepare_dataset for the adjective file - presumably its
# number of non-embedding columns; TODO confirm.
feature_col_count = 3

In [71]:
# Reset the adjective containers first. Without this the Number folds are
# appended BEHIND the Gender folds built in the Gender_Adjectives section,
# and every later `X_adj_train[i]` with i < len(models) silently reads the
# Gender splits.
X_adj_train = []
y_adj_train = []

X_adj_test = []
y_adj_test = []

# Build the cross-validation splits of the adjective data for every model.
for we in adj_we_with_features:
    # Drop rows whose gender or number is marked 'invariable'.
    X, y = prepare_dataset(dataset=we[(we.Gender != 'invariable') & (we.Number != 'invariable')],
                                           feature_col_count=feature_col_count,
                                           feature_name=feature,
                                           normalize=False,
                                           encode=True,
                                           encode_as1='plural',
                                           split=False,
                                           balance=True)
    X_trains = []
    y_trains = []

    X_tests = []
    y_tests = []

    # Contiguous folds in row order (no shuffling happens in this cell).
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    for i in range(n_folds):
        # Training part: every fold except fold i, re-wrapped in a DataFrame
        # with default integer column labels.
        X_trains.append(pd.DataFrame(np.concatenate(X_folds[:i] + X_folds[i+1:])))
        y_trains.append(np.concatenate(y_folds[:i] + y_folds[i+1:]))

        # Held-out part: integer column labels, matching the training frames.
        X_folds[i].columns = X_folds[i].columns.map(int)
        X_tests.append(X_folds[i])
        y_tests.append(y_folds[i])

    X_adj_train.append(X_trains)
    X_adj_test.append(X_tests)

    y_adj_train.append(y_trains)
    y_adj_test.append(y_tests)

In [72]:
# All-dimension baseline on the adjective splits: one dict per model,
# mapping classifier name to mean fold accuracy.
# NOTE(review): this reads X_adj_train[i] for i < len(models); verify the
# adjective containers were emptied before the Number folds were built,
# otherwise these indices still point at the Gender splits.
accs = []

for i in range(len(models)):
    model_accs = {}
    for j, clf_cls in enumerate(clfs):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            # Fresh, default-configured estimator for each fold.
            clf = clf_cls()
            clf.fit(X=X_adj_train[i][k], y=y_adj_train[i][k])
            y_pred = clf.predict(X_adj_test[i][k])
            clf_accs.append(accuracy_score(y_true=y_adj_test[i][k], y_pred=y_pred))
        model_accs[clf_cls.__name__] = np.average(clf_accs)
    accs.append(model_accs)

100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.19it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 16.21it/s]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.50it/s]
100%|█████████████████████████████████████████████| 5/5 [00:30<00:00,  6.07s/it]
100%|█████████████████████████████████████████████| 5/5 [00:12<00:00,  2.49s/it]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.44it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  9.34it/s]
100%|█████████████████████████████████████████████| 5/5 [00:04<00:00,  1.01it/s]
100%|█████████████████████████████████████████████| 5/5 [00:51<00:00, 10.24s/it]
100%|█████████████████████████████████████████████| 5/5 [00:20<00:00,  4.03s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.69it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 13.43it/s]
100%|███████████████████████

In [73]:
# Rows: model labels; columns: classifier names; values: mean fold accuracy (all dims).
acc_df = pd.DataFrame(data=accs, index=labels)

In [74]:
accs_stable = []

# For stable dims: evaluate on the ADJECTIVE splits using only the
# dimensions reported as stable for the number_adj task. (This cell
# previously trained and tested on the noun splits, so its numbers were not
# comparable with the adjective `acc_df` they are subtracted from.)
# NOTE(review): X_adj_train[i] must hold the Number folds here; verify the
# adjective containers were emptied before those folds were built.
for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[4])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                clf.fit(X=X_adj_train[i][k][stable_dims], y=y_adj_train[i][k])
                y_pred = clf.predict(X_adj_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_adj_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as accuracy 0,
                # but it is reported instead of being swallowed, and
                # KeyboardInterrupt is no longer caught.
                tqdm.write(f'{labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{err!r} -> accuracy recorded as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

# Tabulate (rows: model labels, columns: classifier names) and export.
stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[4]}_stable')

# Stable minus all-dims accuracy: positive values mean the stable subset scored higher.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[4]}_diff')

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.29it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 34.49it/s]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.55it/s]
100%|█████████████████████████████████████████████| 5/5 [00:28<00:00,  5.79s/it]
100%|█████████████████████████████████████████████| 5/5 [00:05<00:00,  1.00s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 18.29it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 300.39it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 34.49it/s]
100%|█████████████████████████████████████████████| 5/5 [00:08<00:00,  1.75s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.93it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 15.33it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 280.08it/s]
100%|███████████████████████

# Number_NA

In [75]:
# Value passed to prepare_dataset for 'all_unique_pos_we.csv' - presumably
# its number of non-embedding columns; TODO confirm.
feature_col_count = 6


In [76]:
# Start over with empty noun+adjective containers so the Number folds do
# not pile up behind the Gender folds built earlier.
X_na_train, y_na_train = [], []
X_na_test, y_na_test = [], []

In [77]:
# Build the cross-validation splits of the noun+adjective data for every model.
for we in all_we_with_features:
    # Keep rows with a marked gender and number whose POS is listed in `pos`.
    selected_rows = (
        (we.Gender != 'invariable')
        & (we.Number != 'invariable')
        & (we.POS.isin(pos))
    )
    X, y = prepare_dataset(
        dataset=we[selected_rows],
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='plural',
        split=False,
        balance=True,
    )

    # Contiguous folds in row order (no shuffling happens in this cell).
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains = [], []
    X_tests, y_tests = [], []

    for i in range(n_folds):
        # Training part: every fold except fold i, re-wrapped in a DataFrame
        # with default integer column labels.
        X_rest = X_folds[:i] + X_folds[i+1:]
        y_rest = y_folds[:i] + y_folds[i+1:]
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Held-out part: integer column labels, matching the training frames.
        held_out = X_folds[i]
        held_out.columns = held_out.columns.map(int)
        X_tests.append(held_out)
        y_tests.append(y_folds[i])

    X_na_train.append(X_trains)
    y_na_train.append(y_trains)
    X_na_test.append(X_tests)
    y_na_test.append(y_tests)

In [78]:
# All-dimension baseline on the noun+adjective splits: one dict per model,
# mapping classifier name to mean fold accuracy.
accs = []

for i in range(len(models)):
    model_accs = {}
    for j, clf_cls in enumerate(clfs):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            # Fresh, default-configured estimator for each fold.
            clf = clf_cls()
            clf.fit(X=X_na_train[i][k], y=y_na_train[i][k])
            y_pred = clf.predict(X_na_test[i][k])
            clf_accs.append(accuracy_score(y_true=y_na_test[i][k], y_pred=y_pred))
        model_accs[clf_cls.__name__] = np.average(clf_accs)
    accs.append(model_accs)

100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.29it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.06it/s]
100%|█████████████████████████████████████████████| 5/5 [00:09<00:00,  2.00s/it]
100%|█████████████████████████████████████████████| 5/5 [00:51<00:00, 10.36s/it]
100%|█████████████████████████████████████████████| 5/5 [00:19<00:00,  3.82s/it]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.51it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  7.41it/s]
100%|█████████████████████████████████████████████| 5/5 [00:05<00:00,  1.17s/it]
100%|█████████████████████████████████████████████| 5/5 [01:13<00:00, 14.75s/it]
100%|█████████████████████████████████████████████| 5/5 [00:31<00:00,  6.39s/it]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  1.96it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  9.18it/s]
100%|███████████████████████

In [79]:
# Rows: model labels; columns: classifier names; values: mean fold accuracy (all dims).
acc_df = pd.DataFrame(data=accs, index=labels)

In [80]:
# Same probe as the baseline cell, restricted to the columns returned by
# get_stable_dims for tasks[5]. Writes two LaTeX tables: the stable-dims
# accuracies and their difference to the baseline `acc_df`.
# NOTE(review): confirm that tasks[5] is the task the X_na_* folds were built for.
accs_stable = []

for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[5])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                # Must be the X_na_* / y_na_* folds that `acc_df` was computed on;
                # the X_noun_* folds belong to a different section, and using them
                # would make the diff below compare two different datasets.
                clf.fit(X=X_na_train[i][k][stable_dims], y=y_na_train[i][k])
                y_pred = clf.predict(X_na_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_na_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as 0, but say why.
                # `Exception` (not a bare except) lets a kernel interrupt through.
                tqdm.write(f'[{tasks[5]}] {labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{type(err).__name__}: {err} -> scored as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[5]}_stable')

# Positive entries mean the stable dimensions alone beat the full embedding.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[5]}_diff')

100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.25it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 38.01it/s]
100%|█████████████████████████████████████████████| 5/5 [00:05<00:00,  1.01s/it]
100%|█████████████████████████████████████████████| 5/5 [00:27<00:00,  5.55s/it]
100%|█████████████████████████████████████████████| 5/5 [00:05<00:00,  1.13s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.59it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 44.94it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.92it/s]
100%|█████████████████████████████████████████████| 5/5 [00:25<00:00,  5.13s/it]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.41it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 14.01it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 378.56it/s]
100%|███████████████████████

# POS_Noun

In [81]:
# Settings for the POS probes; `feature` and `feature_col_count` are passed
# to prepare_dataset in the cells below.
# NOTE(review): feature_col_count is presumably the number of annotation
# (non-embedding) columns — confirm against prepare_dataset.
feature, feature_col_count = 'POS', 6
pos = ['NOUN']

In [82]:
# Per-model fold containers for the NOUN probe: element [model][fold].
X_noun_train, y_noun_train = [], []
X_noun_test, y_noun_test = [], []

In [83]:
# Build the cross-validation folds for the NOUN probe, one entry per frame in
# all_we_with_features. Rows whose Gender or Number is 'invariable' are
# dropped before the data is handed to prepare_dataset.
for we in all_we_with_features:
    variable_rows = we[(we.Gender != 'invariable') & (we.Number != 'invariable')]
    X, y = prepare_dataset(
        dataset=variable_rows,
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='NOUN',
        split=False,
        balance=True,
    )

    # Contiguous, unshuffled split into n_folds pieces.
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains, X_tests, y_tests = [], [], [], []
    for held_out in range(n_folds):
        X_rest = X_folds[:held_out] + X_folds[held_out + 1:]
        y_rest = y_folds[:held_out] + y_folds[held_out + 1:]
        # np.concatenate discards the column labels, so the training frame
        # gets default integer columns.
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Cast the held-out fold's column labels to int, presumably so they
        # line up with the training frame's integer columns.
        test_fold = X_folds[held_out]
        test_fold.columns = test_fold.columns.map(int)
        X_tests.append(test_fold)
        y_tests.append(y_folds[held_out])

    X_noun_train.append(X_trains)
    X_noun_test.append(X_tests)

    y_noun_train.append(y_trains)
    y_noun_test.append(y_tests)

In [84]:
# Baseline NOUN probe: each classifier class in `clfs` is fitted on all
# columns of every fold, and the fold accuracies are averaged.
accs = []

for model_idx in range(len(models)):
    per_classifier = {}
    for clf_cls in clfs:
        fold_scores = []
        for fold in tqdm(range(n_folds)):
            estimator = clf_cls()
            estimator.fit(X=X_noun_train[model_idx][fold], y=y_noun_train[model_idx][fold])
            predicted = estimator.predict(X_noun_test[model_idx][fold])
            fold_scores.append(
                accuracy_score(y_true=y_noun_test[model_idx][fold], y_pred=predicted)
            )
        per_classifier[clf_cls.__name__] = np.average(fold_scores)
    accs.append(per_classifier)

100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.52it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 11.26it/s]
100%|█████████████████████████████████████████████| 5/5 [00:08<00:00,  1.64s/it]
100%|█████████████████████████████████████████████| 5/5 [01:27<00:00, 17.57s/it]
100%|█████████████████████████████████████████████| 5/5 [00:39<00:00,  7.96s/it]
100%|█████████████████████████████████████████████| 5/5 [00:05<00:00,  1.00s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  7.25it/s]
100%|█████████████████████████████████████████████| 5/5 [00:08<00:00,  1.69s/it]
100%|█████████████████████████████████████████████| 5/5 [01:53<00:00, 22.69s/it]
100%|█████████████████████████████████████████████| 5/5 [00:45<00:00,  9.02s/it]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.34it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.92it/s]
100%|███████████████████████

In [85]:
# Baseline NOUN table: rows indexed by model label, columns taken from the
# keys of the per-model dicts in `accs`.
acc_df = pd.DataFrame(accs, index=labels)

In [87]:
# NOUN probe restricted to the columns returned by get_stable_dims for
# tasks[6]. Writes the stable-dims accuracies and their difference to the
# baseline `acc_df` as LaTeX tables.
# NOTE(review): confirm that tasks[6] is the POS_Noun task.
accs_stable = []

for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[6])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                clf.fit(X=X_noun_train[i][k][stable_dims], y=y_noun_train[i][k])
                y_pred = clf.predict(X_noun_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_noun_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as 0, but say why.
                # `Exception` (not a bare except) lets a kernel interrupt through.
                tqdm.write(f'[{tasks[6]}] {labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{type(err).__name__}: {err} -> scored as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[6]}_stable')

# Positive entries mean the stable dimensions alone beat the full embedding.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[6]}_diff')

100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.83it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 23.71it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.09it/s]
100%|█████████████████████████████████████████████| 5/5 [00:22<00:00,  4.58s/it]
100%|█████████████████████████████████████████████| 5/5 [00:04<00:00,  1.07it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 12.06it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 133.24it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 12.00it/s]
100%|█████████████████████████████████████████████| 5/5 [00:12<00:00,  2.50s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.30it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 16.66it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 216.62it/s]
100%|███████████████████████

# POS_Adj

In [88]:
# Per-model fold containers for the ADJ probe: element [model][fold].
X_adj_train, y_adj_train = [], []
X_adj_test, y_adj_test = [], []

In [89]:
# Build the cross-validation folds for the ADJ probe, one entry per frame in
# all_we_with_features. Rows whose Gender or Number is 'invariable' are
# dropped before the data is handed to prepare_dataset.
for we in all_we_with_features:
    variable_rows = we[(we.Gender != 'invariable') & (we.Number != 'invariable')]
    X, y = prepare_dataset(
        dataset=variable_rows,
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='ADJ',
        split=False,
        balance=True,
    )

    # Contiguous, unshuffled split into n_folds pieces.
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains, X_tests, y_tests = [], [], [], []
    for held_out in range(n_folds):
        X_rest = X_folds[:held_out] + X_folds[held_out + 1:]
        y_rest = y_folds[:held_out] + y_folds[held_out + 1:]
        # np.concatenate discards the column labels, so the training frame
        # gets default integer columns.
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Cast the held-out fold's column labels to int, presumably so they
        # line up with the training frame's integer columns.
        test_fold = X_folds[held_out]
        test_fold.columns = test_fold.columns.map(int)
        X_tests.append(test_fold)
        y_tests.append(y_folds[held_out])

    X_adj_train.append(X_trains)
    X_adj_test.append(X_tests)

    y_adj_train.append(y_trains)
    y_adj_test.append(y_tests)

In [90]:
# Baseline ADJ probe: each classifier class in `clfs` is fitted on all
# columns of every fold, and the fold accuracies are averaged.
accs = []

for model_idx in range(len(models)):
    per_classifier = {}
    for clf_cls in clfs:
        fold_scores = []
        for fold in tqdm(range(n_folds)):
            estimator = clf_cls()
            estimator.fit(X=X_adj_train[model_idx][fold], y=y_adj_train[model_idx][fold])
            predicted = estimator.predict(X_adj_test[model_idx][fold])
            fold_scores.append(
                accuracy_score(y_true=y_adj_test[model_idx][fold], y_pred=predicted)
            )
        per_classifier[clf_cls.__name__] = np.average(fold_scores)
    accs.append(per_classifier)

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 13.66it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 22.00it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.41it/s]
100%|█████████████████████████████████████████████| 5/5 [00:16<00:00,  3.32s/it]
100%|█████████████████████████████████████████████| 5/5 [00:06<00:00,  1.34s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.82it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 15.50it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.26it/s]
100%|█████████████████████████████████████████████| 5/5 [00:22<00:00,  4.60s/it]
100%|█████████████████████████████████████████████| 5/5 [00:09<00:00,  1.95s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 12.04it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 16.90it/s]
100%|███████████████████████

In [91]:
# Baseline ADJ table: rows indexed by model label, columns taken from the
# keys of the per-model dicts in `accs`.
acc_df = pd.DataFrame(accs, index=labels)

In [92]:
# ADJ probe restricted to the columns returned by get_stable_dims for
# tasks[7]. Writes the stable-dims accuracies and their difference to the
# baseline `acc_df` as LaTeX tables.
# NOTE(review): confirm that tasks[7] is the POS_Adj task.
accs_stable = []

for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[7])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                # Must be the X_adj_* / y_adj_* folds that `acc_df` was computed
                # on; the X_noun_* folds still hold the NOUN-encoded data and
                # would make the diff below compare two different tasks.
                clf.fit(X=X_adj_train[i][k][stable_dims], y=y_adj_train[i][k])
                y_pred = clf.predict(X_adj_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_adj_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as 0, but say why.
                # `Exception` (not a bare except) lets a kernel interrupt through.
                tqdm.write(f'[{tasks[7]}] {labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{type(err).__name__}: {err} -> scored as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[7]}_stable')

# Positive entries mean the stable dimensions alone beat the full embedding.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[7]}_diff')

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.28it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 27.12it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.08it/s]
100%|█████████████████████████████████████████████| 5/5 [00:21<00:00,  4.26s/it]
100%|█████████████████████████████████████████████| 5/5 [00:04<00:00,  1.22it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 16.68it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 237.25it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 16.23it/s]
100%|█████████████████████████████████████████████| 5/5 [00:08<00:00,  1.73s/it]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 17.48it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 13.58it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 376.82it/s]
100%|███████████████████████

# POS_Verb

In [93]:
# Per-model fold containers for the VERB probe: element [model][fold].
X_verb_train, y_verb_train = [], []
X_verb_test, y_verb_test = [], []

In [94]:
# Build the cross-validation folds for the VERB probe, one entry per frame in
# all_we_with_features. Rows whose Gender or Number is 'invariable' are
# dropped before the data is handed to prepare_dataset.
for we in all_we_with_features:
    variable_rows = we[(we.Gender != 'invariable') & (we.Number != 'invariable')]
    X, y = prepare_dataset(
        dataset=variable_rows,
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='VERB',
        split=False,
        balance=True,
    )

    # Contiguous, unshuffled split into n_folds pieces.
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains, X_tests, y_tests = [], [], [], []
    for held_out in range(n_folds):
        X_rest = X_folds[:held_out] + X_folds[held_out + 1:]
        y_rest = y_folds[:held_out] + y_folds[held_out + 1:]
        # np.concatenate discards the column labels, so the training frame
        # gets default integer columns.
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Cast the held-out fold's column labels to int, presumably so they
        # line up with the training frame's integer columns.
        test_fold = X_folds[held_out]
        test_fold.columns = test_fold.columns.map(int)
        X_tests.append(test_fold)
        y_tests.append(y_folds[held_out])

    X_verb_train.append(X_trains)
    X_verb_test.append(X_tests)

    y_verb_train.append(y_trains)
    y_verb_test.append(y_tests)

In [95]:
# Baseline VERB probe: each classifier class in `clfs` is fitted on all
# columns of every fold, and the fold accuracies are averaged.
accs = []

for model_idx in range(len(models)):
    per_classifier = {}
    for clf_cls in clfs:
        fold_scores = []
        for fold in tqdm(range(n_folds)):
            estimator = clf_cls()
            estimator.fit(X=X_verb_train[model_idx][fold], y=y_verb_train[model_idx][fold])
            predicted = estimator.predict(X_verb_test[model_idx][fold])
            fold_scores.append(
                accuracy_score(y_true=y_verb_test[model_idx][fold], y_pred=predicted)
            )
        per_classifier[clf_cls.__name__] = np.average(fold_scores)
    accs.append(per_classifier)

100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.81it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 16.29it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.62it/s]
100%|█████████████████████████████████████████████| 5/5 [00:37<00:00,  7.47s/it]
100%|█████████████████████████████████████████████| 5/5 [00:18<00:00,  3.73s/it]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.50it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 10.45it/s]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  1.91it/s]
100%|█████████████████████████████████████████████| 5/5 [01:04<00:00, 12.93s/it]
100%|█████████████████████████████████████████████| 5/5 [00:29<00:00,  5.98s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.68it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 12.48it/s]
100%|███████████████████████

In [96]:
# Baseline VERB table: rows indexed by model label, columns taken from the
# keys of the per-model dicts in `accs`.
acc_df = pd.DataFrame(accs, index=labels)

In [97]:
# VERB probe restricted to the columns returned by get_stable_dims for
# tasks[8]. Writes the stable-dims accuracies and their difference to the
# baseline `acc_df` as LaTeX tables.
# NOTE(review): confirm that tasks[8] is the POS_Verb task.
accs_stable = []

for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[8])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                # Must be the X_verb_* / y_verb_* folds that `acc_df` was computed
                # on; the X_noun_* folds still hold the NOUN-encoded data and
                # would make the diff below compare two different tasks.
                clf.fit(X=X_verb_train[i][k][stable_dims], y=y_verb_train[i][k])
                y_pred = clf.predict(X_verb_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_verb_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as 0, but say why.
                # `Exception` (not a bare except) lets a kernel interrupt through.
                tqdm.write(f'[{tasks[8]}] {labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{type(err).__name__}: {err} -> scored as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[8]}_stable')

# Positive entries mean the stable dimensions alone beat the full embedding.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[8]}_diff')

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  9.26it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 96.07it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.12it/s]
100%|█████████████████████████████████████████████| 5/5 [00:13<00:00,  2.62s/it]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.59it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.18it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 48.20it/s]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.32it/s]
100%|█████████████████████████████████████████████| 5/5 [00:26<00:00,  5.32s/it]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  1.85it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 11.57it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 123.77it/s]
100%|███████████████████████

# Sem_Act

In [98]:
# Passed to prepare_dataset in the Sem_* cells below.
# NOTE(review): presumably the number of annotation (non-embedding) columns — confirm.
feature_col_count = 6

In [99]:
# Passed as feature_name to prepare_dataset in the Sem_* cells below;
# presumably the column holding the label to predict — TODO confirm.
feature = 'Semantic'

In [100]:
# Reset the fold containers before filling them with the 'Act' data.
# The X_noun_* names are reused from the POS section, so the earlier
# contents are discarded here.
X_noun_train, y_noun_train = [], []
X_noun_test, y_noun_test = [], []

In [101]:
# Build the cross-validation folds for the 'Act' probe, one entry per frame
# in noun_we_with_features. Rows whose Gender or Number is 'invariable' are
# dropped before the data is handed to prepare_dataset.
for we in noun_we_with_features:
    variable_rows = we[(we.Gender != 'invariable') & (we.Number != 'invariable')]
    X, y = prepare_dataset(
        dataset=variable_rows,
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='Act',
        split=False,
        balance=True,
    )

    # Contiguous, unshuffled split into n_folds pieces.
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains, X_tests, y_tests = [], [], [], []
    for held_out in range(n_folds):
        X_rest = X_folds[:held_out] + X_folds[held_out + 1:]
        y_rest = y_folds[:held_out] + y_folds[held_out + 1:]
        # np.concatenate discards the column labels, so the training frame
        # gets default integer columns.
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Cast the held-out fold's column labels to int, presumably so they
        # line up with the training frame's integer columns.
        test_fold = X_folds[held_out]
        test_fold.columns = test_fold.columns.map(int)
        X_tests.append(test_fold)
        y_tests.append(y_folds[held_out])

    X_noun_train.append(X_trains)
    X_noun_test.append(X_tests)

    y_noun_train.append(y_trains)
    y_noun_test.append(y_tests)

In [102]:
# Baseline 'Act' probe: each classifier class in `clfs` is fitted on all
# columns of every fold, and the fold accuracies are averaged.
accs = []

for model_idx in range(len(models)):
    per_classifier = {}
    for clf_cls in clfs:
        fold_scores = []
        for fold in tqdm(range(n_folds)):
            estimator = clf_cls()
            estimator.fit(X=X_noun_train[model_idx][fold], y=y_noun_train[model_idx][fold])
            predicted = estimator.predict(X_noun_test[model_idx][fold])
            fold_scores.append(
                accuracy_score(y_true=y_noun_test[model_idx][fold], y_pred=predicted)
            )
        per_classifier[clf_cls.__name__] = np.average(fold_scores)
    accs.append(per_classifier)

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 61.92it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 137.94it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  9.66it/s]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.51it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.02it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 51.26it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 78.62it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.11it/s]
100%|█████████████████████████████████████████████| 5/5 [00:03<00:00,  1.30it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.55it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 55.39it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 67.56it/s]
100%|███████████████████████

In [103]:
# Baseline 'Act' table: rows indexed by model label, columns taken from the
# keys of the per-model dicts in `accs`.
acc_df = pd.DataFrame(accs, index=labels)

In [104]:
# 'Act' probe restricted to the columns returned by get_stable_dims for
# tasks[-1]. Writes the stable-dims accuracies and their difference to the
# baseline `acc_df` as LaTeX tables.
# NOTE(review): confirm that tasks[-1] is the Sem_Act task and not Sem_Person.
accs_stable = []

for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[-1])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                clf.fit(X=X_noun_train[i][k][stable_dims], y=y_noun_train[i][k])
                y_pred = clf.predict(X_noun_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_noun_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as 0, but say why.
                # `Exception` (not a bare except) lets a kernel interrupt through.
                tqdm.write(f'[{tasks[-1]}] {labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{type(err).__name__}: {err} -> scored as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[-1]}_stable')

# Positive entries mean the stable dimensions alone beat the full embedding.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[-1]}_diff')

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 76.55it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 154.71it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  7.35it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.48it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 46.84it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 173.61it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 622.60it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 425.20it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 10.05it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 385.24it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 170.24it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 608.86it/s]
100%|███████████████████████

# Sem_Person

In [105]:
# Reset the fold containers before filling them with the 'Person' data;
# the contents from the previous section are discarded here.
X_noun_train, y_noun_train = [], []
X_noun_test, y_noun_test = [], []

In [106]:
# Build the cross-validation folds for the 'Person' probe, one entry per
# frame in noun_we_with_features. Rows whose Gender or Number is 'invariable'
# are dropped before the data is handed to prepare_dataset.
for we in noun_we_with_features:
    variable_rows = we[(we.Gender != 'invariable') & (we.Number != 'invariable')]
    X, y = prepare_dataset(
        dataset=variable_rows,
        feature_col_count=feature_col_count,
        feature_name=feature,
        normalize=False,
        encode=True,
        encode_as1='Person',
        split=False,
        balance=True,
    )

    # Contiguous, unshuffled split into n_folds pieces.
    X_folds = np.array_split(X, n_folds)
    y_folds = np.array_split(y, n_folds)

    X_trains, y_trains, X_tests, y_tests = [], [], [], []
    for held_out in range(n_folds):
        X_rest = X_folds[:held_out] + X_folds[held_out + 1:]
        y_rest = y_folds[:held_out] + y_folds[held_out + 1:]
        # np.concatenate discards the column labels, so the training frame
        # gets default integer columns.
        X_trains.append(pd.DataFrame(np.concatenate(X_rest)))
        y_trains.append(np.concatenate(y_rest))

        # Cast the held-out fold's column labels to int, presumably so they
        # line up with the training frame's integer columns.
        test_fold = X_folds[held_out]
        test_fold.columns = test_fold.columns.map(int)
        X_tests.append(test_fold)
        y_tests.append(y_folds[held_out])

    X_noun_train.append(X_trains)
    X_noun_test.append(X_tests)

    y_noun_train.append(y_trains)
    y_noun_test.append(y_tests)

In [107]:
# Baseline 'Person' probe: each classifier class in `clfs` is fitted on all
# columns of every fold, and the fold accuracies are averaged.
accs = []

for model_idx in range(len(models)):
    per_classifier = {}
    for clf_cls in clfs:
        fold_scores = []
        for fold in tqdm(range(n_folds)):
            estimator = clf_cls()
            estimator.fit(X=X_noun_train[model_idx][fold], y=y_noun_train[model_idx][fold])
            predicted = estimator.predict(X_noun_test[model_idx][fold])
            fold_scores.append(
                accuracy_score(y_true=y_noun_test[model_idx][fold], y_pred=predicted)
            )
        per_classifier[clf_cls.__name__] = np.average(fold_scores)
    accs.append(per_classifier)

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 85.34it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 127.65it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.82it/s]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  1.92it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  6.88it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 52.02it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 73.36it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.33it/s]
100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  1.70it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.58it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 50.06it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 74.50it/s]
100%|███████████████████████

In [108]:
# Baseline 'Person' table: rows indexed by model label, columns taken from
# the keys of the per-model dicts in `accs`.
acc_df = pd.DataFrame(accs, index=labels)

In [109]:
# 'Person' probe restricted to the columns returned by get_stable_dims for
# tasks[-2]. Writes the stable-dims accuracies and their difference to the
# baseline `acc_df` as LaTeX tables.
# NOTE(review): confirm that tasks[-2] is the Sem_Person task and not Sem_Act.
accs_stable = []

for i in range(len(models)):
    model_accs = {}
    stable_dims = get_stable_dims(labels[i], tasks[-2])
    for j in range(len(clfs)):
        clf_accs = []
        for k in tqdm(range(n_folds)):
            clf = clfs[j]()
            try:
                clf.fit(X=X_noun_train[i][k][stable_dims], y=y_noun_train[i][k])
                y_pred = clf.predict(X_noun_test[i][k][stable_dims])
                acc = accuracy_score(y_pred=y_pred, y_true=y_noun_test[i][k])
            except Exception as err:
                # Best effort: a failing fold still counts as 0, but say why.
                # `Exception` (not a bare except) lets a kernel interrupt through.
                tqdm.write(f'[{tasks[-2]}] {labels[i]} / {clfs[j].__name__} / fold {k}: '
                           f'{type(err).__name__}: {err} -> scored as 0')
                acc = 0
            clf_accs.append(acc)
        model_accs[clfs[j].__name__] = np.average(clf_accs)
    accs_stable.append(model_accs)

stable_acc_df = pd.DataFrame(data=accs_stable, index=labels)

stable_acc_df.to_latex(f'Classification_Accuracies/{tasks[-2]}_stable')

# Positive entries mean the stable dimensions alone beat the full embedding.
(stable_acc_df - acc_df).to_latex(f'Classification_Accuracies/{tasks[-2]}_diff')

100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 87.07it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 276.82it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 46.28it/s]
100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.84it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 33.75it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 335.55it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 835.55it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 535.04it/s]
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 10.65it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 341.96it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 252.42it/s]
100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 606.50it/s]
100%|███████████████████████