In [2]:
import pickle
import numpy as np
import pandas as pd
from data.dataprocessor import DataProcessor
from feature_extractors.normal_feature_extractor import NormalFeatureExtractor
from feature_extractors.transformer_feature_extractor import TransformerFeatureExtractor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
seed = 42

### Load and Subset `non_europe` Data

In [3]:
from collections import Counter

# Load the pre-computed out-of-domain feature matrices and labels.
# Every pickle is read inside a `with` block so its file handle is closed as soon as the
# object has been deserialised (the bare `pickle.load(open(...))` form leaks the handle).
# NOTE(review): pickle.load can execute arbitrary code; only load pickles produced by this project.
with open('pickles/pickled_datasets/seed_42/out_of_domain_X_normal_chunks.pkl', 'rb') as f:
    X_normal_non_europe = pickle.load(f).to_numpy()

with open('pickles/pickled_datasets/seed_42/out_of_domain_X_bigbird_chunks.pkl', 'rb') as f:
    X_bigbird_non_europe = pickle.load(f)
with open('pickles/pickled_datasets/seed_42/out_of_domain_X_bigbird_fine_tuned_chunks.pkl', 'rb') as f:
    X_bigbird_fine_tuned_non_europe = pickle.load(f)

with open('pickles/pickled_datasets/seed_42/out_of_domain_y_chunks.pkl', 'rb') as f:
    y_non_europe = pickle.load(f)
print(Counter(y_non_europe).most_common(50))

# Label ids retained for this experiment; they are the same non-zero ids that
# `assign_label` produces for the TOEFL essays.
kept_labels = [1, 7, 10, 20, 22]
# Boolean mask (one entry per sample) selecting only the retained classes.
ix_non_europe = [(yi in kept_labels) for yi in y_non_europe]
print(Counter(y_non_europe[ix_non_europe]).most_common(5))

# Apply the same mask to the labels and to all three feature matrices so they stay aligned.
y_non_europe = y_non_europe[ix_non_europe]
X_normal_non_europe = X_normal_non_europe[ix_non_europe]
X_bigbird_non_europe = X_bigbird_non_europe[ix_non_europe]
X_bigbird_fine_tuned_non_europe = X_bigbird_fine_tuned_non_europe[ix_non_europe]

[(0, 1924), (21, 1805), (8, 1778), (12, 1754), (1, 1697), (13, 1676), (14, 1663), (7, 1637), (2, 1623), (15, 1610), (17, 1610), (10, 1609), (22, 1606), (18, 1590), (16, 1566), (5, 1556), (19, 1545), (20, 1535), (9, 1528), (4, 1519), (6, 1492), (3, 1489), (11, 1365)]
[(1, 1697), (7, 1637), (10, 1609), (22, 1606), (20, 1535)]


In [4]:
def _cut_at_80_percent(samples):
    """Split `samples` positionally: the first ``int(len(samples) * 0.8)`` items and the rest.

    No shuffling is performed; the order of `samples` is kept as-is.
    """
    boundary = int(len(samples) * 0.8)
    return samples[:boundary], samples[boundary:]


# The leading 80% of each array is the training portion, the trailing 20% the test portion.
X_normal_non_europe_train, X_normal_non_europe_test = _cut_at_80_percent(X_normal_non_europe)

X_bigbird_non_europe_train, X_bigbird_non_europe_test = _cut_at_80_percent(X_bigbird_non_europe)

X_bigbird_fine_tuned_non_europe_train, X_bigbird_fine_tuned_non_europe_test = _cut_at_80_percent(
    X_bigbird_fine_tuned_non_europe
)

y_non_europe_train, y_non_europe_test = _cut_at_80_percent(y_non_europe)

### Train Classifiers on `non_europe` Data

In [5]:
# One logistic-regression configuration is shared by all three feature sets so that the
# resulting classifiers differ only in the features they were trained on.
logreg_settings = {'random_state': seed, 'max_iter': 5000, 'verbose': 1}

clf_normal_non_europe = LogisticRegression(**logreg_settings).fit(
    X_normal_non_europe_train, y_non_europe_train
)
clf_bigbird_non_europe = LogisticRegression(**logreg_settings).fit(
    X_bigbird_non_europe_train, y_non_europe_train
)
clf_bigbird_fine_tuned_non_europe = LogisticRegression(**logreg_settings).fit(
    X_bigbird_fine_tuned_non_europe_train, y_non_europe_train
)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  5.4min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =        25935     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.04082D+04    |proj g|=  1.24078D+04

At iterate   50    f=  4.26980D+03    |proj g|=  6.68165D+03

At iterate  100    f=  2.70294D+03    |proj g|=  5.70356D+03

At iterate  150    f=  1.76773D+03    |proj g|=  2.30535D+03

At iterate  200    f=  1.08808D+03    |proj g|=  2.19842D+03

At iterate  250    f=  5.65912D+02    |proj g|=  1.60782D+03

At iterate  300    f=  2.62086D+02    |proj g|=  1.18929D+03

At iterate  350    f=  1.23711D+02    |proj g|=  1.17689D+03

At iterate  400    f=  7.72126D+01    |proj g|=  1.41690D+02

At iterate  450    f=  6.70526D+01    |proj g|=  2.96791D+01

At iterate  500    f=  6.22416D+01    |proj g|=  2.26813D+01

At iterate  550    f=  5.96473D+01    |proj g|=  1.71391D+01

At iterate  600    f=  5.80398D+01    |proj g|=  3.00467D+01

At iterate  650    f=  5.6

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   20.9s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   11.2s finished


In [None]:
# Persist the three fitted classifiers, one pickle file per classifier.
for clf_path, fitted_clf in (
    ('pickles/new_clf_normal_non_europe_seed_42_chunks.pkl', clf_normal_non_europe),
    ('pickles/new_clf_bigbird_non_europe_seed_42_chunks.pkl', clf_bigbird_non_europe),
    ('pickles/new_clf_bigbird_fine_tuned_non_europe_seed_42_chunks.pkl', clf_bigbird_fine_tuned_non_europe),
):
    with open(clf_path, 'wb') as f:
        pickle.dump(fitted_clf, f)

In [None]:
def _unpickle_classifier(path):
    """Read and return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)


# Restore the classifiers written by the dump cell so training can be skipped.
clf_normal_non_europe = _unpickle_classifier('pickles/new_clf_normal_non_europe_seed_42_chunks.pkl')

clf_bigbird_non_europe = _unpickle_classifier('pickles/new_clf_bigbird_non_europe_seed_42_chunks.pkl')

clf_bigbird_fine_tuned_non_europe = _unpickle_classifier(
    'pickles/new_clf_bigbird_fine_tuned_non_europe_seed_42_chunks.pkl'
)

### Load `toefl` Data

In [6]:
# Full corpus index (has a header row and, unlike the test index, a Language column).
df_toefl = pd.read_csv('./data/toefl/ETS_Corpus_of_Non-Native_Written_English/data/text/index.csv')

# The training index has no header row, so the column names are supplied explicitly.
df_toefl_train = pd.read_csv('./data/toefl/ETS_Corpus_of_Non-Native_Written_English/data/text/index-training.csv', header=None, names=['Filename', 'Prompt', 'Language', 'Score Level'])
df_toefl_train['Source'] = 'train'

# The test index is read without a Language column, so it is only used to pick the matching
# rows out of the full index.
df_toefl_test = pd.read_csv('./data/toefl/ETS_Corpus_of_Non-Native_Written_English/data/text/index-test.csv', header=None, names=['Filename', 'Prompt', 'Score Level'])
# `.copy()` makes the selection an independent frame; assigning a column to the bare
# boolean-indexed slice is what raised pandas' SettingWithCopyWarning.
df_toefl_test = df_toefl[df_toefl['Filename'].isin(df_toefl_test['Filename'])].copy()
df_toefl_test['Source'] = 'test'

# Both frames now share the same columns, so the outer merge stacks train and test rows.
df_toefl = pd.merge(df_toefl_train, df_toefl_test, how='outer')
df_toefl.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Filename,Prompt,Language,Score Level,Source
0,278.txt,P6,DEU,medium,train
1,348.txt,P1,TUR,high,train
2,666.txt,P2,ZHO,medium,train
3,733.txt,P6,TEL,medium,train
4,976.txt,P2,ARA,low,train


In [7]:
# Distinct language codes present in the merged index, in order of first appearance.
df_toefl.loc[:, 'Language'].unique()

array(['DEU', 'TUR', 'ZHO', 'TEL', 'ARA', 'SPA', 'HIN', 'JPN', 'KOR',
       'FRA', 'ITA'], dtype=object)

In [8]:
def read_file_contents(filename, responses_dir='./data/toefl/ETS_Corpus_of_Non-Native_Written_English/data/text/responses/original'):
    """Return the text of one essay file, or ``None`` if the file does not exist.

    Parameters
    ----------
    filename : str
        File name of the essay (e.g. ``'278.txt'``), relative to `responses_dir`.
    responses_dir : str, optional
        Directory containing the essay files. Defaults to the corpus location that was
        previously hard-coded, so existing single-argument calls behave as before.

    Returns
    -------
    str or None
        The complete file content, or ``None`` when the file is missing (callers that
        post-process the text must be prepared for ``None``).
    """
    try:
        # Explicit encoding so decoding does not depend on the platform's default locale.
        # NOTE(review): assumes the corpus files are UTF-8 (or plain ASCII) - confirm.
        with open(f'{responses_dir}/{filename}', 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        # Deliberate best-effort: a missing essay is reported as None instead of raising.
        return None
    
def assign_label(iso_lang):
    """Translate a three-letter language code into the integer label used by the classifiers.

    Six codes (JPN, ARA, KOR, HIN, TEL, ZHO) all map to ``0``; the remaining five map to
    distinct non-zero ids. Any other code raises ``KeyError``.
    """
    # Codes that share the catch-all label 0.
    zero_label_langs = ('JPN', 'ARA', 'KOR', 'HIN', 'TEL', 'ZHO')
    if iso_lang in zero_label_langs:
        return 0

    # Codes with their own class id.
    own_label = {'DEU': 1, 'FRA': 7, 'ITA': 10, 'SPA': 20, 'TUR': 22}
    return own_label[iso_lang]

In [9]:
# Attach the essay text and the integer label, then drop every row labelled 0 and
# renumber the index so it runs 0..n-1 again.
df_toefl = (
    df_toefl
    .assign(
        Text=lambda frame: frame['Filename'].apply(read_file_contents),
        Label=lambda frame: frame['Language'].apply(assign_label),
    )
    .loc[lambda frame: frame['Label'] != 0]
    .reset_index(drop=True)
)
df_toefl.head()

Unnamed: 0,Filename,Prompt,Language,Score Level,Source,Text,Label
0,278.txt,P6,DEU,medium,train,IThe importance and popularity of travelling i...,1
1,348.txt,P1,TUR,high,train,"It is an important decision, how to plan your ...",22
2,1612.txt,P6,SPA,medium,train,"In my opinion, travel in group with a tour gui...",20
3,2024.txt,P3,DEU,medium,train,I thing the statement ''Young people nowadays ...,1
4,2664.txt,P2,DEU,high,train,Whether or not young people enjoy life more th...,1


In [10]:
# Average essay length, counting tokens obtained by splitting on single spaces.
words_per_essay = df_toefl['Text'].apply(lambda essay: len(essay.split(' ')))
words_per_essay.mean()

322.124

### Extract Features from `toefl` Data

In [11]:
# Restore the pickled hand-crafted feature extractor. The `with` block closes the file
# handle right after loading (the bare `pickle.load(open(...))` form leaked it).
# NOTE(review): pickle.load can execute arbitrary code; only load pickles produced by this project.
with open('pickles/normal_feature_extractor_seed_42_chunks.pkl', 'rb') as f:
    normal_feature_extractor = pickle.load(f)
# Transformer-based extractors: the public BigBird checkpoint and the locally fine-tuned one.
# NOTE(review): the second argument (2048) is presumably the maximum sequence length - confirm
# against TransformerFeatureExtractor.
big_bird_non_europe_feature_extractor = TransformerFeatureExtractor('google/bigbird-roberta-base', 2048)
big_bird_fine_tuned_non_europe_feature_extractor = TransformerFeatureExtractor('fine_tuned_models/out_of_domain_bigbird_roberta_base_clean_chunks', 2048)

def insert_grammar_features(all_features: np.ndarray, grammar_features: np.ndarray, insert_at: int = 2300) -> np.ndarray:
    """Splice `grammar_features` into `all_features` as a contiguous block of columns.

    Parameters
    ----------
    all_features : np.ndarray
        2-D feature matrix (rows are samples).
    grammar_features : np.ndarray
        2-D matrix with the same number of rows as `all_features`.
    insert_at : int, optional
        Column index of `all_features` before which the grammar columns are placed.
        Defaults to 2300, the position that was previously hard-coded.
        NOTE(review): presumably this matches the column layout the classifiers were
        trained on - confirm before changing it.

    Returns
    -------
    np.ndarray
        New matrix laid out as ``[all_features[:, :insert_at] | grammar_features |
        all_features[:, insert_at:]]``; the inputs are not modified.
    """
    return np.concatenate(
        (all_features[:, :insert_at], grammar_features, all_features[:, insert_at:]),
        axis=1,
    )

# Hand-crafted features: `transform` is called with grammar_mistakes=False and its result is
# converted to a NumPy array; the grammar features are obtained by a separate call.
X_normal_toefl = normal_feature_extractor.transform(df_toefl.Text.to_list(), grammar_mistakes=False).to_numpy()
grammar_features = normal_feature_extractor.get_grammar_features(df_toefl.Text.to_list())

# Splice the grammar columns into the matrix (note: X_normal_toefl is rebound here, so
# re-running only this line would insert the grammar block a second time).
X_normal_toefl = insert_grammar_features(X_normal_toefl, grammar_features)

# Transformer features from the base and the fine-tuned BigBird extractors, computed on the
# same essay texts in the same order.
X_bigbird_toefl = big_bird_non_europe_feature_extractor.transform(df_toefl.Text.to_list())
X_bigbird_fine_tuned_toefl = big_bird_fine_tuned_non_europe_feature_extractor.transform(df_toefl.Text.to_list())

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
normalizer.cc(50) LOG(INFO) precompiled_charsmap is empty. use identity normalization.
Some weights of the model checkpoint at google/bigbird-roberta-base were not used when initializing BigBirdModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BigBirdModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BigBirdModel from the checkpoint of a model that you expect to be exactly identical (initializing 

Creating word ngram features...
Creating char ngram features...


Calculating edit distance: 100%|███████████| 5000/5000 [00:11<00:00, 438.73it/s]
Extracting Substitution Features: 100%|████| 5000/5000 [00:15<00:00, 327.36it/s]
Extracting Function Word Features: 5000it [00:07, 651.17it/s]
Extracting POS Features: 5000it [01:09, 72.36it/s]
Extracting Average Sentence Length: 100%|█| 5000/5000 [00:00<00:00, 19836.46it/s
Extracting Grammar Features: 100%|██████████| 5000/5000 [07:49<00:00, 10.66it/s]
Extracting features: 100%|██████████████████| 5000/5000 [11:00<00:00,  7.57it/s]
Extracting features: 100%|██████████████████| 5000/5000 [11:18<00:00,  7.37it/s]


In [None]:
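# Disabled caching cell: uncomment to persist the freshly extracted TOEFL features so the
# loading cell below can be used instead of re-running the extraction.
# with open('pickles/new_normal_toefl_seed_42_chunks.pkl', 'wb') as f:
#     pickle.dump(X_normal_toefl, f)
    
# with open('pickles/new_bigbird_toefl_seed_42_chunks.pkl', 'wb') as f:
#     pickle.dump(X_bigbird_toefl, f)
    
# with open('pickles/new_bigbird_fine_tuned_toefl_seed_42_chunks.pkl', 'wb') as f:
#     pickle.dump(X_bigbird_fine_tuned_toefl, f)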

In [None]:
# Restore previously cached TOEFL feature matrices instead of re-running the extraction.
# NOTE(review): pickle.load can execute arbitrary code; only load pickles produced by this project.
with open('pickles/new_normal_toefl_seed_42_chunks.pkl', 'rb') as f:
    X_normal_toefl = pickle.load(f)
    
with open('pickles/new_bigbird_toefl_seed_42_chunks.pkl', 'rb') as f:
    X_bigbird_toefl = pickle.load(f)
    
# The cells that consume these features read `X_bigbird_fine_tuned_toefl`; the cache used to
# be loaded into `X_bigbird_fine_toefl`, so the cached fine-tuned features were never picked up.
with open('pickles/new_bigbird_fine_tuned_toefl_seed_42_chunks.pkl', 'rb') as f:
    X_bigbird_fine_tuned_toefl = pickle.load(f)

# Old (misspelled) name kept as an alias in case other cells still refer to it.
X_bigbird_fine_toefl = X_bigbird_fine_tuned_toefl

In [12]:
# Row selectors for the official train/test partition recorded in the 'Source' column.
from_train = df_toefl['Source'] == 'train'
from_test = df_toefl['Source'] == 'test'

# Index labels of the selected rows; they are used below to index the feature matrices.
toefl_train_ix = df_toefl.index[from_train].to_list()
toefl_test_ix = df_toefl.index[from_test].to_list()

X_normal_toefl_train, X_normal_toefl_test = X_normal_toefl[toefl_train_ix], X_normal_toefl[toefl_test_ix]

X_bigbird_toefl_train, X_bigbird_toefl_test = X_bigbird_toefl[toefl_train_ix], X_bigbird_toefl[toefl_test_ix]

X_bigbird_fine_tuned_toefl_train = X_bigbird_fine_tuned_toefl[toefl_train_ix]
X_bigbird_fine_tuned_toefl_test = X_bigbird_fine_tuned_toefl[toefl_test_ix]

# Labels in the same row order as the feature slices above.
y_toefl_train = df_toefl.loc[from_train, 'Label'].to_list()
y_toefl_test = df_toefl.loc[from_test, 'Label'].to_list()

In [13]:
# Same logistic-regression configuration for every feature set, so the TOEFL-trained
# classifiers differ only in their input features.
logreg_settings = {'random_state': seed, 'max_iter': 5000, 'verbose': 1}

clf_normal_toefl = LogisticRegression(**logreg_settings).fit(
    X_normal_toefl_train, y_toefl_train
)
clf_bigbird_toefl = LogisticRegression(**logreg_settings).fit(
    X_bigbird_toefl_train, y_toefl_train
)
clf_bigbird_fine_tuned_toefl = LogisticRegression(**logreg_settings).fit(
    X_bigbird_fine_tuned_toefl_train, y_toefl_train
)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


At iterate  800    f=  4.54626D+03    |proj g|=  3.47163D+01

At iterate  850    f=  4.54493D+03    |proj g|=  4.91730D+01

At iterate  900    f=  4.54417D+03    |proj g|=  4.59073D+01

At iterate  950    f=  4.54338D+03    |proj g|=  5.11069D+01

At iterate 1000    f=  4.54300D+03    |proj g|=  7.17308D+00

At iterate 1050    f=  4.54272D+03    |proj g|=  9.83205D+00

At iterate 1100    f=  4.54257D+03    |proj g|=  4.83812D+00

At iterate 1150    f=  4.54247D+03    |proj g|=  4.73653D+00

At iterate 1200    f=  4.54242D+03    |proj g|=  4.16935D+00

At iterate 1250    f=  4.54234D+03    |proj g|=  8.80099D+00

At iterate 1300    f=  4.54233D+03    |proj g|=  3.06038D+00

At iterate 1350    f=  4.54231D+03    |proj g|=  1.36517D+00

At iterate 1400    f=  4.54228D+03    |proj g|=  7.79950D+00

At iterate 1450    f=  4.54225D+03    |proj g|=  7.95727D-01

At iterate 1500    f=  4.54223D+03    |proj g|=  2.69240D+00

At iterate 1550    f=  4.54221D+03    |proj g|=  5.36927D+00

At itera

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.5min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   20.8s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   10.3s finished


In [14]:
df_scores = pd.DataFrame([], columns=['model', 'train', 'test', 'accuracy'])
i = 0
for outer_data in ['non_europe', 'toefl']:
    for inner_data in ['non_europe', 'toefl']:
        for clf in ['normal', 'bigbird', 'bigbird_fine_tuned']:
            eval(f"exec('pred_{clf}_{outer_data}_{inner_data} = clf_{clf}_{outer_data}.predict(X_{clf}_{inner_data}_test)')")
            if inner_data == 'non_europe':
                y_true = y_non_europe_test
            else:
                y_true = y_toefl_test
            acc = accuracy_score(y_true=y_true, y_pred=eval(f"pred_{clf}_{outer_data}_{inner_data}"))
#             print(f"Accuracy: {clf} \t" + ("\t" if 'fine' not in clf else "") + 
#                   f"({outer_data}) -> ({inner_data}) \t" + 
#                   ("\t" if inner_data == outer_data and inner_data == "toefl" else "") + 
#                   f" = {round(acc, 3):.3f}")
            df_scores.loc[i] = [clf, outer_data, inner_data, acc]
            i += 1

df_scores

Unnamed: 0,model,train,test,accuracy
0,normal,non_europe,non_europe,0.729128
1,bigbird,non_europe,non_europe,0.747681
2,bigbird_fine_tuned,non_europe,non_europe,0.820656
3,normal,non_europe,toefl,0.262
4,bigbird,non_europe,toefl,0.28
5,bigbird_fine_tuned,non_europe,toefl,0.37
6,normal,toefl,non_europe,0.406308
7,bigbird,toefl,non_europe,0.311688
8,bigbird_fine_tuned,toefl,non_europe,0.61039
9,normal,toefl,toefl,0.754


In [15]:
# One row per (model, training corpus), one column per test corpus; accuracies to 3 decimals.
accuracy_by_model_and_train = df_scores.pivot(
    index=["model", "train"],
    columns=["test"],
    values="accuracy",
)
accuracy_by_model_and_train.round(3)

Unnamed: 0_level_0,test,non_europe,toefl
model,train,Unnamed: 2_level_1,Unnamed: 3_level_1
bigbird,non_europe,0.748,0.28
bigbird,toefl,0.312,0.66
bigbird_fine_tuned,non_europe,0.821,0.37
bigbird_fine_tuned,toefl,0.61,0.56
normal,non_europe,0.729,0.262
normal,toefl,0.406,0.754


In [16]:
# One row per model, columns grouped by (training corpus, test corpus); emitted as LaTeX.
accuracy_by_model = df_scores.pivot(
    index=["model"],
    columns=["train", "test"],
    values="accuracy",
)
latex_table = accuracy_by_model.round(3).to_latex()
print(latex_table)

\begin{tabular}{lrrrr}
\toprule
train & \multicolumn{2}{l}{non\_europe} & \multicolumn{2}{l}{toefl} \\
test & non\_europe &  toefl & non\_europe &  toefl \\
model              &            &        &            &        \\
\midrule
bigbird            &      0.748 &  0.280 &      0.312 &  0.660 \\
bigbird\_fine\_tuned &      0.821 &  0.370 &      0.610 &  0.560 \\
normal             &      0.729 &  0.262 &      0.406 &  0.754 \\
\bottomrule
\end{tabular}



### Full TOEFL Performance

In [17]:
# Reload the full corpus index (header row present, includes the Language column); this
# rebinds `df_toefl`, discarding the filtered five-language frame used above.
df_toefl = pd.read_csv('./data/toefl/ETS_Corpus_of_Non-Native_Written_English/data/text/index.csv')

# The training index has no header row, so the column names are supplied explicitly.
df_toefl_train = pd.read_csv('./data/toefl/ETS_Corpus_of_Non-Native_Written_English/data/text/index-training.csv', header=None, names=['Filename', 'Prompt', 'Language', 'Score Level'])
df_toefl_train['Source'] = 'train'

# The test index is read without a Language column, so it is only used to pick the matching
# rows out of the full index.
df_toefl_test = pd.read_csv('./data/toefl/ETS_Corpus_of_Non-Native_Written_English/data/text/index-test.csv', header=None, names=['Filename', 'Prompt', 'Score Level'])
# `.copy()` makes the selection an independent frame; assigning a column to the bare
# boolean-indexed slice is what raised pandas' SettingWithCopyWarning.
df_toefl_test = df_toefl[df_toefl['Filename'].isin(df_toefl_test['Filename'])].copy()
df_toefl_test['Source'] = 'test'

# Both frames now share the same columns, so the outer merge stacks train and test rows.
df_toefl = pd.merge(df_toefl_train, df_toefl_test, how='outer')
df_toefl.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Filename,Prompt,Language,Score Level,Source
0,278.txt,P6,DEU,medium,train
1,348.txt,P1,TUR,high,train
2,666.txt,P2,ZHO,medium,train
3,733.txt,P6,TEL,medium,train
4,976.txt,P2,ARA,low,train


In [22]:
def full_assign_label(iso_lang):
    """Translate a three-letter language code into its own class id (1 through 11).

    Unlike `assign_label`, every one of the eleven codes gets a distinct label.
    Any other code raises ``KeyError``.
    """
    # Position in this tuple (counting from 1) is the class id.
    ordered_langs = ('JPN', 'ARA', 'KOR', 'HIN', 'TEL', 'ZHO', 'DEU', 'FRA', 'ITA', 'SPA', 'TUR')
    label_of = {lang: class_id for class_id, lang in enumerate(ordered_langs, start=1)}
    return label_of[iso_lang]

# Attach the essay text and the 11-class label to every row (no rows are dropped here),
# then renumber the index so it runs 0..n-1.
df_toefl = (
    df_toefl
    .assign(
        Text=lambda frame: frame['Filename'].apply(read_file_contents),
        Label=lambda frame: frame['Language'].apply(full_assign_label),
    )
    .reset_index(drop=True)
)
df_toefl.head()

Unnamed: 0,Filename,Prompt,Language,Score Level,Source,Text,Label
0,278.txt,P6,DEU,medium,train,IThe importance and popularity of travelling i...,7
1,348.txt,P1,TUR,high,train,"It is an important decision, how to plan your ...",11
2,666.txt,P2,ZHO,medium,train,Some people believe that young people can enjo...,6
3,733.txt,P6,TEL,medium,train,Travelling is usually considered as good recr...,5
4,976.txt,P2,ARA,low,train,i agree that . \nLife is a person live period...,2


In [30]:
# Restore the pickled hand-crafted feature extractor. The `with` block closes the file
# handle right after loading (the bare `pickle.load(open(...))` form leaked it).
# NOTE(review): pickle.load can execute arbitrary code; only load pickles produced by this project.
with open('pickles/normal_feature_extractor_seed_42_chunks.pkl', 'rb') as f:
    normal_feature_extractor = pickle.load(f)
# Transformer-based extractors: the public BigBird checkpoint and the locally fine-tuned one.
# NOTE(review): the second argument (2048) is presumably the maximum sequence length - confirm
# against TransformerFeatureExtractor.
big_bird_non_europe_feature_extractor = TransformerFeatureExtractor('google/bigbird-roberta-base', 2048)
big_bird_fine_tuned_non_europe_feature_extractor = TransformerFeatureExtractor('fine_tuned_models/out_of_domain_bigbird_roberta_base_clean_chunks', 2048)

def insert_grammar_features(all_features: np.ndarray, grammar_features: np.ndarray, insert_at: int = 2300) -> np.ndarray:
    """Splice `grammar_features` into `all_features` as a contiguous block of columns.

    Parameters
    ----------
    all_features : np.ndarray
        2-D feature matrix (rows are samples).
    grammar_features : np.ndarray
        2-D matrix with the same number of rows as `all_features`.
    insert_at : int, optional
        Column index of `all_features` before which the grammar columns are placed.
        Defaults to 2300, the position that was previously hard-coded.
        NOTE(review): presumably this matches the column layout the classifiers were
        trained on - confirm before changing it.

    Returns
    -------
    np.ndarray
        New matrix laid out as ``[all_features[:, :insert_at] | grammar_features |
        all_features[:, insert_at:]]``; the inputs are not modified.
    """
    return np.concatenate(
        (all_features[:, :insert_at], grammar_features, all_features[:, insert_at:]),
        axis=1,
    )

# Hand-crafted features for the full (11-language) frame: `transform` is called with
# grammar_mistakes=False and its result is converted to a NumPy array; the grammar features
# are obtained by a separate call.
X_normal_toefl = normal_feature_extractor.transform(df_toefl.Text.to_list(), grammar_mistakes=False).to_numpy()
grammar_features = normal_feature_extractor.get_grammar_features(df_toefl.Text.to_list())

# Splice the grammar columns into the matrix (note: X_normal_toefl is rebound here, so
# re-running only this line would insert the grammar block a second time).
X_normal_toefl = insert_grammar_features(X_normal_toefl, grammar_features)

# Transformer features from the base and the fine-tuned BigBird extractors, computed on the
# same essay texts in the same order.
X_bigbird_toefl = big_bird_non_europe_feature_extractor.transform(df_toefl.Text.to_list())
X_bigbird_fine_tuned_toefl = big_bird_fine_tuned_non_europe_feature_extractor.transform(df_toefl.Text.to_list())

Extracting features: 100%|████████████████| 11000/11000 [24:56<00:00,  7.35it/s]
Extracting features: 100%|████████████████| 11000/11000 [25:06<00:00,  7.30it/s]


In [31]:
# Row selectors for the official train/test partition recorded in the 'Source' column.
from_train = df_toefl['Source'] == 'train'
from_test = df_toefl['Source'] == 'test'

# Index labels of the selected rows; they are used below to index the feature matrices.
toefl_train_ix = df_toefl.index[from_train].to_list()
toefl_test_ix = df_toefl.index[from_test].to_list()

X_normal_toefl_train, X_normal_toefl_test = X_normal_toefl[toefl_train_ix], X_normal_toefl[toefl_test_ix]

X_bigbird_toefl_train, X_bigbird_toefl_test = X_bigbird_toefl[toefl_train_ix], X_bigbird_toefl[toefl_test_ix]

X_bigbird_fine_tuned_toefl_train = X_bigbird_fine_tuned_toefl[toefl_train_ix]
X_bigbird_fine_tuned_toefl_test = X_bigbird_fine_tuned_toefl[toefl_test_ix]

# Labels in the same row order as the feature slices above.
y_toefl_train = df_toefl.loc[from_train, 'Label'].to_list()
y_toefl_test = df_toefl.loc[from_test, 'Label'].to_list()

In [32]:
# Same logistic-regression configuration for every feature set; these fits replace the
# earlier `clf_*_toefl` objects with classifiers trained on the 11-class labels.
logreg_settings = {'random_state': seed, 'max_iter': 5000, 'verbose': 1}

clf_normal_toefl = LogisticRegression(**logreg_settings).fit(
    X_normal_toefl_train, y_toefl_train
)
clf_bigbird_toefl = LogisticRegression(**logreg_settings).fit(
    X_bigbird_toefl_train, y_toefl_train
)
clf_bigbird_fine_tuned_toefl = LogisticRegression(**logreg_settings).fit(
    X_bigbird_fine_tuned_toefl_train, y_toefl_train
)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
25935   3282   3494      1     0     0   3.671D-02   8.682D+01
  F =   86.818630465012845     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =         3845     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  7.24247D+03    |proj g|=  2.08538D+02

At iterate   50    f=  5.25686D+03    |proj g|=  3.16670D+02

At iterate  100    f=  4.63685D+03    |proj g|=  4.71422D+02

At iterate  150    f=  4.31661D+03    |proj g|=  4.34104D+02

At iterate  200    f=  4.03375D+03    |proj g|=  5.52435D+01

At iterate  250    f=  3.94551D+03    |proj 

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 11.6min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


At iterate  950    f=  3.38572D+02    |proj g|=  1.48177D+01

At iterate 1000    f=  3.37888D+02    |proj g|=  9.02887D+00

At iterate 1050    f=  3.37362D+02    |proj g|=  8.82779D+00

At iterate 1100    f=  3.36971D+02    |proj g|=  7.45144D+00

At iterate 1150    f=  3.36704D+02    |proj g|=  8.96141D+00

At iterate 1200    f=  3.36464D+02    |proj g|=  6.54725D+00

At iterate 1250    f=  3.36270D+02    |proj g|=  5.13316D+00

At iterate 1300    f=  3.36127D+02    |proj g|=  7.31698D+00

At iterate 1350    f=  3.35959D+02    |proj g|=  4.27350D+00

At iterate 1400    f=  3.35837D+02    |proj g|=  4.01296D+00

At iterate 1450    f=  3.35740D+02    |proj g|=  2.07514D+00

At iterate 1500    f=  3.35644D+02    |proj g|=  3.57226D+00

At iterate 1550    f=  3.35545D+02    |proj g|=  7.66062D+00

At iterate 1600    f=  3.35482D+02    |proj g|=  1.95406D+00

At iterate 1650    f=  3.35425D+02    |proj g|=  1.98147D+00

At iterate 1700    f=  3.35374D+02    |proj g|=  3.54877D+00

At itera

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.3min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   53.1s finished


In [33]:
new_df_scores = pd.DataFrame([], columns=['model', 'train', 'test', 'accuracy'])
i = 0
outer_data, inner_data = 'toefl', 'toefl'
for clf in ['normal', 'bigbird', 'bigbird_fine_tuned']:
    eval(f"exec('pred_{clf}_{outer_data}_{inner_data} = clf_{clf}_{outer_data}.predict(X_{clf}_{inner_data}_test)')")
    if inner_data == 'non_europe':
        y_true = y_non_europe_test
    else:
        y_true = y_toefl_test
    acc = accuracy_score(y_true=y_true, y_pred=eval(f"pred_{clf}_{outer_data}_{inner_data}"))
    new_df_scores.loc[i] = [clf, outer_data, inner_data, acc]
    i += 1

new_df_scores

Unnamed: 0,model,train,test,accuracy
0,normal,toefl,toefl,0.657273
1,bigbird,toefl,toefl,0.585455
2,bigbird_fine_tuned,toefl,toefl,0.451818
