In [142]:
import collections
import itertools
import logging
import os.path
import pickle
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.linear_model
import sklearn.preprocessing

from sklearn.metrics import check_scoring, r2_score, accuracy_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score, cross_validate, check_cv, KFold

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(name)s: %(message)s")

import compute

### Prepare data

In [2]:
fn_study_prepared = "studydata.pickle"
df_study1, df_study2 = compute.load_cached_data(fn_study_prepared)

if False:
    # fill missing columns
    for col in set(df_study2.columns).difference(df_study1.columns):
        df_study1.insert(len(df_study1.columns), col, None)

    # stack together (columns need to be same)
    cols = df_study2.columns.sort_values()
    pd.concat([df_study1[cols], df_study2[cols]], axis=0, ignore_index=True)

  from .autonotebook import tqdm as notebook_tqdm
[INFO] compute: Clean study data ...


In [37]:
# X, y train/test data

# X
df_study1_text = df_study1[[compute.COL_TEXT, compute.COL_TEXT_SPACY, compute.COL_TEXT_SPACY_CLEAN]]
df_study2_text = df_study2[[compute.COL_TEXT, compute.COL_TEXT_SPACY, compute.COL_TEXT_SPACY_CLEAN]]
df_both_text = pd.concat([df_study1_text, df_study2_text], axis=0).reset_index(drop=True)

# y
df_study1_scores = df_study1[compute.COLS_SCORES]
df_study2_scores = df_study2[compute.COLS_SCORES + compute.COLS_SCORES_S2]
df_both_scores = pd.concat([df_study1_scores, df_study2_scores[compute.COLS_SCORES]], axis=0).reset_index(drop=True)

# X
df_study1_liwc = df_study1[compute.COLS_LIWC_REL]
df_study2_liwc = df_study2[compute.COLS_LIWC_REL]
df_both_liwc = pd.concat([df_study1_liwc, df_study2_liwc], axis=0).reset_index(drop=True)

In [4]:
# build standard document-term matrix

def build_dtm_df(df_study, binarize=False):
    df = df_study[compute.COL_TEXT_SPACY_CLEAN]
    doc_term_mat, features = compute.build_feature_matrix(df, norm="l2", use_idf=True)

    # binarize (0/1 instead of floats)
    if binarize:
        doc_term_mat = np.array(np.vectorize(round)(doc_term_mat.todense()))
    else:
        doc_term_mat = doc_term_mat.toarray()

    return pd.DataFrame(doc_term_mat, columns=features)


binarize = False
# X
df_study1_dtm = build_dtm_df(df_study1_text, binarize=binarize)
df_study2_dtm = build_dtm_df(df_study2_text, binarize=binarize)
df_both_dtm = build_dtm_df(df_both_text, binarize=binarize)

In [5]:
# Rescale LIWC values into [0, 1] range, per column

def scale_liwc_df(df_study):
    scaler = StandardScaler()
    #scaler = MinMaxScaler()

    scaler.fit(df_study[compute.COLS_LIWC_REL])
    scaler.feature_names_in_
    #scaler.scale_, scaler.mean_  # StandardScaler

    data = scaler.transform(df_study[compute.COLS_LIWC_REL])
    return pd.DataFrame(data, columns=scaler.feature_names_in_)


# X
df_study1_liwc_scaled = scale_liwc_df(df_study1_liwc)
df_study2_liwc_scaled = scale_liwc_df(df_study2_liwc)
df_both_liwc_scaled = scale_liwc_df(df_both_liwc)

In [131]:
# Collapse variables into three classes (low/mid/high) based on quantiles

def quantize_scores(scores, as_str=False):
    # split by quantiles into 3 parts
    q33 = np.quantile(scores, 1 / 3)
    q66 = np.quantile(scores, 2 / 3)

    idx = np.digitize(scores, [q33, q66], right=True)

    # validate
    # assert all([
    #     v <= q33 if x == 0 else v > q33 and v <= q66 if x == 1 else v > q66
    #     for v, x in [(scores[i], x) for i, x in enumerate(idx)]
    # ])

    if not as_str:
        return idx

    # map to class labels
    return np.vectorize({0: "low", 1: "mid", 2: "high"}.get)(idx)


def quantize_score_df(df_scores, cols, as_str=False):
    all_scores = dict()
    for col in cols:
        all_scores[col] = quantize_scores(df_scores[col], as_str=as_str)
    return pd.DataFrame.from_dict(all_scores)


as_str = False
# y
df_study1_scores_cls = quantize_score_df(df_study1_scores, compute.COLS_SCORES, as_str=as_str)
df_study2_scores_cls = quantize_score_df(df_study2_scores, compute.COLS_SCORES + compute.COLS_SCORES_S2, as_str=as_str)
df_both_scores_cls = quantize_score_df(df_both_scores, compute.COLS_SCORES, as_str=as_str)

### Train Classifiers / Regressions

In [39]:
sels_data = ["DTM", "LIWC", "LIWC_S", "B", "B_S"]
sels_study = ["S1", "S2", "S1+S2"]
sels_var = ["power", "dominance", "prestige", "power_f", "dominance_f", "prestige_f"]


def select_data(sel_data, sel_study):
    df_dtm = None
    if sel_data == "DTM":
        if sel_study == "S1":
            df_dtm = df_study1_dtm
        elif sel_study == "S2":
            df_dtm = df_study2_dtm
        elif sel_study == "S1+S2":
            df_dtm = df_both_dtm
    elif sel_data == "LIWC":
        if sel_study == "S1":
            df_dtm = df_study1_liwc
        elif sel_study == "S2":
            df_dtm = df_study2_liwc
        elif sel_study == "S1+S2":
            df_dtm = df_both_liwc
    elif sel_data == "LIWC_S":
        if sel_study == "S1":
            df_dtm = df_study1_liwc_scaled
        elif sel_study == "S2":
            df_dtm = df_study2_liwc_scaled
        elif sel_study == "S1+S2":
            df_dtm = df_both_liwc_scaled
    elif sel_data == "B":
        if sel_study == "S1":
            df_dtm = pd.concat([df_study1_dtm, df_study1_liwc], axis=1)
        elif sel_study == "S2":
            df_dtm = pd.concat([df_study2_dtm, df_study2_liwc], axis=1)
        elif sel_study == "S1+S2":
            df_dtm = pd.concat([df_both_dtm, df_both_liwc], axis=1)
    elif sel_data == "B_S":
        if sel_study == "S1":
            df_dtm = pd.concat([df_study1_dtm, df_study1_liwc_scaled], axis=1)
        elif sel_study == "S2":
            df_dtm = pd.concat([df_study2_dtm, df_study2_liwc_scaled], axis=1)
        elif sel_study == "S1+S2":
            df_dtm = pd.concat([df_both_dtm, df_both_liwc_scaled], axis=1)
    assert df_dtm is not None
    return df_dtm


def select_scores(sel_study, sel_var):
    df_scores = None
    if sel_study == "S1":
        df_scores = df_study1_scores
    elif sel_study == "S2":
        df_scores = df_study2_scores
    elif sel_study == "S1+S2":
        df_scores = df_both_scores
    assert df_scores is not None
    col = f"s:{sel_var}"
    assert col in df_scores.columns
    return df_scores[col].to_numpy()

In [7]:
def run_trials(X, y, clf, p_grid, num_trials):
    scores = np.zeros(num_trials)
    params = []

    # Loop for each trial
    for i in range(num_trials):
        # cross-validation techniques for the inner and outer loops
        # E.g "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut", etc.
        inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)
        outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)

        # Non_nested parameter search and scoring
        # clf_gs = GridSearchCV(estimator=clf, param_grid=p_grid, cv=outer_cv)
        # clf_gs.fit(X, y)
        # clf.cv_results_

        # Nested CV with parameter optimization
        clf_gs = GridSearchCV(estimator=clf, param_grid=p_grid, cv=inner_cv, verbose=0)
        #score = cross_val_score(clf_gs, X=X, y=y, cv=outer_cv)

        scorer = check_scoring(clf_gs, scoring=None)
        cv_results = cross_validate(
            estimator=clf_gs,
            X=X,
            y=y,
            scoring={"score": scorer},
            cv=outer_cv,
            return_estimator=True
        )
        score = cv_results["test_score"]
        best_params = cv_results["estimator"][score.argmax()].best_params_

        scores[i] = score.mean()
        params.append(best_params)

    return scores, params

---

In [132]:
sel_study = "S1+S2"  # one of: S1, S2, S1+S2
sel_data = "B"       # one of: DTM, LIWC, LIWC_S, B (DTM, LIWC), B_S (DTM, LIWC_S)
sel_var = "power"    # one of: power, dominance, prestige, power_f, dominance_f, prestige_f

X = select_data(sel_data, sel_study).values
y = select_scores(sel_study, sel_var)

# convert continuous y variable to class variable
#y_cls = y.round()
#y_cls = y.astype(int)
y_cls = np.vectorize(round)(y)
y_lmh = quantize_scores(y)

In [9]:
clf = sklearn.svm.SVC(kernel="rbf")
p_grid = {"C": [1, 10, 100], "gamma": [0.01, 0.1]}

scores, params = run_trials(X, y_cls, clf, p_grid, 5)

print("best params: ", params[scores.argmax()])
pd.DataFrame.from_dict(dict(zip(scores, params)))

best params:  {'C': 1, 'gamma': 0.1}


Unnamed: 0,0.375,0.370,0.360,0.345,0.355
C,1.0,1.0,10.0,1.0,1.0
gamma,0.1,0.1,0.1,0.1,0.1


In [133]:
clf = sklearn.svm.SVC(kernel="rbf")
p_grid = {"C": [1, 10, 100], "gamma": [0.01, 0.1]}

scores, params = run_trials(X, y_lmh, clf, p_grid, 5)

print("best params: ", params[scores.argmax()])
pd.DataFrame.from_dict(dict(zip(scores, params)))

best params:  {'C': 1, 'gamma': 0.1}


Unnamed: 0,0.3825,0.3825.1,0.3275,0.3825.2
C,10.0,1.0,1.0,1.0
gamma,0.01,0.01,0.01,0.1


In [10]:
clf = sklearn.linear_model.LogisticRegression(solver="liblinear")
p_grid = {"C": [1, 0.1, 0.01], "max_iter": [10, 100, 500]}

scores, params = run_trials(X, y_cls, clf, p_grid, 5)

print("best params: ", params[scores.argmax()])
pd.DataFrame.from_dict(dict(zip(scores, params)))


best params:  {'C': 0.01, 'max_iter': 10}


Unnamed: 0,0.300,0.280,0.245,0.265,0.230
C,0.01,1,0.01,0.1,0.1
max_iter,10.0,10,10.0,10.0,10.0


In [134]:
clf = sklearn.linear_model.LogisticRegression(solver="liblinear")
p_grid = {"C": [1, 0.1, 0.01], "max_iter": [10, 100, 500]}

scores, params = run_trials(X, y_lmh, clf, p_grid, 5)

print("best params: ", params[scores.argmax()])
pd.DataFrame.from_dict(dict(zip(scores, params)))


best params:  {'C': 1, 'max_iter': 10}


Unnamed: 0,0.3700,0.3550,0.4150,0.3975
C,1,1,1,0.1
max_iter,100,100,100,10.0


In [11]:
clf = sklearn.linear_model.Lasso(selection="random")
p_grid = {"alpha": [10, 1, 0.1, 0.01], "max_iter": [250, 1000, 5000]}

scores, params = run_trials(X, y, clf, p_grid, 5)

print("best params: ", params[scores.argmax()])
pd.DataFrame.from_dict(dict(zip(scores, params)))

best params:  {'alpha': 10, 'max_iter': 250}


Unnamed: 0,-0.050571,-0.003739,-0.031928,-0.001635,-0.075270
alpha,10,10,10,10,10
max_iter,250,250,250,250,250


In [135]:
# this might not be best, we do some linear model but with classes?

clf = sklearn.linear_model.Lasso(selection="random")
p_grid = {"alpha": [10, 1, 0.1, 0.01], "max_iter": [250, 1000, 5000]}

scores, params = run_trials(X, y_lmh, clf, p_grid, 5)

print("best params: ", params[scores.argmax()])
pd.DataFrame.from_dict(dict(zip(scores, params)))

best params:  {'alpha': 10, 'max_iter': 250}


Unnamed: 0,-0.018485,-0.027791,-0.016605,-0.007605,-0.001174
alpha,10,10,10,10,10
max_iter,250,250,250,250,250


---

In [49]:
clfs = [
    (sklearn.svm.SVC(kernel="rbf"),
     {"C": [1, 10, 100], "gamma": [0.01, 0.1]}),
    (sklearn.linear_model.LogisticRegression(solver="liblinear"),
     {"C": [1, 0.1, 0.01], "max_iter": [10, 100, 500]}),
]
regrs = [
    (sklearn.linear_model.Lasso(),
     {"alpha": [10, 1, 0.1, 0.01], "max_iter": [250, 1000, 5000], "selection": ["cyclic", "random"]}),
]

results = dict()

for sel_study in sels_study:
    results[sel_study] = dict()
    print("#", "-" * 40)
    print(f"# Running for Study: {sel_study} ...")
    for sel_var in sels_var:
        results[sel_study][sel_var] = dict()
        print(f"## Running for Variable: ***[[{sel_var}]]*** ...")
        for sel_data in sels_data:
            results[sel_study][sel_var][sel_data] = dict()
            print(f"### Running on Data: {sel_data} ...")

            # data
            X = select_data(sel_data, sel_study).values
            y = select_scores(sel_study, sel_var)
            y_cls = np.vectorize(round)(y)

            # clf
            for clf, p_grid in clfs:
                print(f"#### Clf: {clf}")
                scores, params = run_trials(X, y_cls, clf, p_grid, 5)
                print(f"     best params: {params[scores.argmax()]} @ {scores.max():f} ±{scores.std():f}")
                results[sel_study][sel_var][sel_data][clf.__class__.__name__.rsplit(".", 1)[-1]] = pd.DataFrame.from_dict(dict(zip(scores, params)))

            # regr
            for reg, p_grid in regrs:
                print(f"#### Regr: {reg}")
                scores, params = run_trials(X, y, reg, p_grid, 5)
                print(f"     best params: {params[scores.argmax()]} @ {scores.max():f} ±{scores.std():f}")
                results[sel_study][sel_var][sel_data][reg.__class__.__name__.rsplit(".", 1)[-1]] = pd.DataFrame.from_dict(dict(zip(scores, params)))

            print()

# ----------------------------------------
# Running for Study: S1+S2 ...
## Running for Variable: ***[[power]]*** ...
### Running on Data: DTM ...
#### Clf: SVC()
     best params: {'C': 10, 'gamma': 0.1} @ 0.375000 ±0.022215
#### Clf: LogisticRegression(solver='liblinear')
     best params: {'C': 1, 'max_iter': 10} @ 0.367500 ±0.019144
#### Regr: Lasso()
     best params: {'alpha': 10, 'max_iter': 250, 'selection': 'cyclic'} @ -0.003178 ±0.006021

### Running on Data: LIWC ...
#### Clf: SVC()
     best params: {'C': 1, 'gamma': 0.01} @ 0.357500 ±0.010173
#### Clf: LogisticRegression(solver='liblinear')
     best params: {'C': 0.01, 'max_iter': 100} @ 0.332500 ±0.021829
#### Regr: Lasso()
     best params: {'alpha': 1, 'max_iter': 250, 'selection': 'cyclic'} @ 0.009783 ±0.010780

### Running on Data: LIWC_S ...
#### Clf: SVC()
     best params: {'C': 1, 'gamma': 0.01} @ 0.357500 ±0.014832
#### Clf: LogisticRegression(solver='liblinear')
     best params: {'C': 0.01, 'max_iter': 10} @ 

In [57]:
fn_results = "results.pickle"
if not os.path.exists(fn_results):
    with open(fn_results, "wb") as fp:
        pickle.dump(results, fp, protocol=pickle.HIGHEST_PROTOCOL)

with open(fn_results, "rb") as fp:
    results = pickle.load(fp)

In [73]:
for sel_study in sels_study:
    print("#", "-" * 40)
    print(f"# Running for Study: {sel_study} ...")
    for sel_var in sels_var:
        print(f"## Running for Variable: ***[[{sel_var}]]*** ...")
        for sel_data in sels_data:
            print(f"### Running on Data: {sel_data} ...")

            for est, df in results[sel_study][sel_var][sel_data].items():
                print(f"#### Estimator: {est}")
                scores = df.columns.values
                best_params = df.iloc[:,scores.argmax()].to_dict()
                print(f"     best params: {best_params} @ {scores.max():f} ±{scores.std():f}")

            print()

# ----------------------------------------
# Running for Study: S1 ...
## Running for Variable: ***[[power]]*** ...
### Running on Data: DTM ...
#### Estimator: SVC
     best params: {'C': 1.0, 'gamma': 0.01} @ 0.375000 ±0.026192
#### Estimator: LogisticRegression
     best params: {'C': 1, 'max_iter': 10} @ 0.375000 ±0.019900
#### Estimator: Lasso
     best params: {'alpha': 0.01, 'max_iter': 5000, 'selection': 'random'} @ -0.001360 ±0.029390

### Running on Data: LIWC ...
#### Estimator: SVC
     best params: {'C': 1.0, 'gamma': 0.01} @ 0.375000 ±0.002357
#### Estimator: LogisticRegression
     best params: {'C': 0.01, 'max_iter': 100.0} @ 0.290000 ±0.011456
#### Estimator: Lasso
     best params: {'alpha': 1, 'max_iter': 1000, 'selection': 'random'} @ 0.019622 ±0.029883

### Running on Data: LIWC_S ...
#### Estimator: SVC
     best params: {'C': 1.0, 'gamma': 0.1} @ 0.380000 ±0.012083
#### Estimator: LogisticRegression
     best params: {'C': 0.01, 'max_iter': 10.0} @ 0.295000 ±0.03

In [75]:
for sel_study in sels_study:
    print(f"Results in Data from Study: {sel_study}")

    for sel_var in sels_var:
        best = None
        for sel_data in sels_data:
            for clf, clf_results in results[sel_study][sel_var][sel_data].items():
                scores = clf_results.columns.values
                score = scores.mean()
                if best is None or score > best[0]:
                    best = (score, clf_results, clf, sel_data)
        print(f"  Best results for >>>{sel_var}<<< on data: '{best[3]}' with {best[0]:f}")
        scores = best[1].columns.values
        best_params = best[1].iloc[:,scores.argmax()].to_dict()
        print(f"  -> Clf: {best[2]} with params: {best_params}; {scores.max():f} ±{scores.std():f}")
        print()

Results in Data from Study: S1
  Best results for >>>power<<< on data: 'LIWC' with 0.373333
  -> Clf: SVC with params: {'C': 1.0, 'gamma': 0.01}; 0.375000 ±0.002357

  Best results for >>>dominance<<< on data: 'LIWC_S' with 0.268333
  -> Clf: SVC with params: {'C': 10.0, 'gamma': 0.01}; 0.315000 ±0.036818

  Best results for >>>prestige<<< on data: 'DTM' with 0.485000
  -> Clf: LogisticRegression with params: {'C': 0.1, 'max_iter': 10.0}; 0.485000 ±0.000000

  Best results for >>>power_f<<< on data: 'DTM' with 0.363000
  -> Clf: LogisticRegression with params: {'C': 1, 'max_iter': 10}; 0.395000 ±0.033257

  Best results for >>>dominance_f<<< on data: 'LIWC' with 0.400000
  -> Clf: SVC with params: {'C': 1.0, 'gamma': 0.01}; 0.400000 ±0.000000

  Best results for >>>prestige_f<<< on data: 'LIWC_S' with 0.572000
  -> Clf: SVC with params: {'C': 1.0, 'gamma': 0.01}; 0.580000 ±0.009274

Results in Data from Study: S2
  Best results for >>>power<<< on data: 'DTM' with 0.361000
  -> Clf: Log

---

In [146]:
sel_data = "LIWC"       # one of: DTM, LIWC, LIWC_S, B (DTM, LIWC), B_S (DTM, LIWC_S)
sel_var = "prestige"    # one of: power, dominance, prestige, power_f, dominance_f, prestige_f

X = select_data(sel_data, "S2").values
y = select_scores("S2", sel_var)
y_cls = quantize_scores(y)

X1 = select_data(sel_data, "S1").values
y1 = select_scores("S1", sel_var)
y1_cls = quantize_scores(y1)

clf = sklearn.svm.SVC(kernel="rbf")
p_grid = {"C": [1, 10, 100], "gamma": [0.01, 0.1]}

scores, params = run_trials(X, y_cls, clf, p_grid, 5)
best_params = params[scores.argmax()]

print(f"best params: {best_params} with {scores.max():f} ({scores.mean():f} ±{scores.std():f})")

clf.set_params(**best_params)
clf.fit(X, y_cls)
score = clf.score(X1, y1_cls)
print(f"train cross-validation score: {score:f}")

y_pred = clf.predict(X)
y1_pred = clf.predict(X1)
# "overfitting" on train dataset should about return 1.0 (max) as score
print(f"R² Score train: {r2_score(y_cls, y_pred)}, test: {r2_score(y1_cls, y1_pred)}")
print(f"Accuracy Score train: {accuracy_score(y_cls, y_pred)}, test: {accuracy_score(y1_cls, y1_pred)}")

best params: {'C': 1, 'gamma': 0.01} with 0.335000 (0.318000 ±0.014353)
train cross-validation score: 0.355000
R² Score train: 1.0, test: -1.36972276544346
Accuracy Score train: 1.0, test: 0.355


---

See sklearn: "Nested versus non-nested cross-validation"
@ https://scikit-learn.org/stable/auto_examples/model_selection/plot_nested_cross_validation_iris.html

Scratch area

In [None]:
from matplotlib import pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
import numpy as np

# Number of random trials
NUM_TRIALS = 30

# Set up possible values of parameters to optimize over
p_grid = {"C": [1, 10, 100], "gamma": [0.01, 0.1]}

# We will use a Support Vector Classifier with "rbf" kernel
svm = SVC(kernel="rbf")

#p_grid = {"C": [1, 10, 100]}
#svm = sklearn.linear_model.LogisticRegression()


def run_trials_both(clf, p_grid, num_trials):
    # Arrays to store scores
    non_nested_scores = np.zeros(num_trials)
    nested_scores = np.zeros(num_trials)

    # Loop for each trial
    for i in range(num_trials):

        # Choose cross-validation techniques for the inner and outer loops,
        # independently of the dataset.
        # E.g "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut", etc.
        inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)
        outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)

        # Non_nested parameter search and scoring
        clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=outer_cv)
        clf.fit(X, y)
        non_nested_scores[i] = clf.best_score_

        # Nested CV with parameter optimization
        clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)
        nested_score = cross_val_score(clf, X=X, y=y, cv=outer_cv)
        nested_scores[i] = nested_score.mean()

    return non_nested_scores, nested_scores


def plot_trials_both(non_nested_scores, nested_scores, num_trials):
    # Plot scores on each trial for nested and non-nested CV
    fig = plt.figure()
    ax1, ax2 = fig.subplots(2, 1, sharex=True)
    (non_nested_scores_line,) = ax1.plot(non_nested_scores, color="r")
    (nested_line,) = ax1.plot(nested_scores, color="b")

    ax1.set_title("Non-Nested and Nested Cross Validation", x=0.5, y=1.1, fontsize="15",)

    ax1.set_ylabel("score", fontsize="14")
    ax1.legend(
        [non_nested_scores_line, nested_line],
        ["Non-Nested CV", "Nested CV"],
        bbox_to_anchor=(0, 0.4, 0.5, 0),
    )
    
    # Plot bar chart of the difference.
    score_difference = non_nested_scores - nested_scores
    difference_plot = plt.bar(range(num_trials), score_difference)
    ax2.set_xlabel("Individual Trial #")
    ax2.set_ylabel("score difference", fontsize="14")
    ax2.legend(
        [difference_plot],
        ["Non-Nested CV - Nested CV Score"],
        bbox_to_anchor=(0, 1, 0.8, 0),
    )
    plt.close()

    return fig

non_nested_scores, nested_scores = run_trials_both(svm, p_grid, NUM_TRIALS)

score_difference = non_nested_scores - nested_scores
print("Average difference of {:6f} with std. dev. of {:6f}.".format(score_difference.mean(), score_difference.std()))

fig = plot_trials_both(non_nested_scores, nested_scores, NUM_TRIALS)
fig
