# Notebook for testing different embeddings

In [2]:
import datautils
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import f1_score
import pandas as pd

In [4]:
# Training data file passed to datautils.split_kfold_dataset in the cells below
# (presumably tab-separated, judging by the .tsv extension).
PATH = './Dataset/power-gb-train.tsv'
# Output directory (with trailing slash) that is prefixed to the result CSV names.
RES_DIR = './Results/'
# Directory (with trailing slash) for saved embedding / word-vector files.
EMBED_DIR = './Embeddings/'

## Test tf-idf embeddings

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Load the cross-validation splits; the last two returned values are unused here.
X_train, y_train, X_val, y_val, _, _ = datautils.split_kfold_dataset(PATH)

# Replace every fold's raw documents in place with their TF-IDF representation.
# The vectorizer returned for the training documents of a fold is the one used
# to transform that fold's validation documents.
for idx, fold_pair in enumerate(zip(X_train, X_val)):
    tr_fold, val_fold = fold_pair
    tfidf_train, vectorizer = datautils.tf_idf_preprocessing(tr_fold)
    X_train[idx] = tfidf_train
    X_val[idx] = vectorizer.transform(val_fold)

In [None]:
# Grid of LogisticRegression settings evaluated on every fold.
hyperparameters = {
    "penalty": ["l2"],
    "C": [0.1, 1.0, 10.0, 100.0, 1000.0],
    "solver": ["lbfgs"],
    "max_iter": [100, 200, 500],
}

param_grid = list(ParameterGrid(hyperparameters))

# Column order of the results table / CSV.
result_columns = ["penalty", "C", "solver", "max_iter", "Fold", "F1 Score"]

# One record per (parameter set, fold). The rows are gathered in a plain list
# and turned into a DataFrame once at the end, instead of concatenating onto
# an (initially empty) DataFrame on every iteration.
records = []

for par in param_grid:
    for idx, (X_train_fold, y_train_fold, X_val_fold, y_val_fold) in enumerate(
        zip(X_train, y_train, X_val, y_val)
    ):
        # A fresh model is fitted for every parameter set and fold.
        model = LogisticRegression(**par)
        model.fit(X_train_fold, y_train_fold)

        # Compute macro-averaged F1 score on the validation set
        y_val_pred = model.predict(X_val_fold)
        f1_macro = f1_score(y_val_fold, y_val_pred, average="macro")

        print(f"Fold: {idx}")
        print(f"Parameters: {par}")
        print(f"\tF1 score: {f1_macro}")

        records.append(
            {
                "penalty": par["penalty"],
                "C": par["C"],
                "solver": par["solver"],
                "max_iter": par["max_iter"],
                "Fold": idx,
                "F1 Score": f1_macro,
            }
        )

# Passing `columns` keeps the expected header even if no record was produced.
results_df = pd.DataFrame(records, columns=result_columns)

# Fixed: the original line was missing the `+` between RES_DIR and the file name.
results_df.to_csv(RES_DIR + "results-Logistic-tfidf.csv", index=False)

In [None]:
# Summarise the per-fold scores: mean and standard deviation of the F1 score
# for every hyperparameter combination.
f1_by_params = results_df.groupby(['penalty', 'C', 'solver', 'max_iter'])['F1 Score']
mean_f1 = f1_by_params.mean()
std_f1 = f1_by_params.std()


In [None]:
mean_f1

## Test word2vec embeddings

In [11]:
from sklearn.linear_model import LogisticRegression
from gensim.utils import simple_preprocess
from gensim.models import Word2Vec
from datautils import documents_vector

In [12]:
# Load the cross-validation splits; the last two returned values are unused here.
X_train, y_train, X_val, y_val, _, _ = datautils.split_kfold_dataset(PATH)

for idx, (tr_fold, val_fold) in enumerate(zip(X_train, X_val)):
    # Tokenise every document with gensim's simple_preprocess.
    tr_fold = [simple_preprocess(doc) for doc in tr_fold]
    val_fold = [simple_preprocess(doc) for doc in val_fold]

    # Word2Vec model training, on the training documents of this fold only.
    # NOTE(review): training runs with several worker threads, which gensim
    # documents as a source of run-to-run variation in the vectors.
    modelw2v = Word2Vec(
        tr_fold,
        vector_size=150,
        window=10,
        min_count=2,
        workers=8,
    )

    # Save word vectors under the configured embeddings directory
    # (EMBED_DIR replaces the previously hard-coded "./Embeddings/" prefix).
    word_vectors = modelw2v.wv
    word_vectors.save(f"{EMBED_DIR}w2v-fold{idx}.kv")

    # Pooling: replace the fold's documents in place with the output of
    # documents_vector (presumably one pooled vector per document -- confirm
    # against datautils).
    X_train[idx] = documents_vector(tr_fold, modelw2v)
    X_val[idx] = documents_vector(val_fold, modelw2v)

In [13]:
# Grid of LogisticRegression settings evaluated on every fold.
hyperparameters = {
    "penalty": ["l2"],
    "C": [0.1, 1.0, 10.0, 100.0, 1000.0],
    "solver": ["lbfgs"],
    "max_iter": [100, 200, 500],
}

param_grid = list(ParameterGrid(hyperparameters))

# Column order of the results table / CSV.
result_columns = ["penalty", "C", "solver", "max_iter", "Fold", "F1 Score"]

# One record per (parameter set, fold). The rows are gathered in a plain list
# and turned into a DataFrame once at the end, instead of concatenating onto
# an (initially empty) DataFrame on every iteration.
records = []

for par in param_grid:
    for idx, (X_train_fold, y_train_fold, X_val_fold, y_val_fold) in enumerate(
        zip(X_train, y_train, X_val, y_val)
    ):
        # A fresh model is fitted for every parameter set and fold.
        model = LogisticRegression(**par)
        model.fit(X_train_fold, y_train_fold)

        # Compute macro-averaged F1 score on the validation set
        y_val_pred = model.predict(X_val_fold)
        f1_macro = f1_score(y_val_fold, y_val_pred, average="macro")

        print(f"Fold: {idx}")
        print(f"Parameters: {par}")
        print(f"\tF1 score: {f1_macro}")

        records.append(
            {
                "penalty": par["penalty"],
                "C": par["C"],
                "solver": par["solver"],
                "max_iter": par["max_iter"],
                "Fold": idx,
                "F1 Score": f1_macro,
            }
        )

# Passing `columns` keeps the expected header even if no record was produced.
results_df = pd.DataFrame(records, columns=result_columns)
results_df.to_csv(RES_DIR + "results-Logistic-w2v-batch1.csv", index=False)

Fold: 0
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7240671936351502


  results_df = pd.concat(


Fold: 1
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7190333539225358
Fold: 2
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7143804443574435
Fold: 3
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7192156020400777
Fold: 4
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7171419909651418
Fold: 0
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7240671936351502
Fold: 1
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7190333539225358
Fold: 2
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7143804443574435
Fold: 3
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7192156020400777
Fold: 4
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 

In [14]:
# Summarise the per-fold scores: mean and standard deviation of the F1 score
# for every hyperparameter combination.
f1_by_params = results_df.groupby(['penalty', 'C', 'solver', 'max_iter'])['F1 Score']
mean_f1 = f1_by_params.mean()
std_f1 = f1_by_params.std()


In [15]:
mean_f1

penalty  C       solver  max_iter
l2       0.1     lbfgs   100         0.718768
                         200         0.718768
                         500         0.718768
         1.0     lbfgs   100         0.721047
                         200         0.721047
                         500         0.721047
         10.0    lbfgs   100         0.721544
                         200         0.721544
                         500         0.721544
         100.0   lbfgs   100         0.721498
                         200         0.721498
                         500         0.721498
         1000.0  lbfgs   100         0.721545
                         200         0.721545
                         500         0.721545
Name: F1 Score, dtype: float64

## Test fasttext embeddings

In [3]:
from sklearn.linear_model import LogisticRegression
from gensim.utils import simple_preprocess
from gensim.models import fasttext
from datautils import documents_vector

In [4]:
# Load the cross-validation splits; the last two returned values are unused here.
X_train, y_train, X_val, y_val, _, _ = datautils.split_kfold_dataset(PATH)

for idx, (tr_fold, val_fold) in enumerate(zip(X_train, X_val)):
    # Tokenise every document with gensim's simple_preprocess.
    tr_fold = [simple_preprocess(doc) for doc in tr_fold]
    val_fold = [simple_preprocess(doc) for doc in val_fold]

    # fastText model training, on the training documents of this fold only.
    # NOTE(review): training runs with several worker threads, which gensim
    # documents as a source of run-to-run variation in the vectors.
    model_ftx = fasttext.FastText(
        sentences=tr_fold,
        vector_size=150,
        window=10,
        min_count=2,
        workers=10,
    )

    # Save word vectors under the configured embeddings directory
    # (EMBED_DIR replaces the previously hard-coded "./Embeddings/" prefix).
    word_vectors = model_ftx.wv
    word_vectors.save(f"{EMBED_DIR}ftx-fold{idx}.kv")

    # Pooling: replace the fold's documents in place with the output of
    # documents_vector (presumably one pooled vector per document -- confirm
    # against datautils).
    X_train[idx] = documents_vector(tr_fold, model_ftx)
    X_val[idx] = documents_vector(val_fold, model_ftx)

In [8]:
# Grid of LogisticRegression settings evaluated on every fold.
# NOTE(review): the recorded output of this cell shows lbfgs "iterations
# reached limit" warnings for the smaller max_iter values; scaling the pooled
# vectors (as the warning itself suggests) may be worth trying.
hyperparameters = {
    "penalty": ["l2"],
    "C": [0.1, 1.0, 10.0, 100.0, 1000.0],
    "solver": ["lbfgs"],
    "max_iter": [100, 200, 500, 1000],
}

param_grid = list(ParameterGrid(hyperparameters))

# Column order of the results table / CSV.
result_columns = ["penalty", "C", "solver", "max_iter", "Fold", "F1 Score"]

# One record per (parameter set, fold). The rows are gathered in a plain list
# and turned into a DataFrame once at the end, instead of concatenating onto
# an (initially empty) DataFrame on every iteration.
records = []

for par in param_grid:
    for idx, (X_train_fold, y_train_fold, X_val_fold, y_val_fold) in enumerate(
        zip(X_train, y_train, X_val, y_val)
    ):
        # A fresh model is fitted for every parameter set and fold.
        model = LogisticRegression(**par)
        model.fit(X_train_fold, y_train_fold)

        # Compute macro-averaged F1 score on the validation set
        y_val_pred = model.predict(X_val_fold)
        f1_macro = f1_score(y_val_fold, y_val_pred, average="macro")

        print(f"Fold: {idx}")
        print(f"Parameters: {par}")
        print(f"\tF1 score: {f1_macro}")

        records.append(
            {
                "penalty": par["penalty"],
                "C": par["C"],
                "solver": par["solver"],
                "max_iter": par["max_iter"],
                "Fold": idx,
                "F1 Score": f1_macro,
            }
        )

# Passing `columns` keeps the expected header even if no record was produced.
results_df = pd.DataFrame(records, columns=result_columns)
results_df.to_csv(RES_DIR + "results-Logistic-ftx-batch1.csv", index=False)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  results_df = pd.concat(


Fold: 0
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7125378383815699


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 1
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7075759729670192


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 2
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7087589366945992


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 3
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7170629598584335


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 4
Parameters: {'C': 0.1, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7109510781232122


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 0
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7119477169786945
Fold: 1
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7084307259751759


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 2
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7086885496984883
Fold: 3
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7176799149614868
Fold: 4
Parameters: {'C': 0.1, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7106019852223948
Fold: 0
Parameters: {'C': 0.1, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7114874285339996
Fold: 1
Parameters: {'C': 0.1, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7084307259751759
Fold: 2
Parameters: {'C': 0.1, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7083398824928611
Fold: 3
Parameters: {'C': 0.1, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7176799149614868
Fold: 4
Parameters: {'C': 0.1, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7106019852223948
Fold: 0
Parameters: {'C': 0.1, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 0
Parameters: {'C': 1.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7141933761888104


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 1
Parameters: {'C': 1.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7093012247125992


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 2
Parameters: {'C': 1.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7140620374310689


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 3
Parameters: {'C': 1.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7141376185164529


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 4
Parameters: {'C': 1.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7137799432154306
Fold: 0
Parameters: {'C': 1.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7157660828442516
Fold: 1
Parameters: {'C': 1.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7113553891921933
Fold: 2
Parameters: {'C': 1.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7122091321833794
Fold: 3
Parameters: {'C': 1.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7143725880828655
Fold: 4
Parameters: {'C': 1.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7127069370624528
Fold: 0
Parameters: {'C': 1.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7157660828442516
Fold: 1
Parameters: {'C': 1.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7113553891921933
Fold: 2
Parameters: {'C': 1.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 0
Parameters: {'C': 10.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7142987538604576


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 1
Parameters: {'C': 10.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7103224880361281


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 2
Parameters: {'C': 10.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7149903690117749


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 3
Parameters: {'C': 10.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7117861316781646


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 4
Parameters: {'C': 10.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7137503743370444
Fold: 0
Parameters: {'C': 10.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7158878248910285
Fold: 1
Parameters: {'C': 10.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7106717074598339


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 2
Parameters: {'C': 10.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7108188381359561
Fold: 3
Parameters: {'C': 10.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7124050118179277
Fold: 4
Parameters: {'C': 10.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7125528643784617
Fold: 0
Parameters: {'C': 10.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7158878248910285
Fold: 1
Parameters: {'C': 10.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7106717074598339
Fold: 2
Parameters: {'C': 10.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7112452806531158
Fold: 3
Parameters: {'C': 10.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7124050118179277
Fold: 4
Parameters: {'C': 10.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7125528643784617
Fold: 0
Parameters: {'C': 10.0, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'lb

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 0
Parameters: {'C': 100.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7141530294128609


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 1
Parameters: {'C': 100.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7099732686124225


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 2
Parameters: {'C': 100.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.71495200872988


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 3
Parameters: {'C': 100.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7110156640489321


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 4
Parameters: {'C': 100.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.713123693707275
Fold: 0
Parameters: {'C': 100.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.716276804389956
Fold: 1
Parameters: {'C': 100.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.711216503240808


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 2
Parameters: {'C': 100.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7119794610415533
Fold: 3
Parameters: {'C': 100.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.712595989110929
Fold: 4
Parameters: {'C': 100.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7126320042162237
Fold: 0
Parameters: {'C': 100.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.716276804389956
Fold: 1
Parameters: {'C': 100.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.711216503240808
Fold: 2
Parameters: {'C': 100.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7114386767434789
Fold: 3
Parameters: {'C': 100.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.712595989110929
Fold: 4
Parameters: {'C': 100.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7126320042162237
Fold: 0
Parameters: {'C': 100.0, 'max_iter': 1000, 'penalty': 'l2', 'solver'

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 0
Parameters: {'C': 1000.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.715117542176522


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 1
Parameters: {'C': 1000.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7103070431480971


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 2
Parameters: {'C': 1000.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7127893529151399


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 3
Parameters: {'C': 1000.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.712595989110929


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Fold: 4
Parameters: {'C': 1000.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7145211679973815
Fold: 0
Parameters: {'C': 1000.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7164309158264772
Fold: 1
Parameters: {'C': 1000.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7114120343696138
Fold: 2
Parameters: {'C': 1000.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7113204443798595
Fold: 3
Parameters: {'C': 1000.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.712595989110929
Fold: 4
Parameters: {'C': 1000.0, 'max_iter': 200, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7122843115413493
Fold: 0
Parameters: {'C': 1000.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7164309158264772
Fold: 1
Parameters: {'C': 1000.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	F1 score: 0.7114120343696138
Fold: 2
Parameters: {'C': 1000.0, 'max_iter': 500, 'penalty': 'l2

In [9]:
# Summarise the per-fold scores: mean and standard deviation of the F1 score
# for every hyperparameter combination.
f1_by_params = results_df.groupby(['penalty', 'C', 'solver', 'max_iter'])['F1 Score']
mean_f1 = f1_by_params.mean()
std_f1 = f1_by_params.std()

In [10]:
mean_f1

penalty  C       solver  max_iter
l2       0.1     lbfgs   100         0.711377
                         200         0.711470
                         500         0.711308
                         1000        0.711308
         1.0     lbfgs   100         0.713095
                         200         0.713282
                         500         0.713282
                         1000        0.713282
         10.0    lbfgs   100         0.713030
                         200         0.712467
                         500         0.712553
                         1000        0.712553
         100.0   lbfgs   100         0.712644
                         200         0.712940
                         500         0.712832
                         1000        0.712832
         1000.0  lbfgs   100         0.713066
                         200         0.712809
                         500         0.712809
                         1000        0.712809
Name: F1 Score, dtype: float64