Preliminaries

In [3]:
# Importing libraries
import nltk
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
import pprint, time

import nltk
import sklearn_crfsuite
from sklearn_crfsuite import metrics

from nltk.corpus import treebank


from nltk.classify import MaxentClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import pycrfsuite
from nltk.tag import hmm
from nltk.classify import megam
from sklearn.model_selection import GridSearchCV
from nltk.tag import BrillTaggerTrainer
from nltk.tag import UnigramTagger
from nltk.tag import DefaultTagger

In [5]:
# Make sure the two NLTK resources used below are available locally:
# the treebank corpus and the universal tagset mapping.
for resource_name in ('treebank', 'universal_tagset'):
    nltk.download(resource_name)

# Load every treebank sentence as a list of (word, tag) pairs, with the tags
# mapped onto the universal tagset.
tagged_sentences = list(treebank.tagged_sents(tagset='universal'))

[nltk_data] Downloading package treebank to
[nltk_data]     C:\Users\kstap\AppData\Roaming\nltk_data...
[nltk_data]   Package treebank is already up-to-date!
[nltk_data] Downloading package universal_tagset to
[nltk_data]     C:\Users\kstap\AppData\Roaming\nltk_data...
[nltk_data]   Package universal_tagset is already up-to-date!


Corruption function

In [6]:
def corrupt_training_set(corpus, switch_prob, tagset=None):
    """
    Corrupt a tagged corpus by randomly switching POS tags with a given probability.

    Every token is handled independently: with probability ``switch_prob`` its
    tag is replaced by a tag drawn uniformly from the *other* tags in
    ``tagset``. The input corpus is left untouched; a new corpus is returned.

    Args:
    corpus (list): A list of tagged sentences, each a list of (word, tag) pairs.
    switch_prob (float): The probability of switching out a POS tag for corruption.
    tagset (iterable, optional): Tags that replacements are drawn from.
        Defaults to the twelve tags of the universal tagset.

    Returns:
    list: The corrupted tagged corpus.
    int: The number of errors induced.
    """
    if tagset is None:
        # Full universal tagset, including 'ADP' so that prepositions can
        # also appear as (wrong) replacement labels.
        tagset = ('NOUN', 'VERB', 'ADJ', 'ADV', 'PRON', 'DET', 'ADP',
                  'NUM', 'CONJ', 'PRT', '.', 'X')
    tagset = list(tagset)

    # Replacement candidates per original tag, filled in on first use so the
    # candidate list is not rebuilt for every token.
    alternatives = {}

    corrupted_corpus = []
    error_count = 0

    for sentence in corpus:
        corrupted_sentence = []
        for word, pos_tag in sentence:
            if random.random() < switch_prob:
                if pos_tag not in alternatives:
                    alternatives[pos_tag] = [t for t in tagset if t != pos_tag]
                candidates = alternatives[pos_tag]
                if candidates:
                    # Drawing from the candidates guarantees a tag different
                    # from the original without a retry loop.
                    corrupted_sentence.append((word, random.choice(candidates)))
                    error_count += 1
                    continue
                # No alternative tag exists (e.g. a one-tag tagset): the token
                # cannot be corrupted, so keep it and do not count an error.
            corrupted_sentence.append((word, pos_tag))
        corrupted_corpus.append(corrupted_sentence)

    return corrupted_corpus, error_count

Feature function and feature extraction

In [4]:
def word_features(sentence, i):

    """
    Extract features for a given index in a sentence.

    Parameters:
    - sentence: List of (word, tag) pairs.
    - i: index of the token to describe.

    Returns:
    - features: a dictionary of features on a given index.

    Note: the 'prev_prev_tag' feature is read from the tags stored in
    `sentence` itself, so whatever tags the caller passes in (gold or
    corrupted) end up in the features of the following tokens.
    """
    word = sentence[i][0]
    last_index = len(sentence) - 1
    # Slices instead of word[0] / word[-1]: identical for non-empty words,
    # but an empty token yields '' instead of raising IndexError.
    first_char = word[:1]
    features = {
        'word': word,
        'is_first': i == 0,  # if the word is the first word
        'is_last': i == last_index,  # if the word is the last word
        # True when upper-casing the first character leaves it unchanged
        # (so digits and punctuation count as well); False for an empty token.
        'is_capitalized': bool(word) and first_char.upper() == first_char,
        'is_all_caps': word.upper() == word,  # word is in uppercase
        'is_all_lower': word.lower() == word,  # word is in lowercase
        # prefix of the word
        'prefix-1': first_char,
        'prefix-2': word[:2],
        'prefix-3': word[:3],
        # suffix of the word
        'suffix-1': word[-1:],
        'suffix-2': word[-2:],
        'suffix-3': word[-3:],
        # extracting previous word
        'prev_word': '' if i == 0 else sentence[i - 1][0],
        # extracting next word
        'next_word': '' if i == last_index else sentence[i + 1][0],
        'has_hyphen': '-' in word,  # if word has a hyphen
        'is_numeric': word.isdigit(),  # if word is numeric
        # any upper-case character after the first one
        'capitals_inside': word[1:].lower() != word[1:]
    }

    # Previous two tags, joined as "<tag at i-2>_<tag at i-1>"; positions
    # before the sentence start contribute an empty string.
    prev_tag = '' if i == 0 else sentence[i - 1][1]
    prev_prev_tag = '' if i < 2 else sentence[i - 2][1]
    features['prev_prev_tag'] = f'{prev_prev_tag}_{prev_tag}'

    # Add word after the next word
    features['next_next_word'] = '' if i > len(sentence) - 3 else sentence[i + 2][0]

    # Add word before the previous word
    features['prev_prev_word'] = '' if i < 2 else sentence[i - 2][0]

    return features

In [5]:
# Build per-sentence feature sequences (X) and the matching tag sequences (y)
# for the whole corpus.
X, y = [], []
for sentence in tagged_sentences:
    X.append([word_features(sentence, position) for position in range(len(sentence))])
    y.append([token[1] for token in sentence])

Cross validation for MEMM and CRF

In [6]:
# CV tuning function

from sklearn.model_selection import KFold

def train_and_tune_memm(MEMM_train, max_iter_values, num_folds=5, return_score=False):
    """
    Train and tune a Maximum Entropy Markov Model (MEMM) using cross-validation.

    For every candidate in `max_iter_values` a MaxentClassifier (GIS) is
    trained on each training fold and scored with the weighted F1 on the
    held-out fold; the candidate with the highest mean F1 wins. Ties keep the
    earlier candidate.

    Parameters:
    - MEMM_train: List of (features, label) pairs for training.
    - max_iter_values: List of max_iter values to tune.
    - num_folds: Number of folds for cross-validation.
    - return_score: If True, also return the mean F1 of the selected value.

    Returns:
    - best_max_iter: The best max_iter value found (None if `max_iter_values`
      is empty).
    - best_f1: Only when `return_score` is True; the mean F1 achieved with
      the best max_iter value, returned as `(best_max_iter, best_f1)`.
    """

    # Initialize variables to keep track of the best max_iter and its associated F1 score
    best_max_iter = None
    best_f1 = 0.0

    # KFold is used without shuffling, so the folds are the same for every
    # candidate; build them once instead of once per max_iter value.
    kf = KFold(n_splits=num_folds)
    folds = []
    for train_idx, valid_idx in kf.split(MEMM_train):
        train_set = [MEMM_train[i] for i in train_idx]
        valid_features = [MEMM_train[i][0] for i in valid_idx]
        valid_labels = [MEMM_train[i][1] for i in valid_idx]
        folds.append((train_set, valid_features, valid_labels))

    for max_iter in max_iter_values:
        f1_scores = []

        for train_set, valid_features, valid_labels in folds:
            maxent_classifier = MaxentClassifier.train(train_set, algorithm='gis', max_iter=max_iter)
            predictions = maxent_classifier.classify_many(valid_features)
            f1_scores.append(f1_score(valid_labels, predictions, average='weighted'))

        # Calculate the average F1 score across folds
        avg_f1 = np.mean(f1_scores)

        # Always accept the first candidate so that a value is selected even
        # if every candidate scores 0; afterwards require a strict improvement.
        if best_max_iter is None or avg_f1 > best_f1:
            best_max_iter = max_iter
            best_f1 = avg_f1

    if return_score:
        return best_max_iter, best_f1
    return best_max_iter

# Example usage:
# best_max_iter, best_f1 = train_and_tune_memm(MEMM_train40, max_iter_values=[5, 10, 15, 20], return_score=True)
# print(f"Best max_iter: {best_max_iter}")
# print(f"Best F1 Score: {best_f1}")


In [7]:
# train and tune

def train_and_tune_crf_with_cv(X, y, param_grid, n_folds=5):

    """
    Train and tune a Conditional Random Fields (CRF) Model using cross-validation.

    Every combination of `max_iterations`, `c1` and `c2` from `param_grid` is
    trained on each training fold and scored with the weighted F1 over all
    tokens of the held-out fold. Sentence `i` belongs to validation fold
    `i % n_folds`. The combination with the highest mean F1 is returned; ties
    keep the earlier combination.

    Side effect: each fit writes (and overwrites) the model file
    'temp_model.crfsuite' in the current working directory.

    Parameters:
    - X: list of extracted features (one list of feature dicts per sentence)
    - y: list of corresponding tags (one list of tags per sentence)
    - param_grid: search grid dictionary with the keys 'max_iterations',
      'c1' and 'c2', each mapping to a list of candidate values.
    - n_folds: Number of folds for cross-validation.

    Returns:
    - best_params: A dictionary of the best parameter values found, ready to
      be passed to `pycrfsuite.Trainer.set_params` (empty if the grid is empty).
    """

    # Fold membership depends only on the sentence index, so the splits are
    # identical for every grid point; build them once up front.
    folds = []
    for fold in range(n_folds):
        train_indices = [i for i in range(len(X)) if i % n_folds != fold]
        valid_indices = [i for i in range(len(X)) if i % n_folds == fold]
        folds.append((
            [X[i] for i in train_indices],
            [y[i] for i in train_indices],
            [X[i] for i in valid_indices],
            # flattened gold tags of the validation fold
            [tag for i in valid_indices for tag in y[i]],
        ))

    best_f1 = 0.0
    best_params = {}

    for max_iter in param_grid['max_iterations']:
        for c1 in param_grid['c1']:
            for c2 in param_grid['c2']:
                params = {
                    'max_iterations': max_iter,
                    'c1': c1,
                    'c2': c2,
                    'feature.possible_transitions': True
                }
                f1_scores = []

                for X_train_fold, y_train_fold, X_valid_fold, flat_ground_truth in folds:
                    # Train the CRF model
                    trainer = pycrfsuite.Trainer(verbose=False)
                    for x_seq, y_seq in zip(X_train_fold, y_train_fold):
                        trainer.append(x_seq, y_seq)
                    trainer.set_params(params)
                    trainer.train('temp_model.crfsuite')

                    # Test the CRF model; close the tagger afterwards so the
                    # model file is released before the next fit overwrites it.
                    tagger = pycrfsuite.Tagger()
                    tagger.open('temp_model.crfsuite')
                    try:
                        flat_predictions = [
                            tag
                            for instance in X_valid_fold
                            for tag in tagger.tag(instance)
                        ]
                    finally:
                        tagger.close()

                    f1_scores.append(
                        f1_score(flat_ground_truth, flat_predictions, average='weighted')
                    )

                mean_f1 = np.mean(f1_scores)
                # Accept the first combination unconditionally so a usable
                # parameter set is returned even if every score is 0.
                if not best_params or mean_f1 > best_f1:
                    best_f1 = mean_f1
                    best_params = dict(params)

    return best_params



In [8]:

# Search grid for CRF tuning: every combination of these values is tried by
# train_and_tune_crf_with_cv.
param_grid = dict(
    max_iterations=[20, 50, 100],
    c1=[0.01, 0.1, 1.0, 10],
    c2=[0.0001, 0.001, 0.01, 0.1],
)

In [7]:
# validation
# Hold out 10% of the sentences for hyper-parameter tuning; the remaining 90%
# (model_set) is what the evaluation functions work on.
# train_test_split shuffles with NumPy's RNG, which random.seed() does not
# touch, so the split is made reproducible through random_state.
random.seed(1234)
valid_set, model_set = train_test_split(tagged_sentences, train_size=0.1, random_state=1234)

# feature extraction: one list of feature dicts / tags per validation sentence
X_valid = [
    [word_features(sentence, i) for i in range(len(sentence))]
    for sentence in valid_set
]
y_valid = [[token[1] for token in sentence] for sentence in valid_set]

# Collect feature-label pairs for MEMM: the classifier is trained on
# individual tokens, so the sentence structure is flattened away here.
MEMM_valid = [
    pair
    for sentence_features, sentence_labels in zip(X_valid, y_valid)
    for pair in zip(sentence_features, sentence_labels)
]

best_max_iter = train_and_tune_memm(MEMM_valid, max_iter_values=[10,20,30])

In [26]:
def corruption_counts(data, prob, num_repetitions):
    """
    Measure how much label noise `corrupt_training_set` introduces.

    Parameters:
    - data: list of tagged sentences, each a list of (word, tag) pairs.
    - prob: switching probability passed to corrupt_training_set.
    - num_repetitions: number of independent corruption runs.

    Returns:
    - error_counts: number of switched tags in each run.
    - f1_scores: weighted F1 of the corrupted tags against the original tags
      in each run.
    """
    # Original tags, flattened over all sentences; identical for every run.
    true_labels = [pos for sent in data for _, pos in sent]

    error_counts, f1_scores = [], []
    for _ in range(num_repetitions):
        noisy_data, n_errors = corrupt_training_set(data, switch_prob=prob)
        error_counts.append(n_errors)

        noisy_labels = [pos for sent in noisy_data for _, pos in sent]
        f1_scores.append(f1_score(true_labels, noisy_labels, average='weighted'))

    return error_counts, f1_scores


# Seed Python's RNG (the generator corrupt_training_set draws from), then
# measure the label noise of five corruption runs at a 50% switch probability.
random.seed(1234)
errors, f1 = corruption_counts(data=model_set, prob=0.5, num_repetitions=5)


In [10]:
# validation
# This cell rebinds valid_set / model_set. train_test_split shuffles with
# NumPy's RNG, so without a fixed random_state every execution would tune on a
# different 10% and hand a different model_set to the evaluation cells.
# 1234 is the seed value used throughout this notebook.
valid_set, model_set = train_test_split(tagged_sentences, train_size=0.1, random_state=1234)

# feature extraction: one list of feature dicts / tags per validation sentence
X_valid = [
    [word_features(sentence, i) for i in range(len(sentence))]
    for sentence in valid_set
]
y_valid = [[token[1] for token in sentence] for sentence in valid_set]

# CV to find the best CRF parameters (max_iterations, c1, c2)
best_param = train_and_tune_crf_with_cv(X_valid, y_valid, param_grid)

Brill function

In [11]:


def train_and_evaluate_brill(data, num_repetitions, prob):

    """
    Train and evaluate a Brill tagger.

    Each repetition draws a fresh 80/20 train/test split, corrupts the
    training tags, trains a Brill tagger (nltkdemo18 templates, at most 200
    rules) on top of a unigram tagger with a 'NOUN' default, and scores it
    on the uncorrupted test sentences.

    Parameters:
    - data: List of tagged sentences, each a list of (word, tag) pairs.
    - num_repetitions: number of times to repeat the experiment.
    - prob: probability of randomly switching tags which will be passed to the corrupt_training_set function

    Returns:
    - f1_scores: list of weighted f1 scores with length equal to num_repetitions.
    - error_counts: A list of annotation errors induced with each run with length equal to num_repetitions.
    """
    f1_scores = []
    error_counts = []

    for _ in range(num_repetitions):
        # train test split
        train_set, test_set = train_test_split(data, train_size=0.8)

        # corruption (training data only; the test tags stay clean)
        corrupt_train, error_count = corrupt_training_set(train_set, switch_prob=prob)
        error_counts.append(error_count)

        tag1 = DefaultTagger('NOUN')
        unigram_tagger = UnigramTagger(corrupt_train, backoff=tag1)

        templates = nltk.brill.nltkdemo18()
        trainer = BrillTaggerTrainer(templates=templates, initial_tagger=unigram_tagger)

        # Train the Brill Tagger using the templates
        brill_tagger = trainer.train(corrupt_train, max_rules=200)

        # Tag sentence by sentence: the Brill rules look at neighbouring
        # tokens, and the tagger was trained on separate sentences, so the
        # test sentences must not be glued into one long sequence.
        test_words = [[word for word, _ in sent] for sent in test_set]
        tagged_test = brill_tagger.tag_sents(test_words)
        brill_preds = [tag for sent in tagged_test for _, tag in sent]

        test_true_tags = [tag for sent in test_set for _, tag in sent]

        # f1_score expects (y_true, y_pred) in this order.
        f1 = f1_score(test_true_tags, brill_preds, average='weighted')
        f1_scores.append(f1)

    return f1_scores, error_counts

Unigram function

In [12]:
def train_and_evaluate_unigram(data, num_repetitions, prob):

    """
    Train and evaluate a Unigram tagger on corrupted training data.

    Parameters:
    - data: list of tagged sentences, each a list of (word, tag) pairs.
    - num_repetitions: how many independent split/corrupt/train/score runs to perform.
    - prob: tag-switching probability handed to corrupt_training_set.

    Returns:
    - f1_scores: weighted F1 on the test split, one entry per repetition.
    - error_counts: number of tags switched in the training split, one entry per repetition.
    """

    error_counts, f1_scores = [], []

    for _ in range(num_repetitions):
        # 80/20 split, then add label noise to the training part only
        train_set, test_set = train_test_split(data, train_size=0.8)
        corrupt_train, error_count = corrupt_training_set(train_set, switch_prob=prob)
        error_counts.append(error_count)

        # unigram tagger that falls back to 'NOUN' for unseen words
        unigram_tagger = UnigramTagger(corrupt_train, backoff=DefaultTagger('NOUN'))

        # flatten the test split into one token stream and tag it
        test_tokens = [token for sent in test_set for token in sent]
        tagged_tokens = unigram_tagger.tag([token[0] for token in test_tokens])
        unigram_preds = [tag for _, tag in tagged_tokens]
        test_true_tags = [token[1] for token in test_tokens]

        f1_scores.append(f1_score(test_true_tags, unigram_preds, average='weighted'))

    return f1_scores, error_counts

HMM function

In [13]:
def train_and_evaluate_hmm(data, num_repetitions, prob):

    """
    Train and evaluate an HMM tagger on corrupted training data.

    Parameters:
    - data: list of tagged sentences, each a list of (word, tag) pairs.
    - num_repetitions: how many independent split/corrupt/train/score runs to perform.
    - prob: tag-switching probability handed to corrupt_training_set.

    Returns:
    - f1_scores: weighted F1 on the test split, one entry per repetition.
    - error_counts: number of tags switched in the training split, one entry per repetition.
    """

    error_counts, f1_scores = [], []

    for _ in range(num_repetitions):
        # 80/20 split, then add label noise to the training part only
        train_set, test_set = train_test_split(data, train_size=0.8)
        corrupt_train, error_count = corrupt_training_set(train_set, switch_prob=prob)
        error_counts.append(error_count)

        tagger = nltk.HiddenMarkovModelTagger.train(corrupt_train)

        # tag each test sentence on its own and collect tags token by token
        predicted_labels, correct_labels = [], []
        for sent in test_set:
            words = [word for word, _ in sent]
            predicted_labels.extend(tag for _, tag in tagger.tag(words))
            correct_labels.extend(tag for _, tag in sent)

        f1_scores.append(f1_score(correct_labels, predicted_labels, average='weighted'))

    return f1_scores, error_counts

MEMM function

In [14]:
# training and evaluating function

def train_and_evaluate_memm(data, num_repetitions, prob, max_iters):

    """
    Train and evaluate an MEMM tagger on corrupted training data.

    Parameters:
    - data: list of tagged sentences, each a list of (word, tag) pairs.
    - num_repetitions: how many independent split/corrupt/train/score runs to perform.
    - prob: tag-switching probability handed to corrupt_training_set.
    - max_iters: max_iter value for MaxentClassifier.train.

    Returns:
    - f1_scores: weighted F1 on the test split, one entry per repetition.
    - error_counts: number of tags switched in the training split, one entry per repetition.
    """

    def token_instances(sentences):
        # One (features, tag) pair per token, sentence boundaries flattened away.
        return [
            (word_features(sent, position), sent[position][1])
            for sent in sentences
            for position in range(len(sent))
        ]

    error_counts, f1_scores = [], []

    for _ in range(num_repetitions):
        # 80/20 split, then add label noise to the training part only
        train_set, test_set = train_test_split(data, train_size=0.8)
        corrupt_train, error_count = corrupt_training_set(train_set, switch_prob=prob)
        error_counts.append(error_count)

        # token-level feature/label pairs for both parts
        MEMM_train = token_instances(corrupt_train)
        MEMM_test = token_instances(test_set)

        # fit with the tuned iteration budget
        maxent_classifier = MaxentClassifier.train(MEMM_train, algorithm='gis', max_iter=max_iters)

        # score the predictions against the test tags
        test_features = [features for features, _ in MEMM_test]
        memm_true_labels = [pos for _, pos in MEMM_test]
        memm_predictions = maxent_classifier.classify_many(test_features)

        f1_scores.append(f1_score(memm_true_labels, memm_predictions, average='weighted'))

    return f1_scores, error_counts

CRF function

In [15]:
# train and evaluate
def train_and_evaluate_crf(data, num_repetitions, prob, param_grid):

    """
    Train and evaluate a CRF tagger.

    Each repetition draws a fresh 80/20 train/test split, corrupts the
    training tags, fits a CRF with the given parameters and scores it on the
    test sentences.

    Side effect: each fit writes (and overwrites) the model file
    'pos.crfsuite' in the current working directory.

    Parameters:
    - data: List of tagged sentences, each a list of (word, tag) pairs.
    - num_repetitions: number of times to repeat the experiment.
    - prob: probability of randomly switching tags which will be passed to the corrupt_training_set function
    - param_grid: despite the name, a single dictionary of trainer parameters
      (e.g. the result of train_and_tune_crf_with_cv); it is passed unchanged
      to `pycrfsuite.Trainer.set_params`.

    Returns:
    - f1_scores: list of weighted f1 scores with length equal to num_repetitions.
    - error_counts: A list of annotation errors induced with each run with length equal to num_repetitions.
    """

    def _featurize(sentences):
        # Per-sentence feature dicts and the matching per-sentence tag lists.
        X_seqs = [[word_features(sent, i) for i in range(len(sent))] for sent in sentences]
        y_seqs = [[token[1] for token in sent] for sent in sentences]
        return X_seqs, y_seqs

    # initialize list to keep track of model performance
    f1_scores = []
    error_counts = []

    for _ in range(num_repetitions):

        # train test split and formatting
        train_set, test_set = train_test_split(data, train_size=0.8)

        # corrupt train set (the test tags stay clean)
        corrupt_train, error_count = corrupt_training_set(train_set, switch_prob=prob)
        error_counts.append(error_count)

        # Extract features
        X_train, y_train = _featurize(corrupt_train)
        X_test, y_test = _featurize(test_set)

        # training using the tuned value
        trainer = pycrfsuite.Trainer(verbose=False)

        # Add training data
        for x_seq, y_seq in zip(X_train, y_train):
            trainer.append(x_seq, y_seq)

        # Set trainer parameters
        trainer.set_params(param_grid)

        # Train the CRF model
        trainer.train('pos.crfsuite')

        # Testing: close the tagger afterwards so the model file is released
        # before the next repetition overwrites it.
        tagger = pycrfsuite.Tagger()
        tagger.open('pos.crfsuite')
        try:
            CRF_flat_predictions = [
                tag
                for instance in X_test
                for tag in tagger.tag(instance)
            ]
        finally:
            tagger.close()

        CRF_flat_ground_truth = [tag for instance_tags in y_test for tag in instance_tags]

        # test score
        f1 = f1_score(CRF_flat_ground_truth, CRF_flat_predictions, average='weighted')
        f1_scores.append(f1)

    return f1_scores, error_counts



Collecting all results

In [16]:
# random.seed(1234)


# unigram_clean, unigram_errors_clean = train_and_evaluate_unigram(data=model_set, num_repetitions=5, prob=0)
# unigram_5, unigram_errors_5 = train_and_evaluate_unigram(data=model_set, num_repetitions=5, prob=0.05)
# unigram_20, unigram_errors_20 = train_and_evaluate_unigram(data=model_set, num_repetitions=5, prob=0.2)
# unigram_35, unigram_errors_35 = train_and_evaluate_unigram(data=model_set, num_repetitions=5, prob=0.35)
# unigram_50, unigram_errors_50 = train_and_evaluate_unigram(data=model_set, num_repetitions=5, prob=0.5)

# brill_clean, brill_errors_clean = train_and_evaluate_brill(data=model_set, num_repetitions=5, prob=0)
# brill_5, brill_errors_5 = train_and_evaluate_brill(data=model_set, num_repetitions=5, prob=0.05)
# brill_20, brill_errors_20 = train_and_evaluate_brill(data=model_set, num_repetitions=5, prob=0.2)
# brill_35, brill_errors_35 = train_and_evaluate_brill(data=model_set, num_repetitions=5, prob=0.35)
# brill_50, brill_errors_50 = train_and_evaluate_brill(data=model_set, num_repetitions=5, prob=0.5)

# hmm_clean, hmm_errors_clean = train_and_evaluate_hmm(data=model_set, num_repetitions=5, prob=0)
# hmm_5, hmm_errors_5 = train_and_evaluate_hmm(data=model_set, num_repetitions=5, prob=0.05)
# hmm_20, hmm_errors_20 = train_and_evaluate_hmm(data=model_set, num_repetitions=5, prob=0.2)
# hmm_35, hmm_errors_35 = train_and_evaluate_hmm(data=model_set, num_repetitions=5, prob=0.35)
# hmm_50, hmm_errors_50 = train_and_evaluate_hmm(data=model_set, num_repetitions=5, prob=0.5)

# memm_clean, memm_errors_clean = train_and_evaluate_memm(data=model_set, num_repetitions=5, prob=0, max_iters=best_max_iter)
# memm_5, memm_errors_5 = train_and_evaluate_memm(data=model_set, num_repetitions=5, prob=0.05, max_iters=best_max_iter)
# memm_20, memm_errors_20 = train_and_evaluate_memm(data=model_set, num_repetitions=5, prob=0.2, max_iters=best_max_iter)
# memm_35, memm_errors_35 = train_and_evaluate_memm(data=model_set, num_repetitions=5, prob=0.35, max_iters=best_max_iter)
# memm_50, memm_errors_50 = train_and_evaluate_memm(data=model_set, num_repetitions=5, prob=0.5, max_iters=best_max_iter)

# crf_clean, crf_errors_clean = train_and_evaluate_crf(data=model_set, num_repetitions=5, prob=0, param_grid=best_param)
# crf_5, crf_errors_5 = train_and_evaluate_crf(data=model_set, num_repetitions=5, prob=0.05, param_grid=best_param)
# crf_20, crf_errors_20 = train_and_evaluate_crf(data=model_set, num_repetitions=5, prob=0.2, param_grid=best_param)
# crf_35, crf_errors_35 = train_and_evaluate_crf(data=model_set, num_repetitions=5, prob=0.35, param_grid=best_param)
# crf_50, crf_errors_50 = train_and_evaluate_crf(data=model_set, num_repetitions=5, prob=0.5, param_grid=best_param)

In [17]:


# f1_scores = {
#     "hmm_clean": hmm_clean,
#     "hmm_5": hmm_5,
#     "hmm_20": hmm_20,
#     "hmm_35": hmm_35,
#     "hmm_50": hmm_50,
#     "memm_clean": memm_clean,
#     "memm_5": memm_5,
#     "memm_20": memm_20,
#     "memm_35": memm_35,
#     "memm_50": memm_50,
#     "crf_clean": crf_clean,
#     "crf_5": crf_5,
#     "crf_20": crf_20,
#     "crf_35": crf_35,
#     "crf_50": crf_50,
#     "unigram_clean": unigram_clean,
#     "unigram_5": unigram_5,
#     "unigram_20": unigram_20,
#     "unigram_35": unigram_35,
#     "unigram_50": unigram_50,
#     "brill_clean": brill_clean,
#     "brill_5": brill_5,
#     "brill_20": brill_20,
#     "brill_35": brill_35,
#     "brill_50": brill_50
# }

# with open('insert file path here', "w") as file:
#     for model_threshold, f1_score in f1_scores.items():
#         file.write(f"{model_threshold}: {f1_score}\n")


In [19]:
# with open('insert file path here', "w") as file:
#     for param, paramval in best_param.items():
#         file.write(f"{param}: {paramval}\n")

In [20]:
# max_it = {
#     "max_it": best_max_iter
# }

# with open('insert file path here', "w") as file:
#     for param, paramval in max_it.items():
#         file.write(f"{param}: {paramval}\n")