## Install Toolkit, Import Libraries, Access Google Drive

In [1]:
pip install LAMDA-SSL

Collecting LAMDA-SSL
  Downloading LAMDA_SSL-1.0.2-py3-none-any.whl (240 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.8/240.8 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting torch-geometric (from LAMDA-SSL)
  Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-geometric, LAMDA-SSL
Successfully installed LAMDA-SSL-1.0.2 torch-geometric-2.4.0


In [2]:
# Imports

# General Libraries
from google.colab import drive
import numpy as np
import pandas as pd
import time
import random
import os

# For Training
from sklearn.model_selection import GroupKFold
from LAMDA_SSL.Split.ViewSplit import ViewSplit
from sklearn import preprocessing

# Measurements
from LAMDA_SSL.Evaluation.Classifier.Accuracy import Accuracy
from LAMDA_SSL.Evaluation.Classifier.Precision import Precision
from LAMDA_SSL.Evaluation.Classifier.Recall import Recall
from LAMDA_SSL.Evaluation.Classifier.F1 import F1
from LAMDA_SSL.Evaluation.Classifier.AUC import AUC
from LAMDA_SSL.Evaluation.Classifier.Confusion_Matrix import Confusion_Matrix

# Supervised Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Semi-Supervised Models
from LAMDA_SSL.Algorithm.Classification.Co_Training import Co_Training
from LAMDA_SSL.Algorithm.Classification.Tri_Training import Tri_Training
from LAMDA_SSL.Algorithm.Classification.SSGMM import SSGMM
from LAMDA_SSL.Algorithm.Classification.Assemble import Assemble
from LAMDA_SSL.Algorithm.Classification.SemiBoost import SemiBoost

In [3]:
if(os.getcwd() != "/content/drive/MyDrive/Final Year Project"):
  drive.mount('/content/drive', force_remount=True)
  %cd /content/drive/MyDrive/Final Year Project/

Mounted at /content/drive
/content/drive/MyDrive/Final Year Project


## Dataset Initialisation and Model Parameters

In [4]:
# Load the labelled dataset (path is relative to the project folder)
recolaDataset = pd.read_csv("Datasets/RecolaLabelledFull.csv")

# Column-name patterns per modality. Each group is joined with "|" behind a
# single leading "^", so only the first alternative is anchored to the start
# of the column name; the remaining alternatives match anywhere in the name.
audioPatterns = ["ComPar", "audio_speech"]
visualPatterns = ["VIDEO", "Face_detection"]
physiologyPatterns = ["ECG", "EDA"]

# Features
audioFeatures = recolaDataset.filter(regex="^" + "|".join(audioPatterns), axis=1)
visualFeatures = recolaDataset.filter(regex="^" + "|".join(visualPatterns), axis=1)
physiologyFeatures = recolaDataset.filter(regex="^" + "|".join(physiologyPatterns), axis=1)
allFeatures = recolaDataset.filter(
    regex="^" + "|".join(audioPatterns + visualPatterns + physiologyPatterns),
    axis=1,
)

# Labels
targetArousal = recolaDataset["classLabelArousal"]
targetValence = recolaDataset["classLabelValence"]

# Number of cross-validation folds handed to GroupKFold
folds = 9

# One group id per row (the participant the row belongs to)
groups = list(recolaDataset["Participant"])

# Folder to save results
saveFolder = "SemiSupervisedResults(Test)"

In [5]:
# Feature tables and label columns, keyed by the short names that are also
# concatenated into the result file names
featureValues = {
    "Audio": audioFeatures,
    "Visual": visualFeatures,
    "Phys": physiologyFeatures,
    "All": allFeatures,
}
targetValues = {
    "Arousal": targetArousal,
    "Valence": targetValence,
}

# Dictionary with input meanings
valueDict = {**featureValues, **targetValues}

# Different kinds of input
featureList = list(featureValues)
targetList = list(targetValues)
baseEstList = ["BLR", "RF", "NN"]
# Kept as strings because they are concatenated into file names; converted
# with int() when passed to the training functions
labelledCount = ["4", "8", "12"]


## Co-Training (with Group k-fold)

In [6]:
def coTraining(estimatorType, feature, label, folds, groups, labelledCount, fileName):
  """Evaluate LAMDA-SSL Co_Training with group k-fold cross-validation.

  In every fold a random labelledCount/16 share of the training rows keeps its
  label; the remaining training rows are handed to the model as unlabelled
  data. The fitted model is scored on the full training split and on the
  held-out split, and the scores are written to disk.

  Args:
    estimatorType: "BLR", "RF" or "NN" - which pair of base estimators to use.
    feature: DataFrame of input features.
    label: Series of class labels, row-aligned with `feature`.
    folds: Number of GroupKFold splits.
    groups: Per-row group ids passed to GroupKFold.split.
    labelledCount: Numerator of the labelled share (the denominator is 16).
    fileName: Stem of the result files written under
      SemiSupervisedResults/CoTraining/ (PerFolds/<stem>.csv and
      Average/<stem>.txt).

  Raises:
    Exception: "Invalid" when estimatorType is not a supported code.
  """
  # Zero-argument builders, so every fold starts from fresh, unfitted estimators
  estimatorBuilders = {
    "BLR": lambda: LogisticRegression(solver = "lbfgs", max_iter = 2000),
    "RF": lambda: RandomForestClassifier(n_estimators = 50, random_state = 42),
    "NN": lambda: MLPClassifier(hidden_layer_sizes = (32,), max_iter = 10000, random_state = 42),
  }

  # One score record per fold, plus the test confusion matrices
  foldRecords = []
  testConfusionMatrices = []

  splitter = GroupKFold(n_splits = folds)
  for foldIdx, (trainIdx, testIdx) in enumerate(splitter.split(feature, label, groups), start = 1):
    # Train and test sets as plain arrays
    trainFeatures = feature.iloc[trainIdx].values
    testFeatures = feature.iloc[testIdx].values
    trainLabels = label.iloc[trainIdx].values
    testLabels = label.iloc[testIdx].values

    # Randomly choose which training rows keep their label
    labelledRows = int(len(trainLabels) * (labelledCount/16))
    keepLabel = np.array([True] * labelledRows + [False] * (len(trainLabels) - labelledRows))
    random.shuffle(keepLabel)

    # Metrics computed by model.evaluate
    metrics = {
      'Accuracy':Accuracy(),
      'Precision':Precision(average='macro'),
      'ConfusionMatrix':Confusion_Matrix()
    }

    if estimatorType not in estimatorBuilders:
      raise Exception("Invalid")
    buildEstimator = estimatorBuilders[estimatorType]

    # Fit on the labelled rows, with the rest supplied as unlabelled data
    model = Co_Training(base_estimator = buildEstimator(), base_estimator_2 = buildEstimator(), evaluation = metrics)
    model.fit(X = trainFeatures[keepLabel], y = trainLabels[keepLabel], unlabeled_X = trainFeatures[~keepLabel])

    trainScores = model.evaluate(X = trainFeatures, y = trainLabels)
    testScores = model.evaluate(X = testFeatures, y = testLabels)

    foldRecords.append({
      "foldNumber": foldIdx,
      "trainAccuracy": trainScores["Accuracy"],
      "trainPrecision": trainScores["Precision"],
      "testAccuracy": testScores["Accuracy"],
      "testPrecision": testScores["Precision"],
    })
    testConfusionMatrices.append(testScores["ConfusionMatrix"])

  # Per-fold results, one column per score
  columnNames = ["foldNumber", "trainAccuracy", "trainPrecision", "testAccuracy", "testPrecision"]
  perFoldColumns = {name: [record[name] for record in foldRecords] for name in columnNames}
  pd.DataFrame(perFoldColumns).to_csv("SemiSupervisedResults/CoTraining/PerFolds/" + fileName + ".csv", index=False)

  # Averages over the folds
  summaryParts = [
    "Mean Train Accuracy: " + str(np.mean(perFoldColumns["trainAccuracy"])),
    "\nMean Train Precision: " + str(np.mean(perFoldColumns["trainPrecision"])),
    "\nMean Test Accuracy: " + str(np.mean(perFoldColumns["testAccuracy"])),
    "\nMean Test Precision: " + str(np.mean(perFoldColumns["testPrecision"])),
    "\nMean Confusion Matrix:\n" + str(np.mean(testConfusionMatrices, axis=0)),
  ]
  with open("SemiSupervisedResults/CoTraining/Average/" + fileName + ".txt", "w") as file:
    file.writelines(summaryParts)

In [None]:
# Names of finished configurations are appended to this file (space-separated)
# so that re-running the cell after an interruption skips them.
checkerPath = "SemiSupervisedResults/CoTraining/NoCopyChecker.txt"

# A missing file simply means that nothing has been completed yet
completed = []
if os.path.exists(checkerPath):
  with open(checkerPath, "r") as file:
    completed = file.read().split()

totalTests = len(featureList) * len(targetList) * len(baseEstList) * len(labelledCount)
print("Completed Tests: " + str(len(completed)) + "/" + str(totalTests) + " - " + str(completed))

for featureName in featureList:
  for targetName in targetList:
    for estimatorName in baseEstList:
      for labelled in labelledCount:
        fileName = featureName + targetName + estimatorName + labelled
        if fileName in completed:
          continue

        start = time.time()

        print(fileName)
        coTraining(estimatorName, valueDict[featureName], valueDict[targetName], folds, groups, int(labelled), fileName)

        end = time.time()
        print("Done: " + str(end-start) + "\n")

        # Recorded only after the run has finished, so an interrupted
        # configuration is retried on the next execution
        with open(checkerPath, "a") as file:
          file.write(fileName + " ")

Completed Tests: 3/72 - ['AudioArousalBLR4', 'AudioArousalBLR8', 'AudioArousalBLR12']
AudioArousalRF4


## Tri-Training (with Group k-fold)

In [None]:
def triTraining(estimatorType, feature, label, folds, groups, labelledCount, fileName):
  """Evaluate LAMDA-SSL Tri_Training with group k-fold cross-validation.

  In every fold a random labelledCount/16 share of the training rows keeps its
  label; the remaining training rows are handed to the model as unlabelled
  data. The fitted model is scored on the full training split and on the
  held-out split, and the scores are written to disk.

  Args:
    estimatorType: "BLR", "RF" or "NN" - which trio of base estimators to use.
    feature: DataFrame of input features.
    label: Series of class labels, row-aligned with `feature`.
    folds: Number of GroupKFold splits.
    groups: Per-row group ids passed to GroupKFold.split.
    labelledCount: Numerator of the labelled share (the denominator is 16).
    fileName: Stem of the result files written under
      SemiSupervisedResults/TriTraining/ (PerFolds/<stem>.csv and
      Average/<stem>.txt).

  Raises:
    Exception: "Invalid" when estimatorType is not a supported code.
  """
  # Zero-argument builders, so every fold starts from fresh, unfitted estimators
  estimatorBuilders = {
    "BLR": lambda: LogisticRegression(solver = "lbfgs", max_iter = 2000),
    "RF": lambda: RandomForestClassifier(n_estimators = 50, random_state = 42),
    "NN": lambda: MLPClassifier(hidden_layer_sizes = (32,), max_iter = 10000, random_state = 42),
  }
  # Reject an unknown code before any training work is done
  if estimatorType not in estimatorBuilders:
    raise Exception("Invalid")
  buildEstimator = estimatorBuilders[estimatorType]

  # Lists to store results
  foldNumber = []
  trainAccuracy = []
  trainPrecision = []
  testAccuracy = []
  testPrecision = []
  confusionMatrixList = []

  group_kfold = GroupKFold(n_splits=folds)
  for train_index, test_index in group_kfold.split(feature, label, groups):
    # Getting Train and Test Sets
    trainFeatures, testFeatures = feature.iloc[train_index].values, feature.iloc[test_index].values
    trainLabels, testLabels = label.iloc[train_index].values, label.iloc[test_index].values

    # Randomly choose which training rows keep their label
    numOfLabeled = int(len(trainLabels) * (labelledCount/16))
    labeled_mask = np.array([True] * numOfLabeled + [False] * (len(trainLabels) - numOfLabeled))
    random.shuffle(labeled_mask)

    # Train Set Labelled Data
    labeledFeatures = trainFeatures[labeled_mask]
    labeledLabels = trainLabels[labeled_mask]

    # Train Set Unlabelled Data (its labels are withheld from the model)
    unlabeledFeatures = trainFeatures[~labeled_mask]

    # For Evaluation
    evaluation={
      'Accuracy':Accuracy(),
      'Precision':Precision(average='macro'),
      'ConfusionMatrix':Confusion_Matrix()
    }

    # Three separate, identically configured base learners. Building them in
    # one expression guarantees that all three exist for every estimator type
    # (the "NN" case previously never created the third one).
    estimator1, estimator2, estimator3 = (buildEstimator() for _ in range(3))

    # Creating and fitting model
    model = Tri_Training(base_estimator = estimator1, base_estimator_2 = estimator2, base_estimator_3 = estimator3, evaluation = evaluation)
    model.fit(X = labeledFeatures, y = labeledLabels, unlabeled_X = unlabeledFeatures)

    # Getting Performance Results
    performanceTrain = model.evaluate(X = trainFeatures,y = trainLabels)
    performanceTest = model.evaluate(X = testFeatures,y = testLabels)

    # Append results
    foldNumber.append(len(foldNumber)+1)
    trainAccuracy.append(performanceTrain["Accuracy"])
    trainPrecision.append(performanceTrain["Precision"])
    testAccuracy.append(performanceTest["Accuracy"])
    testPrecision.append(performanceTest["Precision"])
    confusionMatrixList.append(performanceTest["ConfusionMatrix"])

  # Save per fold results
  csvFile = pd.DataFrame({"foldNumber": foldNumber,
                          "trainAccuracy": trainAccuracy,
                          "trainPrecision": trainPrecision,
                          "testAccuracy": testAccuracy,
                          "testPrecision": testPrecision})
  csvFile.to_csv("SemiSupervisedResults/TriTraining/PerFolds/" + fileName + ".csv", index=False)

  # Save average model results
  txtFileName = "SemiSupervisedResults/TriTraining/Average/" + fileName + ".txt"
  MTrainAccuracy = "Mean Train Accuracy: " + str(np.mean(trainAccuracy))
  MTrainPrecision = "\nMean Train Precision: " + str(np.mean(trainPrecision))
  MTestAccuracy = "\nMean Test Accuracy: " + str(np.mean(testAccuracy))
  MTestPrecision = "\nMean Test Precision: " + str(np.mean(testPrecision))
  MConfusionMatrix = "\nMean Confusion Matrix:\n" + str(np.mean(confusionMatrixList, axis=0))

  # The with-block closes the file on exit
  with open(txtFileName, "w") as file:
    file.write(MTrainAccuracy)
    file.write(MTrainPrecision)
    file.write(MTestAccuracy)
    file.write(MTestPrecision)
    file.write(MConfusionMatrix)

In [None]:
# Names of finished configurations are appended to this file (space-separated)
# so that re-running the cell after an interruption skips them.
checkerPath = "SemiSupervisedResults/TriTraining/NoCopyChecker.txt"

# A missing file simply means that nothing has been completed yet
completed = []
if os.path.exists(checkerPath):
  with open(checkerPath, "r") as file:
    completed = file.read().split()

totalTests = len(featureList) * len(targetList) * len(baseEstList) * len(labelledCount)
print("Completed Tests: " + str(len(completed)) + "/" + str(totalTests) + " - " + str(completed))

for featureName in featureList:
  for targetName in targetList:
    for estimatorName in baseEstList:
      for labelled in labelledCount:
        fileName = featureName + targetName + estimatorName + labelled
        if fileName in completed:
          continue

        start = time.time()

        print(fileName)
        triTraining(estimatorName, valueDict[featureName], valueDict[targetName], folds, groups, int(labelled), fileName)

        end = time.time()
        print("Done: " + str(end-start) + "\n")

        # Recorded only after the run has finished, so an interrupted
        # configuration is retried on the next execution
        with open(checkerPath, "a") as file:
          file.write(fileName + " ")

Completed Tests: 0/72 - []
AudioArousalBLR4


KeyboardInterrupt: ignored

## SSGMM (with Group k-fold)

In [None]:
def SSGMMModel(feature, label, folds, groups, labelledCount, fileName):
  """Evaluate LAMDA-SSL SSGMM with group k-fold cross-validation.

  In every fold a random labelledCount/16 share of the training rows keeps its
  label; the remaining training rows are handed to the model as unlabelled
  data. The fitted model is scored on the full training split and on the
  held-out split, and the scores are written to disk.

  Args:
    feature: DataFrame of input features.
    label: Series of class labels, row-aligned with `feature`.
    folds: Number of GroupKFold splits.
    groups: Per-row group ids passed to GroupKFold.split.
    labelledCount: Numerator of the labelled share (the denominator is 16).
    fileName: Stem of the result files written under
      SemiSupervisedResults/SSGMM/ (PerFolds/<stem>.csv and
      Average/<stem>.txt).
  """
  # One score record per fold, plus the test confusion matrices
  foldRecords = []
  testConfusionMatrices = []

  splitter = GroupKFold(n_splits = folds)
  for foldIdx, (trainIdx, testIdx) in enumerate(splitter.split(feature, label, groups), start = 1):
    # Train and test sets as plain arrays
    trainFeatures = feature.iloc[trainIdx].values
    testFeatures = feature.iloc[testIdx].values
    trainLabels = label.iloc[trainIdx].values
    testLabels = label.iloc[testIdx].values

    # Randomly choose which training rows keep their label
    labelledRows = int(len(trainLabels) * (labelledCount/16))
    keepLabel = np.array([True] * labelledRows + [False] * (len(trainLabels) - labelledRows))
    random.shuffle(keepLabel)

    # Metrics computed by model.evaluate
    metrics = {
      'Accuracy':Accuracy(),
      'Precision':Precision(average='macro'),
      'ConfusionMatrix':Confusion_Matrix()
    }

    # Fit on the labelled rows, with the rest supplied as unlabelled data
    model = SSGMM(tolerance=0.000001, max_iterations = 5, evaluation = metrics)
    model.fit(X = trainFeatures[keepLabel], y = trainLabels[keepLabel], unlabeled_X = trainFeatures[~keepLabel])

    trainScores = model.evaluate(X = trainFeatures, y = trainLabels)
    testScores = model.evaluate(X = testFeatures, y = testLabels)

    foldRecords.append({
      "foldNumber": foldIdx,
      "trainAccuracy": trainScores["Accuracy"],
      "trainPrecision": trainScores["Precision"],
      "testAccuracy": testScores["Accuracy"],
      "testPrecision": testScores["Precision"],
    })
    testConfusionMatrices.append(testScores["ConfusionMatrix"])

  # Per-fold results, one column per score
  columnNames = ["foldNumber", "trainAccuracy", "trainPrecision", "testAccuracy", "testPrecision"]
  perFoldColumns = {name: [record[name] for record in foldRecords] for name in columnNames}
  pd.DataFrame(perFoldColumns).to_csv("SemiSupervisedResults/SSGMM/PerFolds/" + fileName + ".csv", index=False)

  # Averages over the folds
  summaryParts = [
    "Mean Train Accuracy: " + str(np.mean(perFoldColumns["trainAccuracy"])),
    "\nMean Train Precision: " + str(np.mean(perFoldColumns["trainPrecision"])),
    "\nMean Test Accuracy: " + str(np.mean(perFoldColumns["testAccuracy"])),
    "\nMean Test Precision: " + str(np.mean(perFoldColumns["testPrecision"])),
    "\nMean Confusion Matrix:\n" + str(np.mean(testConfusionMatrices, axis=0)),
  ]
  with open("SemiSupervisedResults/SSGMM/Average/" + fileName + ".txt", "w") as file:
    file.writelines(summaryParts)

In [None]:
# NOTE(review): this single run passes saveFolder as the file-name stem, so it
# writes "SemiSupervisedResults(Test).csv" / ".txt" into the SSGMM result
# folders. It looks like a leftover smoke test - confirm and remove if so.
SSGMMModel(audioFeatures, targetArousal, folds, groups, 4, saveFolder)

# Names of finished configurations are appended to this file (space-separated)
# so that re-running the cell after an interruption skips them.
checkerPath = "SemiSupervisedResults/SSGMM/NoCopyChecker.txt"

# Read without truncating: opening with "w+" would empty the file first, so
# every configuration would be re-run. A missing file means nothing is done.
completed = []
if os.path.exists(checkerPath):
  with open(checkerPath, "r") as file:
    completed = file.read().split()

totalTests = len(featureList) * len(targetList) * len(labelledCount)
print("Completed Tests: " + str(len(completed)) + "/" + str(totalTests) + " - " + str(completed))

for featureName in featureList:
  for targetName in targetList:
    for labelled in labelledCount:
      fileName = featureName + targetName + labelled
      if fileName in completed:
        continue

      start = time.time()
      print(fileName)
      SSGMMModel(valueDict[featureName], valueDict[targetName], folds, groups, int(labelled), fileName)
      end = time.time()
      print("Done: " + str(end-start) + "\n")

      # Recorded only after the run has finished, so an interrupted
      # configuration is retried on the next execution
      with open(checkerPath, "a") as file:
        file.write(fileName + " ")

## Assemble (with Group k-fold)

In [None]:
def assemble(estimatorType, feature, label, folds, groups, labelledCount, fileName):
  """Evaluate LAMDA-SSL Assemble with group k-fold cross-validation.

  In every fold a random labelledCount/16 share of the training rows keeps its
  label; the remaining training rows are handed to the model as unlabelled
  data. The fitted model is scored on the full training split and on the
  held-out split, and the scores are written to disk.

  Args:
    estimatorType: "BLR", "RF" or "NN" - which base estimator to use.
    feature: DataFrame of input features.
    label: Series of class labels, row-aligned with `feature`.
    folds: Number of GroupKFold splits.
    groups: Per-row group ids passed to GroupKFold.split.
    labelledCount: Numerator of the labelled share (the denominator is 16).
    fileName: Stem of the result files written under
      SemiSupervisedResults/Assemble/ (PerFolds/<stem>.csv and
      Average/<stem>.txt).

  Raises:
    Exception: "Invalid" when estimatorType is not a supported code.
  """
  # Zero-argument builders, so every fold starts from a fresh, unfitted estimator
  estimatorBuilders = {
    "BLR": lambda: LogisticRegression(solver = "lbfgs", max_iter = 2000),
    "RF": lambda: RandomForestClassifier(n_estimators = 50, random_state = 42),
    "NN": lambda: MLPClassifier(hidden_layer_sizes = (32,), max_iter = 10000, random_state = 42),
  }
  # Reject an unknown code before any training work is done
  if estimatorType not in estimatorBuilders:
    raise Exception("Invalid")
  buildEstimator = estimatorBuilders[estimatorType]

  # Lists to store results
  foldNumber = []
  trainAccuracy = []
  trainPrecision = []
  testAccuracy = []
  testPrecision = []
  confusionMatrixList = []

  group_kfold = GroupKFold(n_splits=folds)
  for train_index, test_index in group_kfold.split(feature, label, groups):
    # Getting Train and Test Sets
    trainFeatures, testFeatures = feature.iloc[train_index].values, feature.iloc[test_index].values
    trainLabels, testLabels = label.iloc[train_index].values, label.iloc[test_index].values

    # Randomly choose which training rows keep their label
    numOfLabeled = int(len(trainLabels) * (labelledCount/16))
    labeled_mask = np.array([True] * numOfLabeled + [False] * (len(trainLabels) - numOfLabeled))
    random.shuffle(labeled_mask)

    # Train Set Labelled Data
    labeledFeatures = trainFeatures[labeled_mask]
    labeledLabels = trainLabels[labeled_mask]

    # Train Set Unlabelled Data (its labels are withheld from the model)
    unlabeledFeatures = trainFeatures[~labeled_mask]

    # For Evaluation
    evaluation={
      'Accuracy':Accuracy(),
      'Precision':Precision(average='macro'),
      'ConfusionMatrix':Confusion_Matrix()
    }

    # Creating Model
    model = Assemble(base_estimator = buildEstimator(), evaluation = evaluation)

    # Fitting Model
    model.fit(X = labeledFeatures, y = labeledLabels, unlabeled_X = unlabeledFeatures)

    # Getting Performance Results
    performanceTrain = model.evaluate(X = trainFeatures,y = trainLabels)
    performanceTest = model.evaluate(X = testFeatures,y = testLabels)

    # Append results
    foldNumber.append(len(foldNumber)+1)
    trainAccuracy.append(performanceTrain["Accuracy"])
    trainPrecision.append(performanceTrain["Precision"])
    testAccuracy.append(performanceTest["Accuracy"])
    testPrecision.append(performanceTest["Precision"])
    confusionMatrixList.append(performanceTest["ConfusionMatrix"])

  # Save per fold results. These go to the Assemble folders: writing to the
  # TriTraining folders would overwrite Tri-Training results that share the
  # same file name.
  csvFile = pd.DataFrame({"foldNumber": foldNumber,
                          "trainAccuracy": trainAccuracy,
                          "trainPrecision": trainPrecision,
                          "testAccuracy": testAccuracy,
                          "testPrecision": testPrecision})
  csvFile.to_csv("SemiSupervisedResults/Assemble/PerFolds/" + fileName + ".csv", index=False)

  # Save average model results
  txtFileName = "SemiSupervisedResults/Assemble/Average/" + fileName + ".txt"
  MTrainAccuracy = "Mean Train Accuracy: " + str(np.mean(trainAccuracy))
  MTrainPrecision = "\nMean Train Precision: " + str(np.mean(trainPrecision))
  MTestAccuracy = "\nMean Test Accuracy: " + str(np.mean(testAccuracy))
  MTestPrecision = "\nMean Test Precision: " + str(np.mean(testPrecision))
  MConfusionMatrix = "\nMean Confusion Matrix:\n" + str(np.mean(confusionMatrixList, axis=0))

  # The with-block closes the file on exit
  with open(txtFileName, "w") as file:
    file.write(MTrainAccuracy)
    file.write(MTrainPrecision)
    file.write(MTestAccuracy)
    file.write(MTestPrecision)
    file.write(MConfusionMatrix)

In [None]:
# Names of finished configurations are appended to this file (space-separated)
# so that re-running the cell after an interruption skips them. The same
# Assemble file is used for reading and for appending.
checkerPath = "SemiSupervisedResults/Assemble/NoCopyChecker.txt"

# Read without truncating: opening with "w+" would empty the file first, so
# every configuration would be re-run. A missing file means nothing is done.
completed = []
if os.path.exists(checkerPath):
  with open(checkerPath, "r") as file:
    completed = file.read().split()

totalTests = len(featureList) * len(targetList) * len(baseEstList) * len(labelledCount)
print("Completed Tests: " + str(len(completed)) + "/" + str(totalTests) + " - " + str(completed))

for featureName in featureList:
  for targetName in targetList:
    for estimatorName in baseEstList:
      for labelled in labelledCount:
        fileName = featureName + targetName + estimatorName + labelled
        if fileName in completed:
          continue

        start = time.time()
        print(fileName)
        assemble(estimatorName, valueDict[featureName], valueDict[targetName], folds, groups, int(labelled), fileName)
        end = time.time()
        print("Done: " + str(end-start) + "\n")

        # Recorded only after the run has finished, so an interrupted
        # configuration is retried on the next execution
        with open(checkerPath, "a") as file:
          file.write(fileName + " ")

## SemiBoost (with Group k-fold)

In [None]:
def semiBoost(estimatorType, feature, label, folds, groups, labelledCount, fileName):
  """Evaluate LAMDA-SSL SemiBoost with group k-fold cross-validation.

  In every fold a random labelledCount/16 share of the training rows keeps its
  label; the remaining training rows are handed to the model as unlabelled
  data. The fitted model is scored on the full training split and on the
  held-out split, and the scores are written to disk.

  Args:
    estimatorType: "BLR", "RF" or "NN" - which base estimator to use.
    feature: DataFrame of input features.
    label: Series of class labels, row-aligned with `feature`.
    folds: Number of GroupKFold splits.
    groups: Per-row group ids passed to GroupKFold.split.
    labelledCount: Numerator of the labelled share (the denominator is 16).
    fileName: Stem of the result files written under
      SemiSupervisedResults/SemiBoost/ (PerFolds/<stem>.csv and
      Average/<stem>.txt).

  Raises:
    Exception: "Invalid" when estimatorType is not a supported code.
  """
  # Zero-argument builders, so every fold starts from a fresh, unfitted estimator
  estimatorBuilders = {
    "BLR": lambda: LogisticRegression(solver = "lbfgs", max_iter = 2000),
    "RF": lambda: RandomForestClassifier(n_estimators = 50, random_state = 42),
    "NN": lambda: MLPClassifier(hidden_layer_sizes = (32,), max_iter = 10000, random_state = 42),
  }

  # One score record per fold, plus the test confusion matrices
  foldRecords = []
  testConfusionMatrices = []

  splitter = GroupKFold(n_splits=folds)
  for foldIdx, (trainIdx, testIdx) in enumerate(splitter.split(feature, label, groups), start = 1):
    # Train and test sets as plain arrays
    trainFeatures = feature.iloc[trainIdx].values
    testFeatures = feature.iloc[testIdx].values
    trainLabels = label.iloc[trainIdx].values
    testLabels = label.iloc[testIdx].values

    # Randomly choose which training rows keep their label
    labelledRows = int(len(trainLabels) * (labelledCount/16))
    keepLabel = np.array([True] * labelledRows + [False] * (len(trainLabels) - labelledRows))
    random.shuffle(keepLabel)

    # Metrics computed by model.evaluate
    metrics = {
      'Accuracy':Accuracy(),
      'Precision':Precision(average='macro'),
      'ConfusionMatrix':Confusion_Matrix()
    }

    if estimatorType not in estimatorBuilders:
      raise Exception("Invalid")
    buildEstimator = estimatorBuilders[estimatorType]

    # Fit on the labelled rows, with the rest supplied as unlabelled data
    model = SemiBoost(base_estimator=buildEstimator(), evaluation=metrics)
    model.fit(X = trainFeatures[keepLabel], y = trainLabels[keepLabel], unlabeled_X = trainFeatures[~keepLabel])

    trainScores = model.evaluate(X = trainFeatures, y = trainLabels)
    testScores = model.evaluate(X = testFeatures, y = testLabels)

    foldRecords.append({
      "foldNumber": foldIdx,
      "trainAccuracy": trainScores["Accuracy"],
      "trainPrecision": trainScores["Precision"],
      "testAccuracy": testScores["Accuracy"],
      "testPrecision": testScores["Precision"],
    })
    testConfusionMatrices.append(testScores["ConfusionMatrix"])

  # Per-fold results, one column per score
  columnNames = ["foldNumber", "trainAccuracy", "trainPrecision", "testAccuracy", "testPrecision"]
  perFoldColumns = {name: [record[name] for record in foldRecords] for name in columnNames}
  pd.DataFrame(perFoldColumns).to_csv("SemiSupervisedResults/SemiBoost/PerFolds/" + fileName + ".csv", index=False)

  # Averages over the folds
  summaryParts = [
    "Mean Train Accuracy: " + str(np.mean(perFoldColumns["trainAccuracy"])),
    "\nMean Train Precision: " + str(np.mean(perFoldColumns["trainPrecision"])),
    "\nMean Test Accuracy: " + str(np.mean(perFoldColumns["testAccuracy"])),
    "\nMean Test Precision: " + str(np.mean(perFoldColumns["testPrecision"])),
    "\nMean Confusion Matrix:\n" + str(np.mean(testConfusionMatrices, axis=0)),
  ]
  with open("SemiSupervisedResults/SemiBoost/Average/" + fileName + ".txt", "w") as file:
    file.writelines(summaryParts)

In [None]:
# Names of finished configurations are appended to this file (space-separated)
# so that re-running the cell after an interruption skips them.
checkerPath = "SemiSupervisedResults/SemiBoost/NoCopyChecker.txt"

# Read without truncating: opening with "w+" would empty the file first, so
# every configuration would be re-run. A missing file means nothing is done.
completed = []
if os.path.exists(checkerPath):
  with open(checkerPath, "r") as file:
    completed = file.read().split()

totalTests = len(featureList) * len(targetList) * len(baseEstList) * len(labelledCount)
print("Completed Tests: " + str(len(completed)) + "/" + str(totalTests) + " - " + str(completed))

for featureName in featureList:
  for targetName in targetList:
    for estimatorName in baseEstList:
      for labelled in labelledCount:
        fileName = featureName + targetName + estimatorName + labelled
        if fileName in completed:
          continue

        start = time.time()
        print(fileName)
        semiBoost(estimatorName, valueDict[featureName], valueDict[targetName], folds, groups, int(labelled), fileName)
        end = time.time()
        print("Done: " + str(end-start) + "\n")

        # Recorded only after the run has finished, so an interrupted
        # configuration is retried on the next execution
        with open(checkerPath, "a") as file:
          file.write(fileName + " ")