In [None]:
from sklearn.ensemble import RandomForestRegressor  # type: ignore
from sklearn.svm import SVR, LinearSVR  # type: ignore
from evaluator import ModelEvaluator
import numpy as np
import sys, warnings

sys.path.append("../")
from Datasets.setCreator import SetCreator
from Datasets.setModifier import SetModifier

In [None]:
# Hyperparameter grid for the random forest searches: every value of
# N_ESTIMATORS is combined with every value of MAX_DEPTH.
N_ESTIMATORS = [100, 200, 500]
MAX_DEPTH = [5, 7, 10, 15]

# Hyperparameter grids for the SVM searches: C is used by the linear, the
# polynomial and the RBF search, GAMMA only by the RBF search and DEGREE only
# by the polynomial search.
C = [0.01, 0.25, 0.5, 0.75, 1]
GAMMA = [0.1, 0.5, 2, 5, 10]
DEGREE = [1, 2, 3]

In [None]:
# Silence every Python warning from here on.
# NOTE(review): this also hides any warnings the estimators emit while
# fitting — confirm that is intended.
warnings.filterwarnings("ignore")

# Shared helper objects used by all search functions in this notebook.
setModifier = SetModifier()
evaluator = ModelEvaluator()

# Five collections of train/test sets. Presumably each getSetListN() returns a
# sized iterable of mappings with "train", "test" and "desc" entries, since
# that is how the search functions consume them — TODO confirm against
# Datasets.setCreator.
setCreator = SetCreator()
dataset1 = setCreator.getSetList1()
dataset2 = setCreator.getSetList2()
dataset3 = setCreator.getSetList3()
dataset4 = setCreator.getSetList4()
dataset5 = setCreator.getSetList5()

# Random Forest Regression

In [None]:
def testWithErgotFeatures(predictor, dataset):
    print("Running the Random Forest Regression:")
    print(f"Predicting: {predictor}\n\n")
    numExcludedSets = 0
    bestModel = {"avg_accuracy": -100}

    numCombinations = len(dataset)

    for currData in dataset:
        trainningData = currData["train"]

        trainY = []
        for val in trainningData[predictor].values:
            trainY.append(float(val))
        trainX = setModifier.rmErgotPredictors(trainningData)

        testData = currData["test"]

        testY = []
        for val in testData[predictor].values:
            testY.append(float(val))
        testX = setModifier.rmErgotPredictors(testData)

        for estimator in N_ESTIMATORS:
            for depth in MAX_DEPTH:
                try:
                    rfc = RandomForestRegressor(
                        random_state=5, n_estimators=estimator, max_depth=depth
                    )
                    rfc.fit(trainX, trainY)
                    results = evaluator.evaluateRegression(
                        rfc, currData["desc"], trainX, trainY, testX, testY
                    )

                    if results["avg_accuracy"] > bestModel["avg_accuracy"]:
                        bestModel = results

                        bestModel["n_estimators"] = estimator
                        bestModel["max_depth"] = depth

                except Exception as e:
                    numExcludedSets += 1

    print(
        f"[{numCombinations - numExcludedSets}/{numCombinations}] sets were tested on the Random Forest Regression"
    )
    print(f"The best model, as per avg_accuracy, was:\n {bestModel}")

In [None]:
def testWithoutErgotFeatures(predictor, dataset):
    print("Running the Random Forest Regression:")
    print(f"Predicting: {predictor}\n\n")
    numExcludedSets = 0
    bestModel = {"avg_accuracy": -100}

    numCombinations = len(dataset)

    for currData in dataset:
        trainningData = currData["train"]

        trainY = []
        for val in trainningData[predictor].values:
            trainY.append(float(val))
        trainX = setModifier.rmErgotFeatures(trainningData)

        testData = currData["test"]

        testY = []
        for val in testData[predictor].values:
            testY.append(float(val))
        testX = setModifier.rmErgotFeatures(testData)

        for estimator in N_ESTIMATORS:
            for depth in MAX_DEPTH:
                try:
                    rfc = RandomForestRegressor(
                        random_state=5, n_estimators=estimator, max_depth=depth
                    )
                    rfc.fit(trainX, trainY)
                    results = evaluator.evaluateRegression(
                        rfc, currData["desc"], trainX, trainY, testX, testY
                    )

                    if results["avg_accuracy"] > bestModel["avg_accuracy"]:
                        bestModel = results

                        bestModel["n_estimators"] = estimator
                        bestModel["max_depth"] = depth

                except Exception as e:
                    numExcludedSets += 1

    print(
        f"[{numCombinations - numExcludedSets}/{numCombinations}] sets were tested on the Random Forest Regression"
    )
    print(f"The best model, as per avg_accuracy, was:\n {bestModel}")

In [None]:
# Random forest searches for the "percnt_true" target: each dataset is run
# first through testWithErgotFeatures and then through testWithoutErgotFeatures.
for _rfSets in (dataset1, dataset2, dataset3, dataset4, dataset5):
    testWithErgotFeatures("percnt_true", _rfSets)
    testWithoutErgotFeatures("percnt_true", _rfSets)

In [None]:
# Random forest searches for the "sum_severity" target: each dataset is run
# first through testWithErgotFeatures and then through testWithoutErgotFeatures.
for _rfSets in (dataset1, dataset2, dataset3, dataset4, dataset5):
    testWithErgotFeatures("sum_severity", _rfSets)
    testWithoutErgotFeatures("sum_severity", _rfSets)

# Support Vector Machine

In [None]:
def SVMLinearWithErgotFeatures(predictor, dataset):
    """Search the ``C`` grid with a LinearSVR on every train/test set in ``dataset``.

    For each entry the target column ``predictor`` is taken as-is, the model
    inputs are produced by ``setModifier.rmErgotPredictors`` and one LinearSVR
    is fitted and evaluated per value in ``C``. The evaluation with the highest
    ``"avg_accuracy"`` is printed at the end together with the number of sets
    that could be tested.

    A set counts as tested when at least one value of ``C`` was fitted and
    evaluated without raising; failures are tallied per set (not per
    hyperparameter value) so the printed "[tested/total]" figure can never be
    negative.

    Args:
        predictor: Name of the target column, e.g. "percnt_true".
        dataset: Sized iterable of mappings with "train", "test" and "desc"
            entries.

    Returns:
        None. Everything is reported through print().
    """
    print("Running linear SVM Regression:")
    print(f"Predicting: {predictor}\n\n")
    numExcludedSets = 0
    bestModel = {"avg_accuracy": -100}  # sentinel, replaced by the first better result

    numCombinations = len(dataset)

    for currData in dataset:
        trainingData = currData["train"]
        trainY = trainingData[predictor]
        trainX = setModifier.rmErgotPredictors(trainingData)

        testData = currData["test"]
        testY = testData[predictor]
        testX = setModifier.rmErgotPredictors(testData)

        numSucceeded = 0
        numFailed = 0
        lastError = None

        for c in C:
            try:
                svr = LinearSVR(C=c, random_state=0)
                svr.fit(trainX, trainY)
                results = evaluator.evaluateRegression(
                    svr,
                    currData["desc"],
                    trainX,
                    trainY,
                    testX,
                    testY,
                    hasFeatImportance=False,
                )

                # Strict ">" keeps the earliest candidate on ties.
                if results["avg_accuracy"] > bestModel["avg_accuracy"]:
                    bestModel = results

                    bestModel["c"] = c

                numSucceeded += 1

            except Exception as e:
                # Best effort: keep searching, but remember what went wrong.
                numFailed += 1
                lastError = e

        if numFailed:
            print(
                f"Set {currData['desc']!r}: {numFailed}/{numSucceeded + numFailed} "
                f"hyperparameter combinations failed (last error: {lastError!r})"
            )
        if numSucceeded == 0:
            numExcludedSets += 1

    print(
        f"[{numCombinations - numExcludedSets}/{numCombinations}] sets were tested with linear SVM Regression"
    )
    print(f"The best model, as per avg_accuracy, was:\n {bestModel}")

In [None]:
def SVMPolyWithErgotFeatures(predictor, dataset):
    """Search ``C`` x ``DEGREE`` with a polynomial-kernel SVR on every set in ``dataset``.

    For each entry the target column ``predictor`` is taken as-is, the model
    inputs are produced by ``setModifier.rmErgotPredictors`` and one
    ``SVR(kernel="poly", coef0=1)`` is fitted and evaluated per (C, degree)
    pair. The evaluation with the highest ``"avg_accuracy"`` is printed at the
    end together with the number of sets that could be tested.

    A set counts as tested when at least one hyperparameter combination was
    fitted and evaluated without raising; failures are tallied per set (not per
    combination) so the printed "[tested/total]" figure can never be negative.

    Args:
        predictor: Name of the target column, e.g. "percnt_true".
        dataset: Sized iterable of mappings with "train", "test" and "desc"
            entries.

    Returns:
        None. Everything is reported through print().
    """
    print("Running poly kernal SVM Regression:")
    print(f"Predicting: {predictor}\n\n")
    numExcludedSets = 0
    bestModel = {"avg_accuracy": -100}  # sentinel, replaced by the first better result

    numCombinations = len(dataset)

    for currData in dataset:
        trainingData = currData["train"]
        trainY = trainingData[predictor]
        trainX = setModifier.rmErgotPredictors(trainingData)

        testData = currData["test"]
        testY = testData[predictor]
        testX = setModifier.rmErgotPredictors(testData)

        numSucceeded = 0
        numFailed = 0
        lastError = None

        for c in C:
            for deg in DEGREE:
                try:
                    # SVR (unlike LinearSVR) has no random_state parameter;
                    # passing one raises TypeError before any model is built.
                    svr = SVR(kernel="poly", degree=deg, coef0=1, C=c)
                    svr.fit(trainX, trainY)
                    results = evaluator.evaluateRegression(
                        svr,
                        currData["desc"],
                        trainX,
                        trainY,
                        testX,
                        testY,
                        hasFeatImportance=False,
                    )

                    # Strict ">" keeps the earliest combination on ties.
                    if results["avg_accuracy"] > bestModel["avg_accuracy"]:
                        bestModel = results

                        bestModel["c"] = c
                        bestModel["degree"] = deg

                    numSucceeded += 1

                except Exception as e:
                    # Best effort: keep searching, but remember what went wrong.
                    numFailed += 1
                    lastError = e

        if numFailed:
            print(
                f"Set {currData['desc']!r}: {numFailed}/{numSucceeded + numFailed} "
                f"hyperparameter combinations failed (last error: {lastError!r})"
            )
        if numSucceeded == 0:
            numExcludedSets += 1

    print(
        f"[{numCombinations - numExcludedSets}/{numCombinations}] sets were tested with poly kernal SVM Regression"
    )
    print(f"The best model, as per avg_accuracy, was:\n {bestModel}")

In [None]:
def SVMRBFWithErgotFeatures(predictor, dataset):
    """Search ``C`` x ``GAMMA`` with an RBF-kernel SVR on every set in ``dataset``.

    For each entry the target column ``predictor`` is taken as-is, the model
    inputs are produced by ``setModifier.rmErgotPredictors`` and one
    ``SVR(kernel="rbf")`` is fitted and evaluated per (C, gamma) pair. The
    evaluation with the highest ``"avg_accuracy"`` is printed at the end
    together with the number of sets that could be tested.

    A set counts as tested when at least one hyperparameter combination was
    fitted and evaluated without raising; failures are tallied per set (not per
    combination) so the printed "[tested/total]" figure can never be negative.

    Args:
        predictor: Name of the target column, e.g. "percnt_true".
        dataset: Sized iterable of mappings with "train", "test" and "desc"
            entries.

    Returns:
        None. Everything is reported through print().
    """
    print("Running RBF SVM Regression:")
    print(f"Predicting: {predictor}\n\n")
    numExcludedSets = 0
    bestModel = {"avg_accuracy": -100}  # sentinel, replaced by the first better result

    numCombinations = len(dataset)

    for currData in dataset:
        trainingData = currData["train"]
        trainY = trainingData[predictor]
        trainX = setModifier.rmErgotPredictors(trainingData)

        testData = currData["test"]
        testY = testData[predictor]
        testX = setModifier.rmErgotPredictors(testData)

        numSucceeded = 0
        numFailed = 0
        lastError = None

        for c in C:
            for gam in GAMMA:
                try:
                    # SVR (unlike LinearSVR) has no random_state parameter;
                    # passing one raises TypeError before any model is built.
                    svr = SVR(kernel="rbf", gamma=gam, C=c)
                    svr.fit(trainX, trainY)
                    results = evaluator.evaluateRegression(
                        svr,
                        currData["desc"],
                        trainX,
                        trainY,
                        testX,
                        testY,
                        hasFeatImportance=False,
                    )

                    # Strict ">" keeps the earliest combination on ties.
                    if results["avg_accuracy"] > bestModel["avg_accuracy"]:
                        bestModel = results

                        bestModel["c"] = c
                        bestModel["gamma"] = gam

                    numSucceeded += 1

                except Exception as e:
                    # Best effort: keep searching, but remember what went wrong.
                    numFailed += 1
                    lastError = e

        if numFailed:
            print(
                f"Set {currData['desc']!r}: {numFailed}/{numSucceeded + numFailed} "
                f"hyperparameter combinations failed (last error: {lastError!r})"
            )
        if numSucceeded == 0:
            numExcludedSets += 1

    print(
        f"[{numCombinations - numExcludedSets}/{numCombinations}] sets were tested with RBF SVM Regression"
    )
    print(f"The best model, as per avg_accuracy, was:\n {bestModel}")

In [None]:
# SVM searches for the "percnt_true" target: each dataset goes through the
# linear, the polynomial and the RBF search, in that order.
for _svmSets in (dataset1, dataset2, dataset3, dataset4, dataset5):
    SVMLinearWithErgotFeatures("percnt_true", _svmSets)
    SVMPolyWithErgotFeatures("percnt_true", _svmSets)
    SVMRBFWithErgotFeatures("percnt_true", _svmSets)

In [None]:
# SVM searches for the "sum_severity" target: each dataset goes through the
# linear, the polynomial and the RBF search, in that order.
# NOTE(review): dataset1 is not part of this run although it is used for
# "percnt_true" — confirm the omission is deliberate.
for _svmSets in (dataset2, dataset3, dataset4, dataset5):
    SVMLinearWithErgotFeatures("sum_severity", _svmSets)
    SVMPolyWithErgotFeatures("sum_severity", _svmSets)
    SVMRBFWithErgotFeatures("sum_severity", _svmSets)