In [1]:
import Models as models
import Model_Utils as model_utils

In [2]:
# Load the CDoBT CSV through the project helper and unpack the four values it
# returns: training features/labels and test features/labels. The preprocessing
# itself lives in Model_Utils.preprocess_csv.
train_features, train_labels, test_features, test_labels = model_utils.preprocess_csv(
    "Datasets/CDoBT.csv"
)

## Model Tests

This section tests various machine learning models on our dataset. We evaluate the performance of the following models, followed by several neural-network variants:

1. **Multinomial Naive Bayes:**
   A probabilistic model that is commonly used for text classification tasks. It assumes that features are conditionally independent given the class label.

2. **Complement Naive Bayes:**
   An extension of the Multinomial Naive Bayes model that is designed to address the issue of imbalanced class distributions.

3. **Gaussian Naive Bayes:**
   A variant of Naive Bayes that assumes that features follow a Gaussian (normal) distribution within each class.

4. **Random Forest:**
   An ensemble learning method that combines multiple decision trees to improve predictive accuracy and control overfitting.

5. **Decision Tree:**
   A simple model that uses a tree-like structure to make decisions based on feature values.

6. **K-Nearest Neighbors (KNN):**
   An instance-based learning algorithm that makes predictions by finding the majority class among the k nearest neighbors of a given data point.

7. **C-Support Vector Classification (SVC):**
   A type of Support Vector Machine (SVM) used for classification. It seeks a hyperplane that maximizes the margin between classes; multiclass problems are handled by combining several binary classifiers.

In [None]:
# Multinomial Naive Bayes
# Build the model from the training split (presumably fitted inside the
# Models module -- confirm there), then evaluate it on the test split.
model_multinomial_bayes = models.create_multinomial_naive_bayes(
    features=train_features,
    labels=train_labels,
)
models.evaluate_model(
    model=model_multinomial_bayes,
    test_features=test_features,
    test_labels=test_labels,
)

In [None]:
# Complement Naive Bayes
# Build the model from the training split (presumably fitted inside the
# Models module -- confirm there), then evaluate it on the test split.
model_complement_bayes = models.create_complement_naive_bayes(
    features=train_features,
    labels=train_labels,
)
models.evaluate_model(
    model=model_complement_bayes,
    test_features=test_features,
    test_labels=test_labels,
)

In [None]:
# Gaussian Naive Bayes
# Build the model from the training split (presumably fitted inside the
# Models module -- confirm there), then evaluate it on the test split.
model_gaussian_bayes = models.create_gaussian_naive_bayes(
    features=train_features,
    labels=train_labels,
)
models.evaluate_model(
    model=model_gaussian_bayes,
    test_features=test_features,
    test_labels=test_labels,
)

In [3]:
# Random Forest
# Build the model from the training split, then evaluate it on the test split.
# NOTE(review): this cell passes scaling_and_processing=True to evaluate_model,
# while the Naive Bayes, KNN and SVC cells omit it -- check
# Models.evaluate_model for what the flag toggles and whether the difference
# is intentional.
model_random_forest = models.create_random_forest(
    features=train_features,
    labels=train_labels,
)
models.evaluate_model(
    model=model_random_forest,
    test_features=test_features,
    test_labels=test_labels,
    scaling_and_processing=True,
)

Test accuracy: 0.5917490914836467


In [4]:
# Decision Tree
# Build the model from the training split, then evaluate it on the test split.
# NOTE(review): this cell passes scaling_and_processing=True to evaluate_model,
# while the Naive Bayes, KNN and SVC cells omit it -- check
# Models.evaluate_model for what the flag toggles and whether the difference
# is intentional.
model_decision_tree = models.create_decision_tree(
    features=train_features,
    labels=train_labels,
)
models.evaluate_model(
    model=model_decision_tree,
    test_features=test_features,
    test_labels=test_labels,
    scaling_and_processing=True,
)

Test accuracy: 0.5975067151208722


In [None]:
# K-Nearest Neighbors
# Build the model from the training split (presumably fitted inside the
# Models module -- confirm there), then evaluate it on the test split.
model_knn = models.create_knn(
    features=train_features,
    labels=train_labels,
)
models.evaluate_model(
    model=model_knn,
    test_features=test_features,
    test_labels=test_labels,
)

In [None]:
# C-Support Vector Classification
# Build the model from the training split (presumably fitted inside the
# Models module -- confirm there), then evaluate it on the test split.
model_svm = models.create_svc(
    features=train_features,
    labels=train_labels,
)
models.evaluate_model(
    model=model_svm,
    test_features=test_features,
    test_labels=test_labels,
)

In [None]:
# Neural Network Mk 1
# Create the network from the training split and hand the result to the
# neural-network evaluator together with the test split.
model_neural_network_mk_1 = models.create_neural_network_mk_1(
    train_features=train_features,
    train_labels=train_labels,
)
models.evaluate_neural_network(
    trained_model=model_neural_network_mk_1,
    test_features=test_features,
    test_labels=test_labels,
)

In [None]:
# Neural Network Mk 2
# Create the network from the training split and hand the result to the
# neural-network evaluator together with the test split.
model_neural_network_mk_2 = models.create_neural_network_mk_2(
    train_features=train_features,
    train_labels=train_labels,
)
models.evaluate_neural_network(
    trained_model=model_neural_network_mk_2,
    test_features=test_features,
    test_labels=test_labels,
)

In [None]:
# Neural Network Mk 3
# Create the network from the training split and hand the result to the
# neural-network evaluator together with the test split.
# NOTE(review): evaluation is called with normalise=True here (Mk 1 and Mk 2
# omit it) -- presumably this mirrors preprocessing done inside
# create_neural_network_mk_3; confirm in the Models module.
model_neural_network_mk_3 = models.create_neural_network_mk_3(
    train_features=train_features,
    train_labels=train_labels,
)
models.evaluate_neural_network(
    trained_model=model_neural_network_mk_3,
    test_features=test_features,
    test_labels=test_labels,
    normalise=True,
)

In [None]:
# Neural Network Mk 4
# Create the network from the training split with num_epochs=15 and hand the
# result to the neural-network evaluator together with the test split.
# NOTE(review): evaluation is called with normalise=True and encoding=True --
# presumably these mirror preprocessing done inside
# create_neural_network_mk_4; confirm in the Models module.
model_neural_network_mk_4 = models.create_neural_network_mk_4(
    train_features=train_features,
    train_labels=train_labels,
    num_epochs=15,
)
models.evaluate_neural_network(
    trained_model=model_neural_network_mk_4,
    test_features=test_features,
    test_labels=test_labels,
    normalise=True,
    encoding=True,
)