In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle

from GaborExtractor import GaborExtractor
from utils import read_processed_data

In [2]:
data, labels = read_processed_data('../Preprocessed Dataset')

# Split the data using sklearn's train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)
data = 1
labels = 1
X_test = 1
y_test = 1
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

# Check the size of the training and test sets
print(f"Training set size: {len(X_train)}")
# print(f"Validation set size: {len(X_val)}")

Training set size: 2397


In [3]:
# Load the data from the file, assuming no missing values but using genfromtxt for its flexibility
data = np.genfromtxt("Gabor_features_training.csv", delimiter=",")

# Splitting into features and target variable
X_train_loaded = data[:, :-1]
y_train_loaded = data[:, -1]

In [4]:
np.all(y_train == y_train_loaded)

True

In [5]:
# Define parameters
orientations = [k * np.pi / 8 for k in range(1, 9)]
frequencies = np.linspace(0.1, 0.5, 5)

In [6]:
# Generate and apply Gabor filters
gabor_extractor_training = GaborExtractor()
X_train = gabor_extractor_training.extract_gabor_features(X_train, orientations, frequencies)

In [7]:
# # Open the file in read-binary mode
# with open('train_features_32.pkl', 'rb') as file:
#     X_train_imported = pickle.load(file)

# # Open the file in read-binary mode
# with open('test_features_32.pkl', 'rb') as file:
#     X_test_imported = pickle.load(file)

In [8]:
# Define the classifiers
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "LDA": LinearDiscriminantAnalysis(),
    "QDA": QuadraticDiscriminantAnalysis(),
    "SVM": svm.SVC(kernel='linear'),
    "Decision Tree": DecisionTreeClassifier()
}

# Train and test each classifier
for name, clf in classifiers.items():
    # Train the classifier
    clf.fit(X_train, y_train)
    
    # Predict on the training data
    y_train_pred = clf.predict(X_train)
    
    # Calculate and print the result statistics
    print(f"Classification Report on {name}:\n", classification_report(y_train, y_train_pred))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Classification Report on Logistic Regression:
               precision    recall  f1-score   support

           0       0.76      0.79      0.77       602
           1       0.90      0.94      0.92       589
           2       0.80      0.66      0.73       597
           3       0.76      0.82      0.79       609

    accuracy                           0.80      2397
   macro avg       0.81      0.81      0.80      2397
weighted avg       0.81      0.80      0.80      2397

Classification Report on LDA:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98       602
           1       0.99      0.98      0.99       589
           2       0.96      0.96      0.96       597
           3       0.96      0.96      0.96       609

    accuracy                           0.97      2397
   macro avg       0.97      0.97      0.97      2397
weighted avg       0.97      0.97      0.97      2397

Classification Report on QDA:
               precisi

In [9]:
# Ensure y_train is a 2D column vector
y_train = y_train.reshape(-1, 1)

# Combine the arrays
combined_array = np.hstack((X_train, y_train))

# Save the combined array to a file
np.savetxt("Gabor_features_training.csv", combined_array, delimiter=",", fmt='%f')

In [10]:
# Generate and apply Gabor filters
gabor_extractor_validation = GaborExtractor()
X_val = gabor_extractor_validation.extract_gabor_features(X_val, orientations, frequencies)

In [11]:
# Train and test each classifier
for name, clf in classifiers.items():    
    # Predict on the training data
    y_val_pred = clf.predict(X_val)
    
    # Calculate and print the result statistics
    print(f"Classification Report on {name}:\n", classification_report(y_val, y_val_pred))

Classification Report on Logistic Regression:
               precision    recall  f1-score   support

           0       0.75      0.73      0.74       190
           1       0.91      0.98      0.94       201
           2       0.80      0.67      0.73       212
           3       0.70      0.80      0.75       197

    accuracy                           0.79       800
   macro avg       0.79      0.79      0.79       800
weighted avg       0.79      0.79      0.79       800

Classification Report on LDA:
               precision    recall  f1-score   support

           0       0.97      0.96      0.97       190
           1       1.00      1.00      1.00       201
           2       0.91      0.95      0.93       212
           3       0.96      0.93      0.94       197

    accuracy                           0.96       800
   macro avg       0.96      0.96      0.96       800
weighted avg       0.96      0.96      0.96       800

Classification Report on QDA:
               precisi

In [12]:
# Ensure y_train is a 2D column vector
y_val = y_val.reshape(-1, 1)

# Combine the arrays
combined_array = np.hstack((X_val, y_val))

# Save the combined array to a file
np.savetxt("Gabor_features_validation.csv", combined_array, delimiter=",", fmt='%f')