In [3]:
import numpy as np
import cv2
import os
import glob
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from skimage.feature import hog, daisy
from skimage import io
from sklearn.utils import shuffle

# Function to extract HOG features
def extract_hog_features(image):
    resized_img = cv2.resize(image, (64, 32))
    gray_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
    hog_features = hog(gray_img, block_norm='L2-Hys', pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=False)
    return hog_features

# Function to extract DAISY features
def extract_daisy_features(image):
    resized_img = cv2.resize(image, (64, 32))
    gray_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
    daisy_features = daisy(gray_img, step=8, radius=8, rings=2, histograms=6, visualize=False)
    return daisy_features.flatten()

# Function to load and extract features from images in a directory
def load_and_extract_features(directory, feature_extractor):
    features = []
    for entry in glob.glob(directory):
        img = cv2.imread(entry)
        img_features = feature_extractor(img)
        features.append(img_features)
    return features

# Data directories
data_directories = [
    ("C:/Capstone/Campus Pictures/Aiken/*", "Aiken"),
    ("C:/Capstone/Campus Pictures/Campus Map/*", "Campus Map"),
    ("C:/Capstone/Campus Pictures/CCM/*", "CCM"),
    ("C:/Capstone/Campus Pictures/Durick Hall/*", "Durick Hall"),
    ("C:/Capstone/Campus Pictures/EATS/*", "EATS"),
    ("C:/Capstone/Campus Pictures/Finney Quad/*", "Finney Quad"),
    ("C:/Capstone/Campus Pictures/Foster/*", "Foster"),
    ("C:/Capstone/Campus Pictures/Freeman Hall/*", "Freeman Hall"),
    ("C:/Capstone/Campus Pictures/Garden House/*", "Garden House"),
    ("C:/Capstone/Campus Pictures/Hauke Courtyard/*", "Hauke Courtyard"),
    ("C:/Capstone/Campus Pictures/Health Center/*", "Health Center"),
    ("C:/Capstone/Campus Pictures/IDX/*", "IDX"),
    ("C:/Capstone/Campus Pictures/Joyce Hall/*", "Joyce Hall"),
    ("C:/Capstone/Campus Pictures/Juniper Bike/*", "Juniper Bike"),
    ("C:/Capstone/Campus Pictures/Metz Studio Barn/*", "Metz Studio Barn"),
    ("C:/Capstone/Campus Pictures/Miller Center/*", "Miller Center"),
    ("C:/Capstone/Campus Pictures/Perry Hall/*", "Perry Hall"),
    ("C:/Capstone/Campus Pictures/Rowell Annex/*", "Rowell Annex"),
    ("C:/Capstone/Campus Pictures/Samuel De Champlain/*", "Samuel De Champlain"),
    ("C:/Capstone/Campus Pictures/SD Ireland/*", "SD Ireland"),
    ("C:/Capstone/Campus Pictures/Skiff Hall/*", "Skiff Hall"),
    ("C:/Capstone/Campus Pictures/West Hall/*", "West Hall"),
    ("C:/Capstone/Campus Pictures/Wick Hall/*", "Wick Hall")
]

# Load and extract features
X_features = []
y_labels = []
for directory, label in data_directories:
    features = load_and_extract_features(directory, extract_hog_features)  # Change to extract_daisy_features for DAISY
    X_features.extend(features)
    y_labels.extend([label] * len(features))

# Convert to numpy arrays
X = np.array(X_features)
y = np.array(y_labels)

# Shuffle the data
X, y = shuffle(X, y, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict labels for the testing data
y_pred = rf_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 0.36363636363636365
Classification Report:
                     precision    recall  f1-score   support

              Aiken       0.00      0.00      0.00         1
                CCM       0.25      0.83      0.38         6
        Durick Hall       0.50      0.33      0.40         3
       Freeman Hall       0.00      0.00      0.00         2
       Garden House       0.00      0.00      0.00         1
      Health Center       0.00      0.00      0.00         2
                IDX       0.00      0.00      0.00         4
         Joyce Hall       0.00      0.00      0.00         1
       Juniper Bike       0.00      0.00      0.00         1
   Metz Studio Barn       0.00      0.00      0.00         1
         Perry Hall       0.60      1.00      0.75         3
       Rowell Annex       0.00      0.00      0.00         1
         SD Ireland       0.00      0.00      0.00         5
Samuel De Champlain       0.00      0.00      0.00         2
         Skiff Hall       0.44 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
from sklearn.model_selection import KFold

# Function for data augmentation
def augment_data(images):
    augmented_images = []
    for img in images:
        # Apply transformations to the image (e.g., rotation, flipping, scaling)
        augmented_img = img  # Placeholder for transformation code
        augmented_images.append(augmented_img)
    return augmented_images

# Augment data
X_train_augmented = augment_data(X_train)
y_train_augmented = np.tile(y_train,  # Keep the same labels for augmented data
                            len(X_train_augmented) // len(X_train))

# Combine original and augmented data
X_combined = np.concatenate((X_train, X_train_augmented))
y_combined = np.concatenate((y_train, y_train_augmented))

# Perform cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []
for train_index, test_index in kf.split(X_combined):
    X_train_fold, X_test_fold = X_combined[train_index], X_combined[test_index]
    y_train_fold, y_test_fold = y_combined[train_index], y_combined[test_index]
    
    # Train Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train_fold, y_train_fold)
    
    # Predict labels for the testing data
    y_pred_fold = rf_classifier.predict(X_test_fold)
    
    # Calculate accuracy for this fold
    accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
    accuracies.append(accuracy_fold)

# Calculate mean accuracy across folds
mean_accuracy = np.mean(accuracies)
print("Mean Cross-Validated Accuracy:", mean_accuracy)


Mean Cross-Validated Accuracy: 0.8216149068322981


In [9]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Initialize lists to store evaluation metrics for each fold
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []

# Perform cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for train_index, test_index in kf.split(X, y):
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    
    # Augment data for this fold
    X_train_fold_augmented = augment_data(X_train_fold)
    y_train_fold_augmented = np.tile(y_train_fold, len(X_train_fold_augmented) // len(X_train_fold))
    
    # Combine original and augmented data
    X_train_combined = np.concatenate((X_train_fold, X_train_fold_augmented))
    y_train_combined = np.concatenate((y_train_fold, y_train_fold_augmented))
    
    # Train Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train_combined, y_train_combined)
    
    # Predict labels for the testing data
    y_pred_fold = rf_classifier.predict(X_test_fold)
    
    # Calculate evaluation metrics for this fold
    accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
    precision_fold = precision_score(y_test_fold, y_pred_fold, average='weighted')
    recall_fold = recall_score(y_test_fold, y_pred_fold, average='weighted')
    f1_fold = f1_score(y_test_fold, y_pred_fold, average='weighted')
    
    # Append scores to lists
    accuracy_scores.append(accuracy_fold)
    precision_scores.append(precision_fold)
    recall_scores.append(recall_fold)
    f1_scores.append(f1_fold)

# Calculate mean scores across folds
mean_accuracy = np.mean(accuracy_scores)
mean_precision = np.mean(precision_scores)
mean_recall = np.mean(recall_scores)
mean_f1 = np.mean(f1_scores)

# Print the mean scores
print("Mean Cross-Validated Accuracy:", mean_accuracy)
print("Mean Cross-Validated Precision:", mean_precision)
print("Mean Cross-Validated Recall:", mean_recall)
print("Mean Cross-Validated F1-score:", mean_f1)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Mean Cross-Validated Accuracy: 0.2799154334038055
Mean Cross-Validated Precision: 0.18988859297782573
Mean Cross-Validated Recall: 0.2799154334038055
Mean Cross-Validated F1-score: 0.2015178485500293


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
