In [13]:
!pip install --upgrade numpy
!pip install --upgrade opencv-python





Collecting opencv-python
  Downloading opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl (38.6 MB)
     --------------------------------------- 38.6/38.6 MB 23.4 MB/s eta 0:00:00
Installing collected packages: opencv-python
  Attempting uninstall: opencv-python
    Found existing installation: opencv-python 4.8.0.74
    Uninstalling opencv-python-4.8.0.74:
      Successfully uninstalled opencv-python-4.8.0.74


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\luizc\\anaconda3\\Lib\\site-packages\\~-2\\cv2.pyd'
Consider using the `--user` option or check the permissions.



In [14]:
import numpy as np
import cv2
import mysql.connector
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from skimage.feature import hog
from skimage import exposure
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from sklearn.utils import shuffle

# Load pre-trained VGG16 model
vgg16_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Function to extract LBP features
def extract_lbp_features(image):
    gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp = cv2.LBP_create()
    lbp_img = lbp.compute(gray_img)[0]  # Compute LBP image
    hist, _ = np.histogram(lbp_img.ravel(), bins=np.arange(0, 256), range=(0, 256))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)  # Normalize histogram
    return hist

# Function to extract combined features
def extract_combined_features(image):
    # Resize image while preserving aspect ratio
    resized_img = cv2.resize(image, (128, 64))  # Adjust size as needed
    
    # Convert image to grayscale
    gray_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
    
    # Extract HOG features
    hog_features, _ = hog(gray_img, orientations=9, pixels_per_cell=(8, 8),
                          cells_per_block=(2, 2), block_norm='L2-Hys',
                          visualize=True, transform_sqrt=True)
    
    # Enhance HOG image for visualization
    hog_img = exposure.rescale_intensity(hog_img, in_range=(0, 10))

    # Extract color histogram features
    color_features = cv2.calcHist([resized_img], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]).flatten()

    # Extract LBP features
    lbp_features = extract_lbp_features(image)
    
    # Preprocess image for VGG16
    img = cv2.resize(image, (224, 224))
    img_array = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    
    # Extract VGG16 features
    vgg16_features = vgg16_model.predict(img_array).flatten()
    
    # Combine all features
    combined_features = np.hstack((hog_features, color_features, lbp_features, vgg16_features))
    
    return combined_features

# Function to load and extract features from images fetched from MySQL
def load_and_extract_features_from_mysql(cursor, feature_extractor):
    cursor.execute("SELECT image_path, building_name, location FROM images JOIN buildings ON images.building_id = buildings.building_id")
    features = []
    labels = []
    locations = []
    for image_path, building_name, location in cursor:
        img = cv2.imread(image_path)
        img_features = feature_extractor(img)
        features.append(img_features)
        labels.append(building_name)
        locations.append(location)
    return features, labels, locations

# MySQL info
db_config = {
    'host': 'localhost',
    'user': 'root',
    'password': 'Luiz2001!',
    'database': 'campus_tour',
}

# Open connection to DB
conn = mysql.connector.connect(**db_config)
cursor = conn.cursor()

# Load and extract features using the combined feature extractor
X_features, y_labels, locations_train = load_and_extract_features_from_mysql(cursor, extract_combined_features)

# Convert to numpy arrays
X = np.array(X_features)
y = np.array(y_labels)

# Shuffle the data
X, y, locations_train = shuffle(X, y, locations_train, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test, locations_train, _ = train_test_split(X, y, locations_train, test_size=0.2, random_state=42)

# Train Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict labels for the testing data
y_pred = rf_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Close DB connection
cursor.close()
conn.close()

# Function for data augmentation
def augment_data(images):
    augmented_images = []
    for img in images:
        # Apply transformations to the image (e.g., rotation, flipping, scaling)
        augmented_img = img  # Placeholder for transformation code
        augmented_images.append(augmented_img)
    return augmented_images

# Augment data
X_train_augmented = augment_data(X_train)
y_train_augmented = np.tile(y_train,  # Keep the same labels for augmented data
                            len(X_train_augmented) // len(X_train))

# Combine original and augmented data
X_combined = np.concatenate((X_train, X_train_augmented))
y_combined = np.concatenate((y_train, y_train_augmented))

# Perform cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []
for train_index, test_index in kf.split(X_combined):
    X_train_fold, X_test_fold = X_combined[train_index], X_combined[test_index]
    y_train_fold, y_test_fold = y_combined[train_index], y_combined[test_index]
    
    # Train Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train_fold, y_train_fold)
    
    # Predict labels for the testing data
    y_pred_fold = rf_classifier.predict(X_test_fold)
    
    # Calculate accuracy for this fold
    accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
    accuracies.append(accuracy_fold)

# Calculate mean accuracy across folds
mean_accuracy = np.mean(accuracies)
print("Mean Cross-Validated Accuracy:", mean_accuracy)


RuntimeError: module compiled against API version 0x10 but this version of numpy is 0xe

SystemError: initialization of _pywrap_checkpoint_reader raised unreported exception

In [2]:
import numpy as np
import cv2
import mysql.connector
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from skimage.feature import hog
from skimage import exposure
from sklearn.utils import shuffle

# Function to extract combined HOG and color features
def extract_combined_features(image):
    # Resize image while preserving aspect ratio
    resized_img = cv2.resize(image, (128, 64))  # Adjust size as needed
    
    # Convert image to grayscale
    gray_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
    
    # Extract HOG features
    hog_features, hog_img = hog(gray_img, orientations=9, pixels_per_cell=(8, 8),
                                cells_per_block=(2, 2), block_norm='L2-Hys',
                                visualize=True, transform_sqrt=True)
    
    # Enhance HOG image for visualization
    hog_img = exposure.rescale_intensity(hog_img, in_range=(0, 10))

    # Extract color histogram features
    color_features = cv2.calcHist([resized_img], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]).flatten()
    
    # Combine HOG and color features
    combined_features = np.hstack((hog_features, color_features))
    
    return combined_features

# Function to load and extract features from images fetched from MySQL
def load_and_extract_features_from_mysql(cursor, feature_extractor):
    cursor.execute("SELECT image_path, building_name, location FROM images JOIN buildings ON images.building_id = buildings.building_id")
    features = []
    labels = []
    locations = []
    for image_path, building_name, location in cursor:
        img = cv2.imread(image_path)
        img_features = feature_extractor(img)
        features.append(img_features)
        labels.append(building_name)
        locations.append(location)
    return features, labels, locations

# MySQL info
db_config = {
    'host': 'localhost',
    'user': 'root',
    'password': 'Luiz2001!',
    'database': 'campus_tour',
}

# Open connection to DB
conn = mysql.connector.connect(**db_config)
cursor = conn.cursor()

# Load and extract features using the combined feature extractor
X_features, y_labels, locations_train = load_and_extract_features_from_mysql(cursor, extract_combined_features)

# Convert to numpy arrays
X = np.array(X_features)
y = np.array(y_labels)

# Shuffle the data
X, y, locations_train = shuffle(X, y, locations_train, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test, locations_train, _ = train_test_split(X, y, locations_train, test_size=0.2, random_state=42)

# Train Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict labels for the testing data
y_pred = rf_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Close DB connection
cursor.close()
conn.close()

# Function for data augmentation
def augment_data(images):
    augmented_images = []
    for img in images:
        # Apply transformations to the image (e.g., rotation, flipping, scaling)
        augmented_img = img  # Placeholder for transformation code
        augmented_images.append(augmented_img)
    return augmented_images

# Augment data
X_train_augmented = augment_data(X_train)
y_train_augmented = np.tile(y_train,  # Keep the same labels for augmented data
                            len(X_train_augmented) // len(X_train))

# Combine original and augmented data
X_combined = np.concatenate((X_train, X_train_augmented))
y_combined = np.concatenate((y_train, y_train_augmented))

# Perform cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []
for train_index, test_index in kf.split(X_combined):
    X_train_fold, X_test_fold = X_combined[train_index], X_combined[test_index]
    y_train_fold, y_test_fold = y_combined[train_index], y_combined[test_index]
    
    # Train Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train_fold, y_train_fold)
    
    # Predict labels for the testing data
    y_pred_fold = rf_classifier.predict(X_test_fold)
    
    # Calculate accuracy for this fold
    accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
    accuracies.append(accuracy_fold)

# Calculate mean accuracy across folds
mean_accuracy = np.mean(accuracies)
print("Mean Cross-Validated Accuracy:", mean_accuracy)


Accuracy: 0.2608695652173913
Classification Report:
                  precision    recall  f1-score   support

           Aiken       0.00      0.00      0.00         2
             CCM       0.12      0.50      0.19         4
     Durick Hall       0.25      0.50      0.33         2
     Finney Quad       0.00      0.00      0.00         1
     Foster Hall       0.00      0.00      0.00         1
    Garden House       0.00      0.00      0.00         3
 Hauke Courtyard       0.00      0.00      0.00         1
             IDX       0.00      0.00      0.00         2
      Joyce Hall       0.00      0.00      0.00         1
    Juniper Bike       0.00      0.00      0.00         1
Metz Studio Barn       0.00      0.00      0.00         2
   Miller Center       0.00      0.00      0.00         1
      Perry Hall       0.50      0.40      0.44         5
    Rowell Annex       0.00      0.00      0.00         1
      SD Ireland       0.00      0.00      0.00         7
      Skiff Hall   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Mean Cross-Validated Accuracy: 0.8944444444444445


In [2]:
from sklearn.model_selection import KFold

# Function for data augmentation
def augment_data(images):
    augmented_images = []
    for img in images:
        # Apply transformations to the image (e.g., rotation, flipping, scaling)
        augmented_img = img  # Placeholder for transformation code
        augmented_images.append(augmented_img)
    return augmented_images

# Augment data
X_train_augmented = augment_data(X_train)
y_train_augmented = np.tile(y_train,  # Keep the same labels for augmented data
                            len(X_train_augmented) // len(X_train))

# Combine original and augmented data
X_combined = np.concatenate((X_train, X_train_augmented))
y_combined = np.concatenate((y_train, y_train_augmented))

# Perform cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []
for train_index, test_index in kf.split(X_combined):
    X_train_fold, X_test_fold = X_combined[train_index], X_combined[test_index]
    y_train_fold, y_test_fold = y_combined[train_index], y_combined[test_index]
    
    # Train Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train_fold, y_train_fold)
    
    # Predict labels for the testing data
    y_pred_fold = rf_classifier.predict(X_test_fold)
    
    # Calculate accuracy for this fold
    accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
    accuracies.append(accuracy_fold)

# Calculate mean accuracy across folds
mean_accuracy = np.mean(accuracies)
print("Mean Cross-Validated Accuracy:", mean_accuracy)


Mean Cross-Validated Accuracy: 0.9


In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Initialize lists to store evaluation metrics for each fold
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []

# Perform cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for train_index, test_index in kf.split(X, y):
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    
    # Augment data for this fold
    X_train_fold_augmented = augment_data(X_train_fold)
    y_train_fold_augmented = np.tile(y_train_fold, len(X_train_fold_augmented) // len(X_train_fold))
    
    # Combine original and augmented data
    X_train_combined = np.concatenate((X_train_fold, X_train_fold_augmented))
    y_train_combined = np.concatenate((y_train_fold, y_train_fold_augmented))
    
    # Train Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train_combined, y_train_combined)
    
    # Predict labels for the testing data
    y_pred_fold = rf_classifier.predict(X_test_fold)
    
    # Calculate evaluation metrics for this fold
    accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
    precision_fold = precision_score(y_test_fold, y_pred_fold, average='weighted')
    recall_fold = recall_score(y_test_fold, y_pred_fold, average='weighted')
    f1_fold = f1_score(y_test_fold, y_pred_fold, average='weighted')
    
    # Append scores to lists
    accuracy_scores.append(accuracy_fold)
    precision_scores.append(precision_fold)
    recall_scores.append(recall_fold)
    f1_scores.append(f1_fold)

# Calculate mean scores across folds
mean_accuracy = np.mean(accuracy_scores)
mean_precision = np.mean(precision_scores)
mean_recall = np.mean(recall_scores)
mean_f1 = np.mean(f1_scores)

# Print the mean scores
print("Mean Cross-Validated Accuracy:", mean_accuracy)
print("Mean Cross-Validated Precision:", mean_precision)
print("Mean Cross-Validated Recall:", mean_recall)
print("Mean Cross-Validated F1-score:", mean_f1)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Mean Cross-Validated Accuracy: 0.3983574879227053
Mean Cross-Validated Precision: 0.3193917367487161
Mean Cross-Validated Recall: 0.3983574879227053
Mean Cross-Validated F1-score: 0.3095960215475932


  _warn_prf(average, modifier, msg_start, len(result))


In [5]:
import os
import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

# Function for data augmentation
def augment_data(images, labels, max_samples_per_class=4):
    augmented_images = []
    augmented_labels = []
    samples_per_class = {}  # Dictionary to keep track of samples per class
    
    for img, label in zip(images, labels):
        # Check if the current class has reached the maximum allowed samples
        if label not in samples_per_class:
            samples_per_class[label] = 1
        else:
            if samples_per_class[label] >= max_samples_per_class:
                continue  # Skip augmentation for this sample
        
        # Apply transformations to the image (e.g., rotation, flipping, scaling)
        augmented_img = img  # Placeholder for transformation code
        augmented_images.append(augmented_img)
        augmented_labels.append(label)
        
        # Increment the count of samples for the current class
        samples_per_class[label] += 1
        
    return augmented_images, augmented_labels

# Function to load and preprocess images from a directory
def load_and_preprocess_images_from_directory(directory):
    images = []
    for filename in os.listdir(directory):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            img_path = os.path.join(directory, filename)
            img = cv2.imread(img_path)
            img_features = extract_combined_features(img)  # Assuming you have a function for feature extraction
            images.append((filename, img_features))
    return images

# Directory containing the downloaded images
test_images_directory = "C:/Users/luizc/Capstone/test_images/"

# Load and preprocess test images
test_images = load_and_preprocess_images_from_directory(test_images_directory)

# Perform cross-validation

# Assuming X_train and y_train are defined
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []

for train_index, test_index in kf.split(X_train):
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]
    
    # Augment data with a maximum of 4 samples per class
    X_train_augmented, y_train_augmented = augment_data(X_train_fold, y_train_fold, max_samples_per_class=4)
    
    # Combine original and augmented data
    X_combined = np.concatenate((X_train_fold, X_train_augmented))
    y_combined = np.concatenate((y_train_fold, y_train_augmented))
    
    # Train Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_combined, y_combined)
    
    # Predict labels for the testing data
    y_pred_fold = rf_classifier.predict(X_test_fold)
    
    # Calculate accuracy for this fold
    accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
    accuracies.append(accuracy_fold)

# Calculate mean accuracy across folds
mean_accuracy = np.mean(accuracies)
print("Mean Cross-Validated Accuracy:", mean_accuracy)

# Predict labels for the test images
predicted_labels = []
for filename, img_features in test_images:
    predicted_label = rf_classifier.predict([img_features])[0]
    predicted_labels.append((filename, predicted_label))

# Write out the predicted labels for each image
for filename, predicted_label in predicted_labels:
    print(f"Image: {filename}, Predicted Label: {predicted_label}")


Mean Cross-Validated Accuracy: 0.2777777777777778
Image: Aiken_1_image.jpg, Predicted Label: CCM
Image: CCM_1_image.jpg, Predicted Label: Durick Hall
Image: Joyce_1_image.jpg, Predicted Label: Samuel De Champlain
Image: perry_4_image.jpg, Predicted Label: Perry Hall
Image: SD IREland_1_image.jpg, Predicted Label: Perry Hall


# 4 samples compiled with location

# Dont touch just in case

In [5]:
import numpy as np
import cv2
import os
import glob
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from skimage.feature import hog, daisy
from skimage import io
from sklearn.utils import shuffle

# Function to extract HOG features
def extract_hog_features(image):
    resized_img = cv2.resize(image, (64, 32))
    gray_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
    hog_features = hog(gray_img, block_norm='L2-Hys', pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=False)
    return hog_features

# Function to extract DAISY features
def extract_daisy_features(image):
    resized_img = cv2.resize(image, (64, 32))
    gray_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
    daisy_features = daisy(gray_img, step=8, radius=8, rings=2, histograms=6, visualize=False)
    return daisy_features.flatten()

# Function to load and extract features from images in a directory
def load_and_extract_features(directory, feature_extractor):
    features = []
    for entry in glob.glob(directory):
        img = cv2.imread(entry)
        img_features = feature_extractor(img)
        features.append(img_features)
    return features

# Data directories
data_directories = [
    ("C:/Capstone/Campus Pictures/Aiken/*", "Aiken"),
    ("C:/Capstone/Campus Pictures/Campus Map/*", "Campus Map"),
    ("C:/Capstone/Campus Pictures/CCM/*", "CCM"),
    ("C:/Capstone/Campus Pictures/Durick Hall/*", "Durick Hall"),
    ("C:/Capstone/Campus Pictures/EATS/*", "EATS"),
    ("C:/Capstone/Campus Pictures/Finney Quad/*", "Finney Quad"),
    ("C:/Capstone/Campus Pictures/Foster/*", "Foster"),
    ("C:/Capstone/Campus Pictures/Freeman Hall/*", "Freeman Hall"),
    ("C:/Capstone/Campus Pictures/Garden House/*", "Garden House"),
    ("C:/Capstone/Campus Pictures/Hauke Courtyard/*", "Hauke Courtyard"),
    ("C:/Capstone/Campus Pictures/Health Center/*", "Health Center"),
    ("C:/Capstone/Campus Pictures/IDX/*", "IDX"),
    ("C:/Capstone/Campus Pictures/Joyce Hall/*", "Joyce Hall"),
    ("C:/Capstone/Campus Pictures/Juniper Bike/*", "Juniper Bike"),
    ("C:/Capstone/Campus Pictures/Metz Studio Barn/*", "Metz Studio Barn"),
    ("C:/Capstone/Campus Pictures/Miller Center/*", "Miller Center"),
    ("C:/Capstone/Campus Pictures/Perry Hall/*", "Perry Hall"),
    ("C:/Capstone/Campus Pictures/Rowell Annex/*", "Rowell Annex"),
    ("C:/Capstone/Campus Pictures/Samuel De Champlain/*", "Samuel De Champlain"),
    ("C:/Capstone/Campus Pictures/SD Ireland/*", "SD Ireland"),
    ("C:/Capstone/Campus Pictures/Skiff Hall/*", "Skiff Hall"),
    ("C:/Capstone/Campus Pictures/West Hall/*", "West Hall"),
    ("C:/Capstone/Campus Pictures/Wick Hall/*", "Wick Hall")
]

# Load and extract features
X_features = []
y_labels = []
for directory, label in data_directories:
    features = load_and_extract_features(directory, extract_hog_features)  # Change to extract_daisy_features for DAISY
    X_features.extend(features)
    y_labels.extend([label] * len(features))

# Convert to numpy arrays
X = np.array(X_features)
y = np.array(y_labels)

# Shuffle the data
X, y = shuffle(X, y, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict labels for the testing data
y_pred = rf_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.36363636363636365
Classification Report:
                     precision    recall  f1-score   support

              Aiken       0.00      0.00      0.00         1
                CCM       0.25      0.83      0.38         6
        Durick Hall       0.50      0.33      0.40         3
       Freeman Hall       0.00      0.00      0.00         2
       Garden House       0.00      0.00      0.00         1
      Health Center       0.00      0.00      0.00         2
                IDX       0.00      0.00      0.00         4
         Joyce Hall       0.00      0.00      0.00         1
       Juniper Bike       0.00      0.00      0.00         1
   Metz Studio Barn       0.00      0.00      0.00         1
         Perry Hall       0.60      1.00      0.75         3
       Rowell Annex       0.00      0.00      0.00         1
         SD Ireland       0.00      0.00      0.00         5
Samuel De Champlain       0.00      0.00      0.00         2
         Skiff Hall       0.44 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
from sklearn.model_selection import KFold

# Function for data augmentation
def augment_data(images):
    augmented_images = []
    for img in images:
        # Apply transformations to the image (e.g., rotation, flipping, scaling)
        augmented_img = img  # Placeholder for transformation code
        augmented_images.append(augmented_img)
    return augmented_images

# Augment data
X_train_augmented = augment_data(X_train)
y_train_augmented = np.tile(y_train,  # Keep the same labels for augmented data
                            len(X_train_augmented) // len(X_train))

# Combine original and augmented data
X_combined = np.concatenate((X_train, X_train_augmented))
y_combined = np.concatenate((y_train, y_train_augmented))

# Perform cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []
for train_index, test_index in kf.split(X_combined):
    X_train_fold, X_test_fold = X_combined[train_index], X_combined[test_index]
    y_train_fold, y_test_fold = y_combined[train_index], y_combined[test_index]
    
    # Train Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train_fold, y_train_fold)
    
    # Predict labels for the testing data
    y_pred_fold = rf_classifier.predict(X_test_fold)
    
    # Calculate accuracy for this fold
    accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
    accuracies.append(accuracy_fold)

# Calculate mean accuracy across folds
mean_accuracy = np.mean(accuracies)
print("Mean Cross-Validated Accuracy:", mean_accuracy)


Mean Cross-Validated Accuracy: 0.8216149068322981


In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Initialize lists to store evaluation metrics for each fold
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []

# Perform cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for train_index, test_index in kf.split(X, y):
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    
    # Augment data for this fold
    X_train_fold_augmented = augment_data(X_train_fold)
    y_train_fold_augmented = np.tile(y_train_fold, len(X_train_fold_augmented) // len(X_train_fold))
    
    # Combine original and augmented data
    X_train_combined = np.concatenate((X_train_fold, X_train_fold_augmented))
    y_train_combined = np.concatenate((y_train_fold, y_train_fold_augmented))
    
    # Train Random Forest classifier
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train_combined, y_train_combined)
    
    # Predict labels for the testing data
    y_pred_fold = rf_classifier.predict(X_test_fold)
    
    # Calculate evaluation metrics for this fold
    accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
    precision_fold = precision_score(y_test_fold, y_pred_fold, average='weighted')
    recall_fold = recall_score(y_test_fold, y_pred_fold, average='weighted')
    f1_fold = f1_score(y_test_fold, y_pred_fold, average='weighted')
    
    # Append scores to lists
    accuracy_scores.append(accuracy_fold)
    precision_scores.append(precision_fold)
    recall_scores.append(recall_fold)
    f1_scores.append(f1_fold)

# Calculate mean scores across folds
mean_accuracy = np.mean(accuracy_scores)
mean_precision = np.mean(precision_scores)
mean_recall = np.mean(recall_scores)
mean_f1 = np.mean(f1_scores)

# Print the mean scores
print("Mean Cross-Validated Accuracy:", mean_accuracy)
print("Mean Cross-Validated Precision:", mean_precision)
print("Mean Cross-Validated Recall:", mean_recall)
print("Mean Cross-Validated F1-score:", mean_f1)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Mean Cross-Validated Accuracy: 0.2799154334038055
Mean Cross-Validated Precision: 0.18988859297782573
Mean Cross-Validated Recall: 0.2799154334038055
Mean Cross-Validated F1-score: 0.2015178485500293


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
import os

# Function to load and preprocess images
def load_and_preprocess_images_from_directory(directory):
    images = []
    for filename in os.listdir(directory):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            img_path = os.path.join(directory, filename)
            img = cv2.imread(img_path)
            img_features = extract_hog_features(img)
            images.append((filename, img_features))
    return images

# Directory containing the downloaded images
test_images_directory = "C:/Users/luizc/Capstone/test_images/"

# Load and preprocess test images
test_images = load_and_preprocess_images_from_directory(test_images_directory)

# Predict labels for the test images
predicted_labels = []
for filename, img_features in test_images:
    predicted_label = rf_classifier.predict([img_features])[0]
    predicted_labels.append((filename, predicted_label))

# Write out the predicted labels for each image
for filename, predicted_label in predicted_labels:
    print(f"Image: {filename}, Predicted Label: {predicted_label}")


Image: Aiken.jpg, Predicted Label: Skiff Hall
Image: CCM.jpg, Predicted Label: Skiff Hall
Image: Joyce.jpg, Predicted Label: Skiff Hall
Image: perry.jpg, Predicted Label: Skiff Hall
Image: SD IREland.jpg, Predicted Label: Skiff Hall
