# This is running the Random Forest (RF) models, all dataset folders at once.  

The model takes the image data from a root folder (greyscale/gy. original(RBG)/og or Four-channel/fc and runs them through a random forest model.

VERSIONS

Outputs are to the 3 (_gy, _or, _fc) data folders which themselves reflect the quality of the data provided.

PARAMETERS
It adjusts the data to the 3 datasizes used in the KNN and Random Forest Models (64x64,128x128,256x256)

The 3x3 grid search is number of estimators x max depth

The model uses 5-fold on the 80% 'training-validation' data.  The data includes 20% for final testing.

LOCALISATION

root_folders - these are the sources of data (right at the end of each code block)
Adjust as needed.

It is assumed that earlier models and pip's have been installed.

TIMING

The models (when run in series and with concurrent cores) ran in 30 mins.
This is indicative as depends on the resources available.

RESULTS

each model x imagesize combination generates an excel file with results and logs
These are saved in the related root folders.



In [2]:
import os
import numpy as np
import cv2
import time
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score, train_test_split
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from datetime import datetime
import concurrent.futures
import sys

# Function to load and preprocess images
def load_images_from_folder(folder, target_size):
    images = []
    labels = []
    class_names = os.listdir(folder)
    filenames = []
    for label, class_name in enumerate(class_names):
        class_folder = os.path.join(folder, class_name)
        for filename in os.listdir(class_folder):
            img_path = os.path.join(class_folder, filename)
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            if img is not None:
                # Handle different bit depths
                if len(img.shape) == 2:  # Grayscale image
                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                elif img.shape[2] == 4:  # 4-channel image (e.g., RGBA)
                    img = img[:, :, :3]  # Discard the alpha channel

                img = cv2.resize(img, target_size)  # Resize to target_size
                images.append(img)
                labels.append(label)
                filenames.append(filename)
    return np.array(images), np.array(labels), class_names, filenames

# Function to preprocess and flatten images
def preprocess_images(X, target_size):
    X_resized = []
    for img in X:
        img_resized = cv2.resize(img, target_size)
        X_resized.append(img_resized)
    X_resized = np.array(X_resized).astype('float32') / 255.0
    return X_resized.reshape((X_resized.shape[0], -1))

# Define parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [10, 50, 100],
    'criterion': ['gini'
#                  , 'entropy'
                 ],
    'max_depth': [
        None, 
        10, 
        20, 
#        30
    ],
    'min_samples_split': [2
#    , 5, 10
                         ],
    'min_samples_leaf': [1
#    , 2, 4
    ]
}

image_sizes = [(64, 64), (128, 128), (256, 256)]

def run_model(train_dataset_folder, test_dataset_folder, model_type, root_folder_suffix):
    # Record start time for the model processing
    model_start_time = datetime.now()
    log = []
    log.append(f"Processing {root_folder_suffix}:{model_type} - Started at {model_start_time}")
    print(f"Processing {root_folder_suffix}:{model_type} - Started at {model_start_time}")

    # Load and preprocess training data
    log.append(f"Processing {root_folder_suffix}:{model_type} - Loading and preprocessing training data")
    print(f"Processing {root_folder_suffix}:{model_type} - Loading and preprocessing training data")
    start_time = time.time()
    X_train_orig, y_train, class_names, train_filenames = load_images_from_folder(train_dataset_folder, target_size=(128, 128))
    load_preprocess_time = time.time() - start_time
    log.append(f"Processing {root_folder_suffix}:{model_type} - Time taken for loading and preprocessing: {load_preprocess_time:.2f} seconds")
    print(f"Processing {root_folder_suffix}:{model_type} - Time taken for loading and preprocessing: {load_preprocess_time:.2f} seconds")

    best_params = None
    best_score = 0
    best_target_size = None
    best_conf_matrix = None
    best_X_train_resized = None

    # K-Fold Cross-Validation
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    timing_results = []
    cm_results = []

    for target_size in image_sizes:
        start_size_time = time.time()
        log.append(f"Processing {root_folder_suffix}:{model_type} - Evaluating for image size: {target_size}")
        print(f"Processing {root_folder_suffix}:{model_type} - Evaluating for image size: {target_size}")

        X_train_resized = preprocess_images(X_train_orig, target_size)

        for n_estimators in param_grid['n_estimators']:
            for criterion in param_grid['criterion']:
                for max_depth in param_grid['max_depth']:
                    for min_samples_split in param_grid['min_samples_split']:
                        for min_samples_leaf in param_grid['min_samples_leaf']:
                            params = {
                                'n_estimators': n_estimators,
                                'criterion': criterion,
                                'max_depth': max_depth,
                                'min_samples_split': min_samples_split,
                                'min_samples_leaf': min_samples_leaf
                            }
                            rf = RandomForestClassifier(**params)

                            start_grid_item_time = time.time()
                            scores = cross_val_score(rf, X_train_resized, y_train, cv=kf, scoring='accuracy')
                            grid_item_time = time.time() - start_grid_item_time

                            mean_score = scores.mean()
                            timing_results.append({
                                'image_size': target_size,
                                'n_estimators': n_estimators,
                                'criterion': criterion,
                                'max_depth': max_depth,
                                'min_samples_split': min_samples_split,
                                'min_samples_leaf': min_samples_leaf,
                                'mean_score': mean_score,
                                'time': grid_item_time,
                                'scoring_metric': 'accuracy'
                            })

                            log.append(f"Processing {root_folder_suffix}:{model_type} - Params: {params} - Mean Score (accuracy): {mean_score:.4f} - Time: {grid_item_time:.2f} seconds")
                            print(f"Processing {root_folder_suffix}:{model_type} - Params: {params} - Mean Score (accuracy): {mean_score:.4f} - Time: {grid_item_time:.2f} seconds")

                            # Collect confusion matrix for the current model
                            X_train_split, X_valid_split, y_train_split, y_valid_split = train_test_split(X_train_resized, y_train, test_size=0.2, random_state=42)
                            rf.fit(X_train_split, y_train_split)
                            y_valid_pred = rf.predict(X_valid_split)
                            cm = confusion_matrix(y_valid_split, y_valid_pred)
                            cm_results.append({
                                'image_size': target_size,
                                'params': params,
                                'confusion_matrix': cm
                            })

                            if mean_score > best_score:
                                best_score = mean_score
                                best_params = params
                                best_target_size = target_size
                                best_conf_matrix = cm
                                best_X_train_resized = X_train_resized

        size_time = time.time() - start_size_time
        log.append(f"Processing {root_folder_suffix}:{model_type} - Total time taken for image size {target_size}: {size_time:.2f} seconds\n")
        print(f"Processing {root_folder_suffix}:{model_type} - Total time taken for image size {target_size}: {size_time:.2f} seconds\n")

    log.append(f"Processing {root_folder_suffix}:{model_type} - Best Score: {best_score}")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Best Parameters: {best_params}")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Best Image Size: {best_target_size}")
    print(f"Processing {root_folder_suffix}:{model_type} - Best Score: {best_score}")
    print(f"Processing {root_folder_suffix}:{model_type} - Best Parameters: {best_params}")
    print(f"Processing {root_folder_suffix}:{model_type} - Best Image Size: {best_target_size}")

    # Save results
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    result_folder = os.path.dirname(train_dataset_folder)
    file_path = os.path.join(result_folder, f"{root_folder_suffix}_{model_type}_RF_results_{timestamp}.xlsx")

    with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
        # Save best parameters
        best_params_df = pd.DataFrame([best_params])
        best_params_df['image_size'] = [best_target_size]
        best_params_df.to_excel(writer, sheet_name='Best_Parameters', index=False)

        # Save timing results
        timing_df = pd.DataFrame(timing_results)
        timing_df.to_excel(writer, sheet_name='Timing_Results', index=False)

        # Save confusion matrices for all grid search iterations
        cm_data = []
        for cm_result in cm_results:
            for i in range(len(class_names)):
                for j in range(len(class_names)):
                    cm_data.append({
                        'image_size': cm_result['image_size'],
                        'params': cm_result['params'],
                        'true_class': class_names[i],
                        'predicted_class': class_names[j],
                        'count': cm_result['confusion_matrix'][i, j]
                    })
        cm_df = pd.DataFrame(cm_data)
        cm_df.to_excel(writer, sheet_name='Confusion_Matrices', index=False)

    log.append(f"Processing {root_folder_suffix}:{model_type} - Grid search results saved to {file_path}")
    print(f"Processing {root_folder_suffix}:{model_type} - Grid search results saved to {file_path}")

    # Load the test dataset with best image size
    log.append(f"Processing {root_folder_suffix}:{model_type} - Loading and preprocessing test data")
    print(f"Processing {root_folder_suffix}:{model_type} - Loading and preprocessing test data")
    X_test_orig, y_test, _, test_filenames = load_images_from_folder(test_dataset_folder, target_size=best_target_size)

    # Preprocess test images
    X_test_resized = preprocess_images(X_test_orig, best_target_size)

    # Evaluate the final model on the test dataset
    start_time = time.time()
    final_model = RandomForestClassifier(**best_params)
    final_model.fit(best_X_train_resized, y_train)
    train_time = time.time() - start_time

    start_time = time.time()
    y_test_pred = final_model.predict(X_test_resized)
    predict_time = time.time() - start_time

    test_accuracy = accuracy_score(y_test, y_test_pred)
    log.append(f"Processing {root_folder_suffix}:{model_type} - Test Accuracy: {test_accuracy:.4f}")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Time taken for final training: {train_time:.2f} seconds")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Time taken for final prediction: {predict_time:.2f} seconds")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Test Classification Report:\n{classification_report(y_test, y_test_pred, target_names=class_names)}")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Test Confusion Matrix:\n{confusion_matrix(y_test, y_test_pred)}")
    print(f"Processing {root_folder_suffix}:{model_type} - Test Accuracy: {test_accuracy:.4f}")
    print(f"Processing {root_folder_suffix}:{model_type} - Time taken for final training: {train_time:.2f} seconds")
    print(f"Processing {root_folder_suffix}:{model_type} - Time taken for final prediction: {predict_time:.2f} seconds")
    print(f"Processing {root_folder_suffix}:{model_type} - Test Classification Report:\n{classification_report(y_test, y_test_pred, target_names=class_names)}")
    print(f"Processing {root_folder_suffix}:{model_type} - Test Confusion Matrix:\n{confusion_matrix(y_test, y_test_pred)}")

    # Save final test results
    with pd.ExcelWriter(file_path, engine='openpyxl', mode='a') as writer:
        # Save test classification report
        test_report = classification_report(y_test, y_test_pred, target_names=class_names, output_dict=True)
        test_report_df = pd.DataFrame(test_report).transpose()
        test_report_df.to_excel(writer, sheet_name='Test_Classification_Report')

        # Save test confusion matrix
        test_cm = confusion_matrix(y_test, y_test_pred)
        test_cm_df = pd.DataFrame(test_cm, index=class_names, columns=class_names)
        test_cm_df.to_excel(writer, sheet_name='Test_Confusion_Matrix')

        # Save image predictions
        predictions_df = pd.DataFrame({
            'Filename': test_filenames,
            'Actual': [class_names[label] for label in y_test],
            'Predicted': [class_names[label] for label in y_test_pred]
        })
        predictions_df.to_excel(writer, sheet_name='Image_Predictions', index=False)

        # Save summary
        summary_data = {
            'Best Score': [best_score],
            'Best Parameters': [best_params],
            'Best Image Size': [best_target_size],
            'Test Accuracy': [test_accuracy],
            'Training Time': [train_time],
            'Prediction Time': [predict_time]
        }
        summary_df = pd.DataFrame(summary_data)
        summary_df.to_excel(writer, sheet_name='Summary', index=False)

        # Save log
        log_df = pd.DataFrame(log, columns=['Log'])
        log_df.to_excel(writer, sheet_name='Log', index=False)

    # Record end time for the model processing
    model_end_time = datetime.now()
    log.append(f"Processing {root_folder_suffix}:{model_type} - Ended at {model_end_time}")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Total duration: {model_end_time - model_start_time}")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Best Parameters identified from the grid search: {best_params}")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Best Image Size identified from the grid search: {best_target_size}")
    log.append(f"Processing {root_folder_suffix}:{model_type} - Final test results saved to {file_path}")
    print(f"Processing {root_folder_suffix}:{model_type} - Ended at {model_end_time}")
    print(f"Processing {root_folder_suffix}:{model_type} - Total duration: {model_end_time - model_start_time}")
    print(f"Processing {root_folder_suffix}:{model_type} - Best Parameters identified from the grid search: {best_params}")
    print(f"Processing {root_folder_suffix}:{model_type} - Best Image Size identified from the grid search: {best_target_size}")
    print(f"Processing {root_folder_suffix}:{model_type} - Final test results saved to {file_path}")

# Function to iterate over the data directories and run the model
def run_models_for_all_datasets(root_folders):
    # Record start time for the overall processing
    overall_start_time = datetime.now()
    print(f"Overall processing started at {overall_start_time}")

    model_types = ['country', 'exact_piece', 'force', 'piece']

    tasks = []

    for root_folder in root_folders:
        root_folder_suffix = os.path.basename(root_folder)[-2:]
        for model_type in model_types:
            train_folder = os.path.join(root_folder, f"{model_type}_train")
            test_folder = os.path.join(root_folder, f"{model_type}_test")

            if os.path.exists(train_folder) and os.path.exists(test_folder):
                tasks.append((train_folder, test_folder, model_type, root_folder_suffix))
            else:
                print(f"Skipping {model_type} in {root_folder} as train or test folder does not exist")

    # Run models in parallel
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(run_model, train, test, model, suffix) for train, test, model, suffix in tasks]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as exc:
                print(f"Generated an exception: {exc}")

    # Record end time for the overall processing
    overall_end_time = datetime.now()
    print(f"Overall processing ended at {overall_end_time}")
    print(f"Total duration for all models: {overall_end_time - overall_start_time}")

# List of root folders
root_folders = [
    'C:\\Users\\ReCas\\OneDrive\\Documents\\2024_AIMachineLearning\\99_Projects\\06.DataSets_gy',
    'C:\\Users\\ReCas\\OneDrive\\Documents\\2024_AIMachineLearning\\99_Projects\\07.DataSets_or',
    'C:\\Users\\ReCas\\OneDrive\\Documents\\2024_AIMachineLearning\\99_Projects\\08.DataSets_fc'
]

# Run the models for all datasets
run_models_for_all_datasets(root_folders)


Overall processing started at 2024-08-05 21:55:03.197141
Processing gy:country - Started at 2024-08-05 21:55:03.199140
Processing gy:country - Loading and preprocessing training data
Processing gy:exact_piece - Started at 2024-08-05 21:55:03.200140
Processing gy:exact_piece - Loading and preprocessing training data
Processing gy:force - Started at 2024-08-05 21:55:03.200140
Processing gy:force - Loading and preprocessing training data
Processing gy:piece - Started at 2024-08-05 21:55:03.201141
Processing gy:piece - Loading and preprocessing training data
Processing or:country - Started at 2024-08-05 21:55:03.201141
Processing or:country - Loading and preprocessing training data
Processing or:exact_piece - Started at 2024-08-05 21:55:03.201141
Processing or:exact_piece - Loading and preprocessing training data
Processing or:force - Started at 2024-08-05 21:55:03.202141
Processing or:force - Loading and preprocessing training data
Processing or:piece - Started at 2024-08-05 21:55:03.2021

Processing or:exact_piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.5948 - Time: 3.56 seconds
Processing or:country - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.6977 - Time: 3.74 seconds
Processing gy:piece - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9224 - Time: 13.24 seconds
Processing or:exact_piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.6879 - Time: 5.14 seconds
Processing or:force - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9812 - Time: 10.41 seconds
Processing fc:exact_piece - Time take

Processing fc:country - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7435 - Time: 12.38 seconds
Processing or:country - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7713 - Time: 16.67 seconds
Processing or:exact_piece - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.6879 - Time: 15.07 seconds
Processing fc:piece - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9207 - Time: 11.99 seconds
Processing gy:force - Params: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9771 - Time: 18.84 seconds
Processing gy:piece - Params: {'n_estim

Processing or:piece - Total time taken for image size (64, 64): 143.40 seconds

Processing or:piece - Evaluating for image size: (128, 128)
Processing gy:country - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7067 - Time: 7.90 seconds
Processing fc:force - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9616 - Time: 4.59 seconds
Processing or:piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.8717 - Time: 6.17 seconds
Processing gy:country - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.6830 - Time: 5.58 seconds
Processing fc:country - Params: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': 10, 'min

Processing fc:country - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.6994 - Time: 7.98 seconds
Processing or:piece - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9191 - Time: 26.73 seconds
Processing gy:exact_piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7051 - Time: 13.86 seconds
Processing gy:country - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7280 - Time: 25.85 seconds
Processing gy:exact_piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.5253 - Time: 6.65 seconds
Processing fc:piece - Params: {'n_

Processing fc:force - Params: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9812 - Time: 42.19 seconds
Processing or:force - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9624 - Time: 10.41 seconds
Processing or:exact_piece - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7590 - Time: 67.49 seconds
Processing gy:exact_piece - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.6806 - Time: 32.41 seconds
Processing fc:force - Total time taken for image size (128, 128): 238.78 seconds

Processing fc:force - Evaluating for image size: (256, 256)
Processing or:force - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_dept

Processing gy:country - Params: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7680 - Time: 74.57 seconds
Processing fc:piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.8897 - Time: 12.55 seconds
Processing gy:country - Total time taken for image size (128, 128): 382.21 seconds

Processing gy:country - Evaluating for image size: (256, 256)
Processing gy:piece - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9216 - Time: 58.79 seconds
Processing or:force - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9804 - Time: 42.37 seconds
Processing gy:country - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': No

Processing gy:piece - Params: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9248 - Time: 108.50 seconds
Processing or:country - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7402 - Time: 57.75 seconds
Processing gy:force - Total time taken for image size (256, 256): 475.63 seconds

Processing gy:force - Best Score: 0.9844864503178321
Processing gy:force - Best Parameters: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1}
Processing gy:force - Best Image Size: (256, 256)
Processing gy:force - Grid search results saved to C:\Users\ReCas\OneDrive\Documents\2024_AIMachineLearning\99_Projects\06.DataSets_gy\gy_force_RF_results_20240805_220927.xlsx
Processing gy:force - Loading and preprocessing test data
Processing gy:exact_piece - Params: {'n_estimat

Processing gy:exact_piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.6936 - Time: 25.30 seconds
Processing gy:piece - Params: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.9256 - Time: 101.49 seconds
Processing or:exact_piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7157 - Time: 28.24 seconds
Processing fc:exact_piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.7198 - Time: 26.32 seconds
Processing or:exact_piece - Params: {'n_estimators': 10, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.5490 - Time: 13.25 seconds
Processing fc:exact_

Processing or:piece - Ended at 2024-08-05 22:14:23.478858
Processing or:piece - Total duration: 0:19:20.276717
Processing or:piece - Best Parameters identified from the grid search: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1}
Processing or:piece - Best Image Size identified from the grid search: (128, 128)
Processing or:piece - Final test results saved to C:\Users\ReCas\OneDrive\Documents\2024_AIMachineLearning\99_Projects\07.DataSets_or\or_piece_RF_results_20240805_221355.xlsx
Processing gy:exact_piece - Params: {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1} - Mean Score (accuracy): 0.6626 - Time: 54.25 seconds
Processing fc:piece - Test Accuracy: 0.9052
Processing fc:piece - Time taken for final training: 11.04 seconds
Processing fc:piece - Time taken for final prediction: 0.02 seconds
Processing fc:piece - Test Classification Report:
              precision    re

Processing fc:country - Test Accuracy: 0.6928
Processing fc:country - Time taken for final training: 24.26 seconds
Processing fc:country - Time taken for final prediction: 0.03 seconds
Processing fc:country - Test Classification Report:
              precision    recall  f1-score   support

      100RUS       0.66      0.64      0.65        61
      200GER       0.74      0.84      0.78        61
       300UK       0.78      0.70      0.74        61
      400JAP       0.61      0.77      0.68        62
      500USA       0.70      0.51      0.59        61

    accuracy                           0.69       306
   macro avg       0.70      0.69      0.69       306
weighted avg       0.70      0.69      0.69       306

Processing fc:country - Test Confusion Matrix:
[[39  9  2  7  4]
 [ 5 51  1  3  1]
 [ 2  2 43  9  5]
 [ 5  0  6 48  3]
 [ 8  7  3 12 31]]
Processing fc:country - Ended at 2024-08-05 22:18:07.601091
Processing fc:country - Total duration: 0:23:04.398950
Processing fc:country

Processing fc:exact_piece - Total time taken for image size (256, 256): 816.76 seconds

Processing fc:exact_piece - Best Score: 0.7737002341920375
Processing fc:exact_piece - Best Parameters: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1}
Processing fc:exact_piece - Best Image Size: (256, 256)
Processing fc:exact_piece - Grid search results saved to C:\Users\ReCas\OneDrive\Documents\2024_AIMachineLearning\99_Projects\08.DataSets_fc\fc_exact_piece_RF_results_20240805_222432.xlsx
Processing fc:exact_piece - Loading and preprocessing test data
Processing or:exact_piece - Test Accuracy: 0.8105
Processing or:exact_piece - Time taken for final training: 19.27 seconds
Processing or:exact_piece - Time taken for final prediction: 0.02 seconds
Processing or:exact_piece - Test Classification Report:
                  precision    recall  f1-score   support

101_100RUS_01INF       0.88      1.00      0.93         7
102_100RUS_02TNK     