In [2]:
import os
import cv2
import pickle
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.linear_model import RANSACRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb
import time

warnings.filterwarnings('ignore')

# ==================== CONFIGURATION ====================
PROCESSED_DIR = './data/processed'
RESULTS_DIR = './geometric_results'
DATASET_NAME = 'mnist'
TRAIN_SAMPLES = 10000
TEST_SAMPLES = 2000
N_VISUAL_SAMPLES = 5
SEED = 42

# Training and test set combinations
TRAIN_TYPES = ['original', 'mixed_augmented', 'combined_augmented']
TEST_TYPES = ['original', 'rotation_15', 'noise', 'scaling_0.8', 'occlusion_25', 'all_combined']

np.random.seed(SEED)
os.makedirs(RESULTS_DIR, exist_ok=True)

# ==================== GEOMETRY FEATURE EXTRACTION ====================
class GeometryExtractor:
    def __init__(self):
        self.results_dir = RESULTS_DIR
        
    def _convert_to_gray(self, img):
        """Convert image to grayscale uint8"""
        if len(img.shape) == 3 and img.shape[-1] == 3:
            return cv2.cvtColor((img * 255).astype('uint8'), cv2.COLOR_RGB2GRAY)
        return (img.squeeze() * 255).astype('uint8')
    
    def extract_hough_features(self, gray):
        """Extract Hough Transform features"""
        edges = cv2.Canny(gray, 50, 150)
        vis = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
        
        # Lines detection
        lines = cv2.HoughLinesP(edges, 1, np.pi/180, 30, 20, 10)
        num_lines, lengths, angles = 0, [], []
        if lines is not None:
            for line in lines:
                x1, y1, x2, y2 = line[0]
                cv2.line(vis, (x1, y1), (x2, y2), (0, 255, 0), 1)
                lengths.append(np.hypot(x2 - x1, y2 - y1))
                angles.append(abs(np.degrees(np.arctan2(y2 - y1, x2 - x1))))
            num_lines = len(lines)
        
        # Circles detection
        blurred = cv2.GaussianBlur(gray, (5, 5), 2)
        circles = cv2.HoughCircles(blurred, cv2.HOUGH_GRADIENT, 1.2, gray.shape[0] // 6,
                                   param1=50, param2=30, minRadius=5, maxRadius=50)
        num_circles, avg_radius = 0, 0
        if circles is not None:
            circles = np.uint16(np.around(circles))
            for (x, y, r) in circles[0, :]:
                cv2.circle(vis, (x, y), r, (0, 0, 255), 1)
            num_circles = len(circles[0])
            avg_radius = np.mean(circles[0][:, 2])
        
        # Edge density
        h, w = gray.shape
        edge_density = np.sum(edges > 0) / (h * w)
        
        features = {
            'hough_num_lines': num_lines,
            'hough_mean_length': np.mean(lengths) if lengths else 0,
            'hough_mean_angle': np.mean(angles) if angles else 0,
            'hough_num_circles': num_circles,
            'hough_avg_radius': avg_radius,
            'hough_edge_density': edge_density
        }
        return features, vis
    
    def extract_ransac_features(self, gray):
        """Extract RANSAC features"""
        vis = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
        edges = cv2.Canny(gray, 50, 150)
        points = np.column_stack(np.where(edges > 0))
        
        if len(points) < 10:
            return {'ransac_line_inlier_ratio': 0, 'ransac_circle_inlier_ratio': 0}, vis
        
        # Line fit with RANSAC
        X, y = points[:, 1].reshape(-1, 1), points[:, 0]
        line_inlier_ratio = 0
        try:
            ransac = RANSACRegressor(residual_threshold=3.0, random_state=SEED)
            ransac.fit(X, y)
            y_pred = ransac.predict(X)
            line_inlier_ratio = np.sum(ransac.inlier_mask_) / len(points)
            # Draw sample points
            for x1, y1p in zip(X.flatten()[::10], y_pred[::10]):
                cv2.circle(vis, (int(x1), int(y1p)), 1, (255, 0, 0), -1)
        except:
            pass
        
        # Circle fit with RANSAC (simplified)
        circle_inlier_ratio = 0
        for _ in range(10):
            if len(points) < 3:
                break
            idx = np.random.choice(len(points), 3, replace=False)
            x1, y1 = points[idx[0]]
            x2, y2 = points[idx[1]]
            x3, y3 = points[idx[2]]
            A = x1*(y2-y3) - y1*(x2-x3) + x2*y3 - x3*y2
            if A == 0:
                continue
            B = (x1**2+y1**2)*(y3-y2)+(x2**2+y2**2)*(y1-y3)+(x3**2+y3**2)*(y2-y1)
            C = (x1**2+y1**2)*(x2-x3)+(x2**2+y2**2)*(x3-x1)+(x3**2+y3**2)*(x1-x2)
            cx, cy = -B/(2*A), -C/(2*A)
            r_est = np.sqrt((points[:,1]-cx)**2 + (points[:,0]-cy)**2)
            if np.any(np.isnan(r_est)):
                continue
            inliers = np.abs(r_est - np.mean(r_est)) < 5
            inlier_ratio = np.sum(inliers) / len(points)
            if inlier_ratio > circle_inlier_ratio:
                circle_inlier_ratio = inlier_ratio
                cv2.circle(vis, (int(cx), int(cy)), int(np.mean(r_est)), (255, 255, 0), 1)
        
        return {
            'ransac_line_inlier_ratio': line_inlier_ratio,
            'ransac_circle_inlier_ratio': circle_inlier_ratio
        }, vis
    
    def extract_combined_features(self, gray):
        """Extract combined Hough + RANSAC features"""
        hough_feats, hough_vis = self.extract_hough_features(gray)
        ransac_feats, ransac_vis = self.extract_ransac_features(gray)
        combined_vis = cv2.addWeighted(hough_vis, 0.6, ransac_vis, 0.6, 0)
        return {**hough_feats, **ransac_feats}, combined_vis
    
    def extract_all_features(self, images, labels):
        """Extract all geometry features from images"""
        print("Extracting geometry features...")
        hough_features = []
        ransac_features = []
        combined_features = []
        
        for img in tqdm(images, desc="Processing images"):
            gray = self._convert_to_gray(img)
            
            # Extract all three types
            h_feat, _ = self.extract_hough_features(gray)
            r_feat, _ = self.extract_ransac_features(gray)
            c_feat, _ = self.extract_combined_features(gray)
            
            hough_features.append(h_feat)
            ransac_features.append(r_feat)
            combined_features.append(c_feat)
        
        # Convert to DataFrames
        df_hough = pd.DataFrame(hough_features)
        df_ransac = pd.DataFrame(ransac_features)
        df_combined = pd.DataFrame(combined_features)
        
        # Add labels
        df_hough['label'] = labels
        df_ransac['label'] = labels
        df_combined['label'] = labels
        
        return df_hough, df_ransac, df_combined
    
    def save_visualization_samples(self, images, labels):
        """Save visualization samples for all geometry types"""
        print(f"\nSaving {N_VISUAL_SAMPLES} visualization samples...")
        
        # Create visualization directory
        vis_dir = os.path.join(self.results_dir, 'visualizations')
        os.makedirs(vis_dir, exist_ok=True)
        
        # Select random samples
        indices = np.random.choice(len(images), N_VISUAL_SAMPLES, replace=False)
        
        for idx in indices:
            img = images[idx]
            label = labels[idx]
            gray = self._convert_to_gray(img)
            
            # Extract all visualizations
            _, hough_vis = self.extract_hough_features(gray)
            _, ransac_vis = self.extract_ransac_features(gray)
            _, combined_vis = self.extract_combined_features(gray)
            
            # Create figure
            fig, axes = plt.subplots(1, 4, figsize=(16, 4))
            
            # Original
            axes[0].imshow(gray, cmap='gray')
            axes[0].set_title(f'Original (Label: {label})')
            axes[0].axis('off')
            
            # Hough
            axes[1].imshow(cv2.cvtColor(hough_vis, cv2.COLOR_BGR2RGB))
            axes[1].set_title('Hough Transform')
            axes[1].axis('off')
            
            # RANSAC
            axes[2].imshow(cv2.cvtColor(ransac_vis, cv2.COLOR_BGR2RGB))
            axes[2].set_title('RANSAC')
            axes[2].axis('off')
            
            # Combined
            axes[3].imshow(cv2.cvtColor(combined_vis, cv2.COLOR_BGR2RGB))
            axes[3].set_title('Combined (Hough + RANSAC)')
            axes[3].axis('off')
            
            plt.suptitle(f'Geometry Detection Comparison - Sample {idx}', fontsize=14)
            plt.tight_layout()
            
            save_path = os.path.join(vis_dir, f'sample_{idx}_label_{label}.png')
            plt.savefig(save_path, dpi=150, bbox_inches='tight')
            plt.close()
        
        print(f"✓ Saved visualizations to {vis_dir}")

# ==================== MODEL TRAINING ====================
class ModelTrainer:
    def __init__(self, results_dir):
        self.results_dir = results_dir
        self.models = {
            'SVM_RBF': SVC(kernel='rbf', random_state=SEED),
            'LogisticRegression': LogisticRegression(max_iter=1000, random_state=SEED),
            'MLP': MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=200, random_state=SEED),
            'RandomForest': RandomForestClassifier(n_estimators=100, random_state=SEED, n_jobs=-1),
            'LightGBM': lgb.LGBMClassifier(n_estimators=100, random_state=SEED, verbose=-1)
        }
    
    def prepare_data(self, df):
        """Prepare features and labels"""
        feature_cols = [c for c in df.columns if c != 'label']
        X = df[feature_cols].fillna(0).values
        y = df['label'].values
        return X, y, feature_cols
    
    def train_and_evaluate(self, X_train, y_train, X_test, y_test, feature_type, train_type, test_type):
        """Train all models and evaluate"""
        results = {}
        
        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        print(f"\n{'='*80}")
        print(f"TRAINING SET: {train_type.upper()}")
        print(f"TEST SET: {test_type.upper()}")
        print(f"FEATURE TYPE: {feature_type.upper()}")
        print(f"{'='*80}")
        print(f"Train samples: {len(X_train)}, Features: {X_train.shape[1]}")
        print(f"Test samples: {len(X_test)}, Features: {X_test.shape[1]}")
        
        for model_name, model_class in [
            ('SVM_RBF', lambda: SVC(kernel='rbf', random_state=SEED)),
            ('LogisticRegression', lambda: LogisticRegression(max_iter=1000, random_state=SEED)),
            ('MLP', lambda: MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=200, random_state=SEED)),
            ('RandomForest', lambda: RandomForestClassifier(n_estimators=100, random_state=SEED, n_jobs=-1))
        ]:
            print(f"\nTraining {model_name}...")
            start_time = time.time()
            
            # Create fresh model instance
            model = model_class()
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
            
            train_time = time.time() - start_time
            
            # Metrics
            accuracy = accuracy_score(y_test, y_pred)
            f1_macro = f1_score(y_test, y_pred, average='macro')
            f1_weighted = f1_score(y_test, y_pred, average='weighted')
            
            report = classification_report(y_test, y_pred, digits=4)
            
            results[model_name] = {
                'accuracy': accuracy,
                'f1_macro': f1_macro,
                'f1_weighted': f1_weighted,
                'train_time': train_time,
                'report': report,
                'y_pred': y_pred
            }
            
            print(f"  Accuracy: {accuracy:.4f}")
            print(f"  F1-Macro: {f1_macro:.4f}")
            print(f"  F1-Weighted: {f1_weighted:.4f}")
            print(f"  Training Time: {train_time:.2f}s")
        
        return results
    
    def save_results(self, all_results, feature_type):
        """Save comprehensive results to text file"""
        output_path = os.path.join(self.results_dir, f'{feature_type}_classification_results.txt')
        
        with open(output_path, 'w') as f:
            f.write(f"{'='*100}\n")
            f.write(f"CLASSIFICATION RESULTS - {feature_type.upper()}\n")
            f.write(f"{'='*100}\n\n")
            f.write(f"Dataset: {DATASET_NAME.upper()}\n")
            f.write(f"Training Samples: {TRAIN_SAMPLES}\n")
            f.write(f"Test Samples: {TEST_SAMPLES}\n\n")
            
            # Group by training set
            for train_type in TRAIN_TYPES:
                f.write(f"\n{'#'*100}\n")
                f.write(f"TRAINING SET: {train_type.upper()}\n")
                f.write(f"{'#'*100}\n")
                
                for test_type in TEST_TYPES:
                    key = (train_type, test_type)
                    if key not in all_results:
                        continue
                    
                    results = all_results[key]
                    
                    f.write(f"\n{'-'*100}\n")
                    f.write(f"TEST SET: {test_type.upper()}\n")
                    f.write(f"{'-'*100}\n\n")
                    
                    for model_name, model_results in results.items():
                        f.write(f"\n{'='*80}\n")
                        f.write(f"MODEL: {model_name}\n")
                        f.write(f"{'='*80}\n\n")
                        f.write(f"Accuracy: {model_results['accuracy']:.4f}\n")
                        f.write(f"F1-Score (Macro): {model_results['f1_macro']:.4f}\n")
                        f.write(f"F1-Score (Weighted): {model_results['f1_weighted']:.4f}\n")
                        f.write(f"Training Time: {model_results['train_time']:.2f} seconds\n\n")
                        f.write("Classification Report:\n")
                        f.write(model_results['report'])
                        f.write("\n\n")
        
        print(f"✓ Saved results to {output_path}")

# ==================== MAIN PIPELINE ====================
def main():
    print(f"\n{'='*100}")
    print(f"GEOMETRY-BASED CLASSIFICATION PIPELINE FOR {DATASET_NAME.upper()}")
    print(f"3 Training Sets x 6 Test Sets = 18 Classifications per Model")
    print(f"{'='*100}\n")
    
    start_time = time.time()
    
    # ==================== STEP 1: Load All Data ====================
    print(f"Step 1: Loading all {DATASET_NAME.upper()} datasets...")
    
    all_train_data = {}
    all_test_data = {}
    
    # Load all training sets
    for train_type in TRAIN_TYPES:
        train_path = os.path.join(PROCESSED_DIR, f'{DATASET_NAME}_train', f'{train_type}.pkl')
        if not os.path.exists(train_path):
            raise FileNotFoundError(f"Training data not found at {train_path}")
        
        with open(train_path, 'rb') as f:
            data = pickle.load(f)
        
        # Sample data
        if len(data['images']) > TRAIN_SAMPLES:
            indices = np.random.choice(len(data['images']), TRAIN_SAMPLES, replace=False)
            all_train_data[train_type] = {
                'images': data['images'][indices],
                'labels': data['labels'][indices]
            }
        else:
            all_train_data[train_type] = data
        
        print(f"✓ Loaded {train_type}: {len(all_train_data[train_type]['images'])} samples")
    
    # Load all test sets
    for test_type in TEST_TYPES:
        test_path = os.path.join(PROCESSED_DIR, f'{DATASET_NAME}_test', f'{test_type}.pkl')
        if not os.path.exists(test_path):
            raise FileNotFoundError(f"Test data not found at {test_path}")
        
        with open(test_path, 'rb') as f:
            data = pickle.load(f)
        
        # Sample data
        if len(data['images']) > TEST_SAMPLES:
            indices = np.random.choice(len(data['images']), TEST_SAMPLES, replace=False)
            all_test_data[test_type] = {
                'images': data['images'][indices],
                'labels': data['labels'][indices]
            }
        else:
            all_test_data[test_type] = data
        
        print(f"✓ Loaded {test_type}: {len(all_test_data[test_type]['images'])} samples")
    
    # ==================== STEP 2: Save Visualizations ====================
    print(f"\nStep 2: Saving visualization samples...")
    
    extractor = GeometryExtractor()
    
    # Use original test set for visualizations
    test_images = all_test_data['original']['images'][:50]
    test_labels = all_test_data['original']['labels'][:50]
    extractor.save_visualization_samples(test_images, test_labels)
    
    # ==================== STEP 3: Extract Features for All Sets ====================
    print(f"\nStep 3: Extracting geometry features for all datasets...")
    
    all_train_features = {}
    all_test_features = {}
    
    # Extract features for all training sets
    for train_type in TRAIN_TYPES:
        print(f"\n  Processing training set: {train_type}")
        images = all_train_data[train_type]['images']
        labels = all_train_data[train_type]['labels']
        
        hough_df, ransac_df, combined_df = extractor.extract_all_features(images, labels)
        
        all_train_features[train_type] = {
            'hough': hough_df,
            'ransac': ransac_df,
            'combined': combined_df
        }
    
    # Extract features for all test sets
    for test_type in TEST_TYPES:
        print(f"\n  Processing test set: {test_type}")
        images = all_test_data[test_type]['images']
        labels = all_test_data[test_type]['labels']
        
        hough_df, ransac_df, combined_df = extractor.extract_all_features(images, labels)
        
        all_test_features[test_type] = {
            'hough': hough_df,
            'ransac': ransac_df,
            'combined': combined_df
        }
    
    print(f"\n✓ Feature extraction completed for all datasets")
    
    # ==================== STEP 4: Train Models (3 x 6 = 18 combinations) ====================
    print(f"\nStep 4: Training and evaluating models...")
    print(f"Total combinations: {len(TRAIN_TYPES)} train x {len(TEST_TYPES)} test = {len(TRAIN_TYPES) * len(TEST_TYPES)}")
    
    trainer = ModelTrainer(RESULTS_DIR)
    
    # Feature types to evaluate
    feature_types = ['hough', 'ransac', 'combined']
    feature_names = {
        'hough': 'Hough_Transform',
        'ransac': 'RANSAC',
        'combined': 'Combined_Hough_RANSAC'
    }
    
    # Store all results
    all_results_by_feature = {
        'hough': {},
        'ransac': {},
        'combined': {}
    }
    
    combination_count = 0
    total_combinations = len(TRAIN_TYPES) * len(TEST_TYPES) * len(feature_types)
    
    # Train for each feature type
    for feat_type in feature_types:
        print(f"\n{'#'*100}")
        print(f"FEATURE TYPE: {feature_names[feat_type].upper()}")
        print(f"{'#'*100}")
        
        # For each training set
        for train_type in TRAIN_TYPES:
            train_df = all_train_features[train_type][feat_type]
            X_train, y_train, _ = trainer.prepare_data(train_df)
            
            # For each test set
            for test_type in TEST_TYPES:
                combination_count += 1
                print(f"\n[Combination {combination_count}/{total_combinations}]")
                
                test_df = all_test_features[test_type][feat_type]
                X_test, y_test, _ = trainer.prepare_data(test_df)
                
                # Train and evaluate
                results = trainer.train_and_evaluate(
                    X_train, y_train, X_test, y_test,
                    feature_names[feat_type], train_type, test_type
                )
                
                all_results_by_feature[feat_type][(train_type, test_type)] = results
    
    # ==================== STEP 5: Save Results ====================
    print(f"\nStep 5: Saving all results...")
    
    for feat_type in feature_types:
        trainer.save_results(all_results_by_feature[feat_type], feature_names[feat_type])
    
    # ==================== STEP 6: Summary Report ====================
    print(f"\nStep 6: Generating summary report...")
    
    summary_path = os.path.join(RESULTS_DIR, 'SUMMARY_REPORT.txt')
    
    with open(summary_path, 'w') as f:
        f.write(f"{'='*100}\n")
        f.write(f"GEOMETRY-BASED CLASSIFICATION - COMPREHENSIVE SUMMARY\n")
        f.write(f"{'='*100}\n\n")
        f.write(f"Dataset: {DATASET_NAME.upper()}\n")
        f.write(f"Training Samples per set: {TRAIN_SAMPLES}\n")
        f.write(f"Test Samples per set: {TEST_SAMPLES}\n")
        f.write(f"Training Sets: {', '.join(TRAIN_TYPES)}\n")
        f.write(f"Test Sets: {', '.join(TEST_TYPES)}\n")
        f.write(f"Total Combinations: {len(TRAIN_TYPES)} x {len(TEST_TYPES)} = {len(TRAIN_TYPES) * len(TEST_TYPES)} per feature type\n")
        f.write(f"Total Execution Time: {time.time() - start_time:.2f} seconds\n\n")
        
        f.write(f"{'='*100}\n")
        f.write(f"BEST PERFORMANCE BY FEATURE TYPE\n")
        f.write(f"{'='*100}\n\n")
        
        overall_best = None
        overall_best_f1 = 0
        
        for feat_type in feature_types:
            f.write(f"\n{feature_names[feat_type]}:\n")
            f.write(f"{'-'*100}\n\n")
            
            results = all_results_by_feature[feat_type]
            
            # Find best combination for this feature type
            best_combo = None
            best_f1 = 0
            
            for (train_type, test_type), models_results in results.items():
                for model_name, model_results in models_results.items():
                    if model_results['f1_macro'] > best_f1:
                        best_f1 = model_results['f1_macro']
                        best_combo = (train_type, test_type, model_name, model_results)
                    
                    if model_results['f1_macro'] > overall_best_f1:
                        overall_best_f1 = model_results['f1_macro']
                        overall_best = (feature_names[feat_type], train_type, test_type, model_name, model_results)
            
            if best_combo:
                f.write(f"Best Configuration:\n")
                f.write(f"  Training Set: {best_combo[0]}\n")
                f.write(f"  Test Set: {best_combo[1]}\n")
                f.write(f"  Model: {best_combo[2]}\n")
                f.write(f"  Accuracy: {best_combo[3]['accuracy']:.4f}\n")
                f.write(f"  F1-Macro: {best_combo[3]['f1_macro']:.4f}\n")
                f.write(f"  F1-Weighted: {best_combo[3]['f1_weighted']:.4f}\n")
                f.write(f"  Training Time: {best_combo[3]['train_time']:.2f}s\n\n")
            
            # Average performance across all combinations
            all_f1s = []
            for models_results in results.values():
                for model_results in models_results.values():
                    all_f1s.append(model_results['f1_macro'])
            
            f.write(f"Average F1-Macro across all combinations: {np.mean(all_f1s):.4f}\n")
            f.write(f"Std Dev: {np.std(all_f1s):.4f}\n\n")
        
        f.write(f"\n{'='*100}\n")
        f.write(f"OVERALL BEST CONFIGURATION (ACROSS ALL FEATURE TYPES)\n")
        f.write(f"{'='*100}\n\n")
        
        if overall_best:
            f.write(f"Feature Type: {overall_best[0]}\n")
            f.write(f"Training Set: {overall_best[1]}\n")
            f.write(f"Test Set: {overall_best[2]}\n")
            f.write(f"Model: {overall_best[3]}\n")
            f.write(f"Accuracy: {overall_best[4]['accuracy']:.4f}\n")
            f.write(f"F1-Macro: {overall_best[4]['f1_macro']:.4f}\n")
            f.write(f"F1-Weighted: {overall_best[4]['f1_weighted']:.4f}\n")
            f.write(f"Training Time: {overall_best[4]['train_time']:.2f}s\n")
        
        # Model comparison across all combinations
        f.write(f"\n{'='*100}\n")
        f.write(f"MODEL PERFORMANCE COMPARISON (AVERAGE ACROSS ALL COMBINATIONS)\n")
        f.write(f"{'='*100}\n\n")
        
        model_stats = {}
        for feat_type in feature_types:
            for models_results in all_results_by_feature[feat_type].values():
                for model_name, model_results in models_results.items():
                    if model_name not in model_stats:
                        model_stats[model_name] = []
                    model_stats[model_name].append(model_results['f1_macro'])
        
        for model_name in sorted(model_stats.keys()):
            f1_scores = model_stats[model_name]
            f.write(f"{model_name:20s}: Mean F1={np.mean(f1_scores):.4f}, Std={np.std(f1_scores):.4f}, "
                   f"Min={np.min(f1_scores):.4f}, Max={np.max(f1_scores):.4f}\n")
    
    print(f"✓ Saved summary report to {summary_path}")
    
    # ==================== COMPLETION ====================
    total_time = time.time() - start_time
    print(f"\n{'='*100}")
    print(f"PIPELINE COMPLETED SUCCESSFULLY!")
    print(f"{'='*100}")
    print(f"Total Execution Time: {total_time:.2f} seconds ({total_time/60:.1f} minutes)")
    print(f"Total Combinations Evaluated: {len(TRAIN_TYPES)} train x {len(TEST_TYPES)} test x 3 features = {len(TRAIN_TYPES) * len(TEST_TYPES) * 3}")
    print(f"Total Model Trainings: {len(TRAIN_TYPES) * len(TEST_TYPES) * 3 * 5} (5 models per combination)")
    print(f"\nAll results saved in: {RESULTS_DIR}")
    print(f"\nGenerated files:")
    print(f"  - Visualizations: {os.path.join(RESULTS_DIR, 'visualizations')}")
    print(f"  - Hough_Transform_classification_results.txt (18 combinations)")
    print(f"  - RANSAC_classification_results.txt (18 combinations)")
    print(f"  - Combined_Hough_RANSAC_classification_results.txt (18 combinations)")
    print(f"  - SUMMARY_REPORT.txt (Overall best results)")
    print(f"{'='*100}\n")

if __name__ == "__main__":
    main()


GEOMETRY-BASED CLASSIFICATION PIPELINE FOR MNIST
3 Training Sets x 6 Test Sets = 18 Classifications per Model

Step 1: Loading all MNIST datasets...
✓ Loaded original: 10000 samples
✓ Loaded mixed_augmented: 10000 samples
✓ Loaded combined_augmented: 10000 samples
✓ Loaded original: 2000 samples
✓ Loaded rotation_15: 2000 samples
✓ Loaded noise: 2000 samples
✓ Loaded scaling_0.8: 2000 samples
✓ Loaded occlusion_25: 2000 samples
✓ Loaded all_combined: 2000 samples

Step 2: Saving visualization samples...

Saving 5 visualization samples...
✓ Saved visualizations to ./geometric_results\visualizations

Step 3: Extracting geometry features for all datasets...

  Processing training set: original
Extracting geometry features...


Processing images: 100%|██████████| 10000/10000 [16:06<00:00, 10.34it/s]



  Processing training set: mixed_augmented
Extracting geometry features...


Processing images: 100%|██████████| 10000/10000 [11:44<00:00, 14.20it/s]



  Processing training set: combined_augmented
Extracting geometry features...


Processing images: 100%|██████████| 10000/10000 [16:29<00:00, 10.10it/s] 



  Processing test set: original
Extracting geometry features...


Processing images: 100%|██████████| 2000/2000 [02:37<00:00, 12.70it/s]



  Processing test set: rotation_15
Extracting geometry features...


Processing images: 100%|██████████| 2000/2000 [03:24<00:00,  9.78it/s]



  Processing test set: noise
Extracting geometry features...


Processing images: 100%|██████████| 2000/2000 [02:10<00:00, 15.31it/s]



  Processing test set: scaling_0.8
Extracting geometry features...


Processing images: 100%|██████████| 2000/2000 [01:42<00:00, 19.46it/s]



  Processing test set: occlusion_25
Extracting geometry features...


Processing images: 100%|██████████| 2000/2000 [02:07<00:00, 15.65it/s]



  Processing test set: all_combined
Extracting geometry features...


Processing images: 100%|██████████| 2000/2000 [01:57<00:00, 16.99it/s]



✓ Feature extraction completed for all datasets

Step 4: Training and evaluating models...
Total combinations: 3 train x 6 test = 18

####################################################################################################
FEATURE TYPE: HOUGH_TRANSFORM
####################################################################################################

[Combination 1/54]

TRAINING SET: ORIGINAL
TEST SET: ORIGINAL
FEATURE TYPE: HOUGH_TRANSFORM
Train samples: 10000, Features: 6
Test samples: 2000, Features: 6

Training SVM_RBF...
  Accuracy: 0.4540
  F1-Macro: 0.4212
  F1-Weighted: 0.4318
  Training Time: 2.09s

Training LogisticRegression...
  Accuracy: 0.4080
  F1-Macro: 0.3776
  F1-Weighted: 0.3887
  Training Time: 0.09s

Training MLP...
  Accuracy: 0.4360
  F1-Macro: 0.4085
  F1-Weighted: 0.4197
  Training Time: 24.12s

Training RandomForest...
  Accuracy: 0.3970
  F1-Macro: 0.3779
  F1-Weighted: 0.3891
  Training Time: 0.50s

[Combination 2/54]

TRAINING SET: ORIGINAL
T