**PSO Weight Optimization**

In [None]:
import pandas as pd
from pyswarm import pso  # Install this package: pip install pyswarm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np
from tqdm import tqdm  # Import tqdm for progress bar
from sklearn.impute import SimpleImputer  # Import imputer to handle missing values

# Step 1: Load and Standardize Column Names
file_paths = [
    r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\glcm_swt_dwt_merged\combined_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"
]

feature_sets = []
meta_data = None  # 'filename' and 'category', taken once from the first file

# Load the datasets and preprocess
for path in tqdm(file_paths, desc="Loading Datasets", unit="file"):
    # Map the alternative header spellings onto the canonical names (no-op when absent).
    df = pd.read_csv(path).rename(columns={'Filename': 'filename', 'Labels': 'category'})

    if meta_data is None:
        meta_data = df[['filename', 'category']].copy()
    elif len(df) != len(meta_data):
        # The sets are joined column-wise by position; a row-count mismatch would
        # silently pad with NaN (including NaN labels) instead of failing.
        raise ValueError(
            f"{path} has {len(df)} rows but the first file has {len(meta_data)}; "
            "feature sets must describe the same samples."
        )

    # Strip the metadata from EVERY file, including the first one. Leaving it in
    # the first set would turn it into 'set0_filename' / 'set0_category' after
    # prefixing, and the label would then be used as an input feature.
    df = df.drop(columns=['filename', 'category'], errors='ignore')

    feature_sets.append(df)

# Step 2: Merge Feature Sets
# The 'set{i}_' prefix keeps columns unique and lets the objective function
# recover which columns belong to which feature set.
prefixed_sets = [
    features.add_prefix(f"set{i}_")
    for i, features in tqdm(enumerate(feature_sets), desc="Merging Feature Sets", total=len(feature_sets))
]
merged_features = pd.concat([meta_data] + prefixed_sets, axis=1)

# Step 3: Prepare Data for Optimization
labels = merged_features['category']
features = merged_features.drop(columns=['filename', 'category'], errors='ignore')

# Encode labels for classification (integer codes in order of first appearance)
labels = pd.factorize(labels)[0]

# Split into training and testing data
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

def objective_function(weights):
    """PSO objective: negative hold-out accuracy of a logistic regression on weighted features.

    Each feature set (columns prefixed ``set{i}_`` in the module-level
    ``X_train`` / ``X_test``) is multiplied by ``weights[i]`` and the weighted
    sets are placed side by side. A logistic regression is fitted on the
    training part and scored on the test part.

    Parameters
    ----------
    weights : sequence of float
        One weight per entry of the module-level ``feature_sets``.

    Returns
    -------
    float
        ``-accuracy`` (so that minimising maximises accuracy), or ``1`` as a
        penalty when no usable feature column remains.

    Notes
    -----
    The score is computed on ``X_test`` / ``y_test``, so the weights are tuned
    against that split; accuracy reported later on the same split is optimistic.
    """
    weighted_train_parts = []
    weighted_test_parts = []

    # Apply weights to feature sets
    for i in range(len(feature_sets)):
        # Match on the prefix only; a substring match could also pick up
        # columns of another set that merely contain 'set{i}_' in their name.
        in_set = X_train.columns.astype(str).str.startswith(f'set{i}_')
        train_set = X_train.loc[:, in_set]
        test_set = X_test.loc[:, in_set]

        # Convert to numeric if necessary (unparseable values become NaN)
        train_set = train_set.apply(pd.to_numeric, errors='coerce')
        test_set = test_set.apply(pd.to_numeric, errors='coerce')

        weighted_train_parts.append(train_set * weights[i])
        weighted_test_parts.append(test_set * weights[i])

    # Check if weighted features are empty before handing them to scikit-learn
    if not weighted_train_parts or sum(part.shape[1] for part in weighted_train_parts) == 0:
        print("Warning: Weighted feature set is empty for training.")
        return 1  # Return a high value to avoid optimization with empty features

    # Concatenate column-wise. Adding the frames together would align them on
    # column labels, and because every set has distinct prefixed names the sum
    # would be NaN in every cell.
    weighted_features_train = pd.concat(weighted_train_parts, axis=1)
    weighted_features_test = pd.concat(weighted_test_parts, axis=1)

    # Handle missing values by filling NaNs with the column mean
    imputer = SimpleImputer(strategy='mean')
    weighted_features_train = imputer.fit_transform(weighted_features_train)
    weighted_features_test = imputer.transform(weighted_features_test)

    # Ensure no NaN values exist in the training and testing datasets after imputation
    if np.isnan(weighted_features_train).any() or np.isnan(weighted_features_test).any():
        print("Warning: Missing values detected after applying weights.")

    # The imputer may have discarded columns that were entirely NaN
    if weighted_features_train.shape[1] == 0:
        print("Warning: Weighted feature set is empty for training.")
        return 1  # Return a high value to avoid optimization with empty features

    # Train logistic regression model
    model = LogisticRegression(max_iter=1000)
    model.fit(weighted_features_train, y_train)

    # Predict on test set
    predictions = model.predict(weighted_features_test)

    # Calculate accuracy (maximize this, so return -accuracy)
    accuracy = accuracy_score(y_test, predictions)
    return -accuracy  # Minimize negative accuracy for optimization

# Step 4: Optimize Weights for Feature Sets
num_feature_sets = len(feature_sets)
lb = [0.1] * num_feature_sets  # Lower bounds for weights
ub = [1.0] * num_feature_sets  # Upper bounds for weights

# Seed NumPy's global RNG so the swarm initialisation, and therefore the saved
# weights, can be reproduced on a re-run.
# NOTE(review): assumes pyswarm draws from np.random's global state - confirm.
np.random.seed(42)

print("Starting PSO optimization...")

# Perform PSO optimization; pso() returns (best position, objective value there)
best_weights, best_accuracy = pso(objective_function, lb, ub, swarmsize=30, maxiter=100)

# Step 5: Save and Print Optimized Weights
weights_output_path = r"C:\Users\123\OneDrive\Desktop\features extraction\weights optimization\optimized_feature_set_weights(2).csv"
weights_df = pd.DataFrame({
    'Feature_Set': [f"set{i}" for i in range(num_feature_sets)],
    'Weight': best_weights
})
weights_df.to_csv(weights_output_path, index=False)

print(f"Optimized weights saved to: {weights_output_path}")
# The objective returns -accuracy, so flip the sign back for reporting.
print(f"Best Accuracy Achieved: {-best_accuracy}")

# Display Final Optimized Weights
print("Final Optimized Weights for Each Feature Set:")
for i, weight in enumerate(best_weights):
    print(f"set{i}: {weight:.4f}")

**Model Training on PSO-Optimized Weights**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm

# File paths for feature vectors and weights
file_paths = [
    r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\glcm_swt_dwt_merged\combined_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"
]

weights_csv_path = r"C:\Users\123\OneDrive\Desktop\features extraction\weights optimization\optimized_feature_set_weights(2).csv"

# Load the weights (one row per feature set, in the same order as file_paths)
weights_df = pd.read_csv(weights_csv_path)
weights = weights_df['Weight'].values
if len(weights) != len(file_paths):
    # zip() below would otherwise silently drop the unmatched feature sets.
    raise ValueError(f"Expected {len(file_paths)} weights, found {len(weights)} in {weights_csv_path}")

# Step 1: Load and Standardize Column Names
feature_sets = []
meta_data = None  # 'filename' and 'category', taken once from the first file

# Load and preprocess feature sets
for path in tqdm(file_paths, desc="Loading Feature Sets", unit="file"):
    # Map the alternative header spellings onto the canonical names (no-op when absent).
    df = pd.read_csv(path).rename(columns={'Filename': 'filename', 'Labels': 'category'})

    if meta_data is None:
        meta_data = df[['filename', 'category']].copy()

    # Drop the metadata from every file, the first included. Otherwise the first
    # set would carry 'category' into the model inputs as 'set0_category'.
    df = df.drop(columns=['filename', 'category'], errors='ignore')

    # Ensure all feature columns are numeric; unparseable or missing values become 0
    df = df.apply(pd.to_numeric, errors='coerce').fillna(0)

    feature_sets.append(df)

# Step 2: Apply Weights and Merge Feature Sets
weighted_sets = [
    features.add_prefix(f"set{i}_") * weight
    for i, (features, weight) in tqdm(enumerate(zip(feature_sets, weights)), desc="Applying Weights", total=len(feature_sets))
]
weighted_features = pd.concat(weighted_sets, axis=1)

# Add labels back to the dataset
final_dataset = pd.concat([meta_data, weighted_features], axis=1)
assert final_dataset['category'].notna().all(), "Feature sets and labels are not row-aligned."

# Step 3: Prepare Data for Training
labels = final_dataset['category']
features = final_dataset.drop(columns=['filename', 'category'], errors='ignore')

# Encode labels
labels = pd.factorize(labels)[0]

# Split the dataset into training and testing sets
# NOTE(review): test_size and random_state match the PSO cell, so this test split
# looks like the one the weights were tuned on - the test score may be optimistic.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Step 4: Train a Random Forest Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 5: Evaluate the Model on Training and Testing Sets
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Training accuracy
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Accuracy: {train_accuracy:.4f}")

# Testing accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Step 6: Detailed Performance on Testing Set
print("\nClassification Report (Testing Set):")
print(classification_report(y_test, y_test_pred))


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm
from tqdm.contrib.concurrent import thread_map  # For parallel tasks

# Load weights
weights_path = r"C:\Users\123\OneDrive\Desktop\features extraction\weights optimization\optimized_feature_set_weights(2).csv"  # Update with your weights file path
weights = pd.read_csv(weights_path)["Weight"].values

# Load and combine feature sets
feature_paths = [
    r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\glcm_swt_dwt_merged\combined_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"
]
if len(weights) != len(feature_paths):
    # zip() below would otherwise silently drop the unmatched feature sets.
    raise ValueError(f"Expected {len(feature_paths)} weights, found {len(weights)} in {weights_path}")

# Load datasets with a progress bar
print("Loading feature sets...")
# Apply the same header normalisation as the other loaders in this notebook.
# Without it a file using 'Filename'/'Labels' keeps those columns, and a numeric
# 'Labels' column would pass the numeric filter below and be used as a feature.
feature_sets = [
    pd.read_csv(path).rename(columns={'Filename': 'filename', 'Labels': 'category'})
    for path in tqdm(feature_paths, desc="Loading Features")
]

# Extract labels and remove unnecessary columns
labels = feature_sets[0]["category"]  # Assuming all files share the same labels
for i in range(len(feature_sets)):
    feature_sets[i] = feature_sets[i].drop(columns=["filename", "category"], errors="ignore")

# Apply weights to features
print("Applying weights to feature sets...")
weighted_features = None
for i, (features, weight) in tqdm(enumerate(zip(feature_sets, weights)), desc="Applying Weights", total=len(feature_sets)):
    # Ensure all columns are numeric
    numeric_features = features.select_dtypes(include=[np.number])

    # Log a warning if non-numeric columns are dropped
    non_numeric_columns = features.columns.difference(numeric_features.columns)
    if not non_numeric_columns.empty:
        print(f"Warning: Dropping non-numeric columns from set{i}: {list(non_numeric_columns)}")

    # Fill gaps with 0, as the other training cells do; SVC rejects NaN input.
    weighted_set = numeric_features.fillna(0).add_prefix(f"set{i}_") * weight

    # Combine with previous weighted features
    weighted_features = pd.concat([weighted_features, weighted_set], axis=1) if weighted_features is not None else weighted_set

# Ensure data alignment
assert len(weighted_features) == len(labels), "Features and labels must have the same number of samples."

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(weighted_features, labels, test_size=0.2, random_state=42, stratify=labels)

# Normalize features (statistics come from the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Hyperparameter tuning for SVM
# NOTE: 'gamma' has no effect on the linear kernel, so those combinations repeat
# the same fit; kept as-is so best_params_ keeps its current shape.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear']
}

# Custom progress bar for GridSearchCV
class TqdmGridSearchCV(GridSearchCV):
    """GridSearchCV that shows a tqdm progress bar, one tick per parameter combination.

    The previous version wrapped ``fit`` and stored a callback on
    ``self._fit_and_score``. The search never reads that attribute, and the
    bar's total was 0 because ``cv_results_`` does not exist before fitting,
    so nothing was ever displayed.

    This version overrides ``_run_search``, the hook scikit-learn's search
    classes use to feed candidates to the evaluator, and submits the
    candidates one at a time so the bar can advance between them. The public
    interface (constructor arguments, ``fit``, ``best_estimator_``, ...) is
    inherited unchanged.

    Notes
    -----
    * Candidates are evaluated sequentially; with ``n_jobs`` set, only the CV
      folds of a single candidate run in parallel.
    * The CV splitter is asked for its splits once per candidate. An integer
      ``cv`` gives the same folds each time; a shuffling splitter without a
      fixed ``random_state`` would not.
    """

    def _run_search(self, evaluate_candidates):
        """Evaluate every combination in ``self.param_grid``, ticking the bar after each."""
        # Local import: only this method needs ParameterGrid.
        from sklearn.model_selection import ParameterGrid

        candidates = list(ParameterGrid(self.param_grid))
        for params in tqdm(candidates, desc="Grid Search Progress"):
            # Results accumulate across calls, so cv_results_ covers all candidates.
            evaluate_candidates([params])

print("Starting SVM hyperparameter tuning...")

# Exhaustive 5-fold search over param_grid; the winning configuration is
# refitted on the whole training split.
grid_search = TqdmGridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    cv=5,
    refit=True,
    verbose=3,
)
grid_search.fit(X_train, y_train)

# Keep the refitted winner and show which settings produced it
svm_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

# Score the winner on the training split first, then on the held-out split
y_train_pred = svm_model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

y_test_pred = svm_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Summary lines followed by the per-class breakdown for the held-out split
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred))


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout, MaxPooling1D
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

# Load weights
weights_path = r"C:\Users\123\OneDrive\Desktop\features extraction\weights optimization\optimized_feature_set_weights(2).csv"  # Update with your weights file path
weights = pd.read_csv(weights_path)["Weight"].values

# Load and combine feature sets
feature_paths = [
    r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\glcm_swt_dwt_merged\combined_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"
]
if len(weights) != len(feature_paths):
    # zip() below would otherwise silently drop the unmatched feature sets.
    raise ValueError(f"Expected {len(feature_paths)} weights, found {len(weights)} in {weights_path}")

# Apply the same header normalisation as the other loaders in this notebook, so
# a 'Filename'/'Labels' file has its metadata removed by the drop() below
# instead of being coerced to numbers and used as features.
feature_sets = [
    pd.read_csv(path).rename(columns={'Filename': 'filename', 'Labels': 'category'})
    for path in feature_paths
]

# Extract labels and remove unnecessary columns
labels = feature_sets[0]["category"]  # Assuming all files share the same labels
for i in range(len(feature_sets)):
    feature_sets[i] = feature_sets[i].drop(columns=["filename", "category"], errors="ignore")
    feature_sets[i] = feature_sets[i].apply(pd.to_numeric, errors='coerce')  # Convert to numeric
    feature_sets[i] = feature_sets[i].fillna(0)  # Fill NaN with 0

# Apply weights to features
weighted_features = None
for i, (features, weight) in tqdm(enumerate(zip(feature_sets, weights)), desc="Applying Weights", total=len(feature_sets)):
    weighted_set = features.add_prefix(f"set{i}_") * weight  # Apply weight
    weighted_features = pd.concat([weighted_features, weighted_set], axis=1) if weighted_features is not None else weighted_set

# Ensure data alignment
assert len(weighted_features) == len(labels), "Features and labels must have the same number of samples."

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
num_classes = len(np.unique(encoded_labels))
y_categorical = to_categorical(encoded_labels, num_classes=num_classes)

# Split BEFORE scaling so the scaler's statistics come from the training rows
# only; fitting it on the full data lets test-set information reach the model.
X_train, X_test, y_train, y_test = train_test_split(weighted_features, y_categorical, test_size=0.2, random_state=42, stratify=encoded_labels)

# Normalize features
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape features for CNN input (assuming 1D convolution)
X_train = X_train[..., np.newaxis]  # Add channel dimension for CNN
X_test = X_test[..., np.newaxis]

# Simplified CNN model to reduce memory consumption
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train with smaller batch size
history = model.fit(X_train, y_train, epochs=150, batch_size=16, validation_split=0.2, verbose=2)

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")


# Predictions and classification report
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_true_classes = np.argmax(y_test, axis=1)

print("\nClassification Report:")
# target_names must be strings; the encoder's classes may be numeric.
print(classification_report(y_test_true_classes, y_test_pred_classes, target_names=[str(c) for c in label_encoder.classes_]))


**GA Weight Optimization**

In [None]:
import pandas as pd
import numpy as np
import os
import random
import warnings
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from deap import base, creator, tools, algorithms

warnings.filterwarnings("ignore", category=UserWarning)  # Silence every UserWarning from here on

# Paths to feature CSV files. Order matters: the position in this list becomes
# the 'set{i}_' column prefix and the index of the matching weight.
file_paths = [
    r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\glcm_swt_dwt_merged\combined_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"
]

feature_sets = []  # Filled by the loading loop: one prefixed feature frame per file
meta_data = None  # Set once by the loading loop: 'filename' and 'category' of the first file

# Function to reduce memory usage
def reduce_memory_usage(df):
    """Downcast the float64/int64 columns of ``df`` in place and return ``df``.

    Every column whose dtype is ``float64`` or ``int64`` is passed through
    ``pd.to_numeric(..., downcast='float')`` and written back under the same
    name. Columns of any other dtype are left as they are.
    """
    wide_numeric = df.select_dtypes(include=['float64', 'int64'])
    for name in wide_numeric.columns:
        df[name] = pd.to_numeric(df[name], downcast='float')
    return df

# Read each CSV, peel off the metadata, and keep a prefixed numeric frame per file
for i, path in enumerate(file_paths):
    df = pd.read_csv(path).rename(columns={'Filename': 'filename', 'Labels': 'category'})

    # The first file supplies the metadata; 'category' is stored as a categorical
    if meta_data is None:
        meta_data = df[['filename', 'category']].copy()
        meta_data['category'] = meta_data['category'].astype('category')

    # Metadata is removed from every file (first one included) before prefixing
    df = reduce_memory_usage(df.drop(columns=['filename', 'category'], errors='ignore'))
    feature_sets.append(df.add_prefix(f"set{i}_"))  # 'set{i}_' identifies the source file

# One wide frame: metadata columns first, then every prefixed feature set
merged_features = pd.concat([meta_data, *feature_sets], axis=1)

# Integer class codes, numbered in order of first appearance
labels, _ = pd.factorize(merged_features['category'])
features = merged_features.drop(columns=['filename', 'category'], errors='ignore')

# Coerce everything to numbers, then replace gaps with the column mean
features = features.apply(pd.to_numeric, errors='coerce')
features = features.fillna(features.mean())

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# DEAP keeps created classes on the `creator` module; remove leftovers from an
# earlier run of this cell so create() does not warn about redefinition
for stale_class in ("FitnessMax", "Individual"):
    if hasattr(creator, stale_class):
        delattr(creator, stale_class)

# Single-objective maximisation; an individual is a plain list of weights
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# Search hyperparameters
num_feature_sets = len(feature_sets)
IND_SIZE = num_feature_sets  # one gene per feature set
POP_SIZE = 30
CX_PROB = 0.5
MUT_PROB = 0.2
NGEN = 200

def eval_weights(individual):
    """DEAP fitness: hold-out accuracy of a logistic regression on per-set summaries.

    For feature set ``i`` (columns of the module-level ``X_train`` / ``X_test``
    whose name contains ``set{i}_``) the row-wise mean is taken and multiplied
    by ``individual[i]``, giving one input column per feature set. A set with
    no matching columns leaves its column at zero and prints a warning.

    Returns a one-element tuple with the accuracy on ``X_test`` / ``y_test``.
    Any exception is printed and turned into a fitness of ``(0.0,)``.
    """
    try:
        n_train, n_test = X_train.shape[0], X_test.shape[0]
        weighted_train = np.zeros((n_train, IND_SIZE), dtype=np.float32)
        weighted_test = np.zeros((n_test, IND_SIZE), dtype=np.float32)

        for i in range(num_feature_sets):
            tag = f"set{i}_"
            subset_train = X_train.filter(like=tag, axis=1).astype(np.float32)
            subset_test = X_test.filter(like=tag, axis=1).astype(np.float32)

            # Nothing matched this set: report it and leave the zero column alone
            if subset_train.shape[1] <= 0:
                print(f"Warning: Feature set {i} is empty!")
                continue

            gene = individual[i]
            weighted_train[:, i] = subset_train.mean(axis=1) * gene
            weighted_test[:, i] = subset_test.mean(axis=1) * gene

        # Mean-impute any remaining gaps, using training statistics for both splits
        imputer = SimpleImputer(strategy='mean')
        weighted_train = imputer.fit_transform(weighted_train)
        weighted_test = imputer.transform(weighted_test)

        # Fit on the training summaries, score on the held-out ones
        model = LogisticRegression(max_iter=1000)
        model.fit(weighted_train, y_train)
        predictions = model.predict(weighted_test)

        score = accuracy_score(y_test, predictions)
        return (score,)

    except Exception as e:
        print(f"Error in eval_weights: {e}")
        return (0.0,)


# Genetic Algorithm Operators
WEIGHT_MIN, WEIGHT_MAX = 0.1, 1.0  # Same range the PSO search uses for its bounds

# DEAP's operators draw from the stdlib `random` module; seed it so the
# search, and therefore the saved weights, can be reproduced.
random.seed(42)


def _clip_to_bounds(low, high):
    """Return a decorator that clamps every gene of an operator's offspring to [low, high].

    Blend crossover and Gaussian mutation can move a gene outside the range the
    population was initialised in (even below zero). Clamping after each
    operator keeps the weights in the intended interval.
    """
    def decorator(operator):
        def wrapper(*args, **kwargs):
            offspring = operator(*args, **kwargs)
            for child in offspring:
                for j in range(len(child)):
                    child[j] = min(max(child[j], low), high)
            return offspring
        return wrapper
    return decorator


toolbox = base.Toolbox()
toolbox.register("attr_float", random.uniform, WEIGHT_MIN, WEIGHT_MAX)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=IND_SIZE)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", eval_weights)
toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.1, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.decorate("mate", _clip_to_bounds(WEIGHT_MIN, WEIGHT_MAX))
toolbox.decorate("mutate", _clip_to_bounds(WEIGHT_MIN, WEIGHT_MAX))

# Run Genetic Algorithm
pop = toolbox.population(n=POP_SIZE)
hof = tools.HallOfFame(1)  # Keeps the single best individual seen over the whole run
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("max", np.max)

algorithms.eaSimple(pop, toolbox, cxpb=CX_PROB, mutpb=MUT_PROB, ngen=NGEN, stats=stats, halloffame=hof, verbose=True)

# Save Best Weights
best_weights = hof[0]
weights_output_path = r"C:\Users\123\OneDrive\Desktop\features extraction\weights optimization\optimized_feature_set_weights_GA(2nd).csv"

weights_df = pd.DataFrame({
    'Feature_Set': [f"set{i}" for i in range(num_feature_sets)],
    'Weight': list(best_weights)
})
weights_df.to_csv(weights_output_path, index=False)

print(f"Optimized weights saved to: {weights_output_path}")
print(f"Best Accuracy Achieved: {hof[0].fitness.values[0]}")


**Model Training on GA-Optimized Weights**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm

# File paths for feature vectors and weights
file_paths = [
    r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\glcm_swt_dwt_merged\combined_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"
]

weights_csv_path =r"C:\Users\123\OneDrive\Desktop\features extraction\weights optimization\optimized_feature_set_weights_GA(2nd).csv"

# Load the weights (one row per feature set, in the same order as file_paths)
weights_df = pd.read_csv(weights_csv_path)
weights = weights_df['Weight'].values
if len(weights) != len(file_paths):
    # zip() below would otherwise silently drop the unmatched feature sets.
    raise ValueError(f"Expected {len(file_paths)} weights, found {len(weights)} in {weights_csv_path}")

# Step 1: Load and Standardize Column Names
feature_sets = []
meta_data = None  # 'filename' and 'category', taken once from the first file

# Load and preprocess feature sets
for path in tqdm(file_paths, desc="Loading Feature Sets", unit="file"):
    # Map the alternative header spellings onto the canonical names (no-op when absent).
    df = pd.read_csv(path).rename(columns={'Filename': 'filename', 'Labels': 'category'})

    if meta_data is None:
        meta_data = df[['filename', 'category']].copy()

    # Drop the metadata from every file, the first included. Otherwise the first
    # set would carry 'category' into the model inputs as 'set0_category'.
    df = df.drop(columns=['filename', 'category'], errors='ignore')

    # Ensure all feature columns are numeric; unparseable or missing values become 0
    df = df.apply(pd.to_numeric, errors='coerce').fillna(0)

    feature_sets.append(df)

# Step 2: Apply Weights and Merge Feature Sets
weighted_sets = [
    features.add_prefix(f"set{i}_") * weight
    for i, (features, weight) in tqdm(enumerate(zip(feature_sets, weights)), desc="Applying Weights", total=len(feature_sets))
]
weighted_features = pd.concat(weighted_sets, axis=1)

# Add labels back to the dataset
final_dataset = pd.concat([meta_data, weighted_features], axis=1)
assert final_dataset['category'].notna().all(), "Feature sets and labels are not row-aligned."

# Step 3: Prepare Data for Training
labels = final_dataset['category']
features = final_dataset.drop(columns=['filename', 'category'], errors='ignore')

# Encode labels
labels = pd.factorize(labels)[0]

# Split the dataset into training and testing sets
# NOTE(review): test_size and random_state match the GA cell, so this test split
# looks like the one the weights were tuned on - the test score may be optimistic.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Step 4: Train a Random Forest Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 5: Evaluate the Model on Training and Testing Sets
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Training accuracy
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Accuracy: {train_accuracy:.4f}")

# Testing accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Step 6: Detailed Performance on Testing Set
print("\nClassification Report (Testing Set):")
print(classification_report(y_test, y_test_pred))


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout, MaxPooling1D
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

# Load weights
weights_path = r"C:\Users\123\OneDrive\Desktop\features extraction\weights optimization\optimized_feature_set_weights_GA(2nd).csv"  # Update with your weights file path
weights = pd.read_csv(weights_path)["Weight"].values

# Load and combine feature sets
feature_paths = [
    r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\glcm_swt_dwt_merged\combined_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"
]
if len(weights) != len(feature_paths):
    # zip() below would otherwise silently drop the unmatched feature sets.
    raise ValueError(f"Expected {len(feature_paths)} weights, found {len(weights)} in {weights_path}")

# Apply the same header normalisation as the other loaders in this notebook, so
# a 'Filename'/'Labels' file has its metadata removed by the drop() below
# instead of being coerced to numbers and used as features.
feature_sets = [
    pd.read_csv(path).rename(columns={'Filename': 'filename', 'Labels': 'category'})
    for path in feature_paths
]

# Extract labels and remove unnecessary columns
labels = feature_sets[0]["category"]  # Assuming all files share the same labels
for i in range(len(feature_sets)):
    feature_sets[i] = feature_sets[i].drop(columns=["filename", "category"], errors="ignore")
    feature_sets[i] = feature_sets[i].apply(pd.to_numeric, errors='coerce')  # Convert to numeric
    feature_sets[i] = feature_sets[i].fillna(0)  # Fill NaN with 0

# Apply weights to features
weighted_features = None
for i, (features, weight) in tqdm(enumerate(zip(feature_sets, weights)), desc="Applying Weights", total=len(feature_sets)):
    weighted_set = features.add_prefix(f"set{i}_") * weight  # Apply weight
    weighted_features = pd.concat([weighted_features, weighted_set], axis=1) if weighted_features is not None else weighted_set

# Ensure data alignment
assert len(weighted_features) == len(labels), "Features and labels must have the same number of samples."

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
num_classes = len(np.unique(encoded_labels))
y_categorical = to_categorical(encoded_labels, num_classes=num_classes)

# Split BEFORE scaling so the scaler's statistics come from the training rows
# only; fitting it on the full data lets test-set information reach the model.
X_train, X_test, y_train, y_test = train_test_split(weighted_features, y_categorical, test_size=0.2, random_state=42, stratify=encoded_labels)

# Normalize features
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape features for CNN input (assuming 1D convolution)
X_train = X_train[..., np.newaxis]  # Add channel dimension for CNN
X_test = X_test[..., np.newaxis]

# Simplified CNN model to reduce memory consumption
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train with smaller batch size
history = model.fit(X_train, y_train, epochs=150, batch_size=16, validation_split=0.2, verbose=2)

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")


# Predictions and classification report
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_true_classes = np.argmax(y_test, axis=1)

print("\nClassification Report:")
# target_names must be strings; the encoder's classes may be numeric.
print(classification_report(y_test_true_classes, y_test_pred_classes, target_names=[str(c) for c in label_encoder.classes_]))


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.svm import SVC

# Load weights
weights_path = r"C:\Users\123\OneDrive\Desktop\features extraction\weights optimization\optimized_feature_set_weights_GA(2nd).csv"  # Update with your weights file path
weights = pd.read_csv(weights_path)["Weight"].values

# Load and combine feature sets
feature_paths = [
    r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\glcm_swt_dwt_merged\combined_features.csv",
    r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"
]
if len(weights) != len(feature_paths):
    # zip() below would otherwise silently drop the unmatched feature sets.
    raise ValueError(f"Expected {len(feature_paths)} weights, found {len(weights)} in {weights_path}")

# Apply the same header normalisation as the other loaders in this notebook, so
# a 'Filename'/'Labels' file has its metadata removed by the drop() below
# instead of being coerced to numbers and used as features.
feature_sets = [
    pd.read_csv(path).rename(columns={'Filename': 'filename', 'Labels': 'category'})
    for path in feature_paths
]

# Extract labels and preprocess features
labels = feature_sets[0]["category"]  # Assuming all files share the same labels
for i in range(len(feature_sets)):
    feature_sets[i] = feature_sets[i].drop(columns=["filename", "category"], errors="ignore")
    feature_sets[i] = feature_sets[i].apply(pd.to_numeric, errors='coerce')  # Convert to numeric
    feature_sets[i] = feature_sets[i].fillna(0)  # Fill NaN with 0

# Apply weights to features
weighted_features = None
for i, (features, weight) in enumerate(zip(feature_sets, weights)):
    weighted_set = features.add_prefix(f"set{i}_") * weight  # Apply weight
    weighted_features = pd.concat([weighted_features, weighted_set], axis=1) if weighted_features is not None else weighted_set

# Ensure data alignment
assert len(weighted_features) == len(labels), "Features and labels must have the same number of samples."

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Train-Test Split
# Split BEFORE scaling so the scaler's statistics come from the training rows
# only; fitting it on the full data lets test-set information reach the model.
X_train, X_test, y_train, y_test = train_test_split(weighted_features, encoded_labels, test_size=0.2, random_state=42, stratify=encoded_labels)

# Normalize features
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the SVM model
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train, y_train)

# Calculate training accuracy
y_train_pred = svm_model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Accuracy: {train_accuracy:.4f}")

# Calculate testing accuracy
y_test_pred = svm_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Detailed classification report
print("\nClassification Report:")
# target_names must be strings; the encoder's classes may be numeric.
print(classification_report(y_test, y_test_pred, target_names=[str(c) for c in label_encoder.classes_]))