**Models training on DWT features**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Random Forest on the DWT feature table.
# Loads the extracted features, trains a depth-limited Random Forest and
# reports train/test accuracy, a per-class report and a confusion matrix.

# Step 1: Load extracted features
input_csv_path = r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv"
df = pd.read_csv(input_csv_path)

# Separate features and labels ("filename" is metadata, "category" is the target)
X = df.drop(columns=["category", "filename"])
y = df["category"]

# Step 2: Preprocess the data
# Encode the string labels as integers; classes_ keeps the original names
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split BEFORE fitting the scaler so that held-out rows never contribute to
# the min/max used for normalization (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded
)

# Normalize the features using statistics from the training split only
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=10)
rf_model.fit(X_train, y_train)

# Step 4: Evaluate the model on both splits to expose over-fitting
y_pred_train = rf_model.predict(X_train)
y_pred_test = rf_model.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

# Print accuracy scores
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Print classification report
print("\nClassification Report (Test Data):\n", classification_report(y_test, y_pred_test, target_names=label_encoder.classes_))

# Step 5: Plot Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred_test)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Step 6: Plot Training vs Testing Accuracy
plt.figure(figsize=(6, 4))
plt.bar(["Training Accuracy", "Testing Accuracy"], [train_accuracy, test_accuracy], color=['blue', 'orange'])
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.title('Training vs Testing Accuracy')
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# SVM (grid-searched) on the DWT feature table.
# Trains on a stratified 80/20 split, reports accuracy and a per-class report,
# saves the fitted model and scaler, then plots the confusion matrix.

# Load the extracted features
features_csv_path = r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv"
data = pd.read_csv(features_csv_path)

# Separate features (X) and labels (y)
X = data.drop(columns=["category", "filename"])  # Drop non-feature columns
y = data["category"]

# Split the data into training and test sets (stratify keeps class proportions)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=120, stratify=y)

# Standardize features (fit on the training split only, then reuse on test)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the SVM parameter grid for optimization
param_grid = {
    'C': [0.1, 1, 10, 100],         # Regularization parameter
    'kernel': ['linear', 'rbf'],    # Linear and RBF kernels
    'gamma': ['scale', 'auto'],     # Kernel coefficient
}

# Initialize the SVM classifier and GridSearchCV
svm = SVC(probability=True)
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=3,                           # 3-fold cross-validation for efficiency
    scoring='accuracy',             # Use accuracy as the scoring metric
    verbose=2,
    n_jobs=-1                       # Use all available CPU cores
)

# Train the SVM model with grid search
print("Training the SVM model...")
grid_search.fit(X_train_scaled, y_train)

# Get the best model from grid search
best_svm = grid_search.best_estimator_
print("\nBest SVM Parameters:", grid_search.best_params_)

# Evaluate the model on both the training and the test set
print("\nEvaluating the model...")
y_pred_train = best_svm.predict(X_train_scaled)
y_pred_test = best_svm.predict(X_test_scaled)

# Calculate accuracy
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Print classification report
print("\nClassification Report (Test Data):\n", classification_report(y_test, y_pred_test))

# Save the trained SVM model and scaler for future use
joblib.dump(best_svm, r"C:\Users\123\OneDrive\Desktop\svm_model.pkl")
joblib.dump(scaler, r"C:\Users\123\OneDrive\Desktop\scaler.pkl")

print("\nModel training and saving completed.")

# Step 5: Plot Confusion Matrix
# Use the classifier's own (sorted) class order for both the matrix and the
# tick labels. y.unique() returns labels in order of first appearance, which
# does not generally match the sorted order confusion_matrix uses, so the
# axes could end up mislabeled.
class_labels = best_svm.classes_
conf_matrix = confusion_matrix(y_test, y_pred_test, labels=class_labels)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Step 6: Plot Training vs Testing Accuracy
plt.figure(figsize=(6, 4))
plt.bar(["Training Accuracy", "Testing Accuracy"], [train_accuracy, test_accuracy], color=['blue', 'orange'])
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.title('Training vs Testing Accuracy')
plt.show()


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Shallow 1D-CNN on the DWT feature table.
# Each sample's feature vector is fed to Conv1D as a length-n_features
# sequence with a single channel.

# Path to the feature CSV file
csv_path = r"C:\Users\123\OneDrive\Desktop\improved_wavelet_features.csv"

# Load the extracted features dataset
data = pd.read_csv(csv_path)

# Encode the class labels into numerical values
label_encoder = LabelEncoder()
data['category'] = label_encoder.fit_transform(data['category'])

# Separate features (X) and target labels (y)
X = data.drop(columns=['category', 'filename'])  # Drop target and metadata columns
y = data['category']

# One-hot encode the labels for CNN
num_classes = len(np.unique(y))
y_categorical = to_categorical(y, num_classes)

# Split the dataset into training and testing sets (80% train, 20% test).
# The split happens BEFORE scaling so the test rows do not influence the
# mean/variance used for standardization (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features with statistics from the training split only,
# then add a trailing channel dimension for Conv1D: (samples, features, 1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)[..., np.newaxis]
X_test = scaler.transform(X_test)[..., np.newaxis]

# Define the shallow CNN model
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model (one-hot targets -> categorical cross-entropy)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; 20% of the training split is held out for validation
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.2, verbose=2)

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

# Print training and testing accuracy
print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Make predictions on the test set and convert probabilities / one-hot rows
# back to integer class indices
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_true_classes = np.argmax(y_test, axis=1)

# Print classification report
print("\nClassification Report:")
print(classification_report(y_test_true_classes, y_test_pred_classes, target_names=label_encoder.classes_))

# Compute and plot confusion matrix
cm = confusion_matrix(y_test_true_classes, y_test_pred_classes)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()



**Models training on SWT features**

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import joblib

# Random Forest baseline on the SWT feature table: stratified 80/20 split,
# train/test accuracy, per-class report and confusion-matrix heatmap.

# Location of the SWT feature table
csv_path = r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv"
data = pd.read_csv(csv_path)

# Replace the string categories with integer codes (names stay in classes_)
label_encoder = LabelEncoder()
data['category'] = label_encoder.fit_transform(data['category'])

# Target vector and feature matrix ("filename" is metadata only)
y = data['category']
X = data.drop(columns=['category', 'filename'])

# Stratified hold-out split: 80% train, 20% test
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Build and fit the forest in one step (fit returns the estimator itself)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predictions for both splits
y_train_pred = rf_classifier.predict(X_train)
y_test_pred = rf_classifier.predict(X_test)

# Accuracy on both splits
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

# Per-class precision / recall / F1 on the held-out data
print("\nClassification Report (Test Set):\n", classification_report(y_test, y_test_pred))

# Confusion matrix heatmap, labeled with the original category names
conf_matrix = confusion_matrix(y_test, y_test_pred)
class_names = label_encoder.classes_

plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

# SVM (grid-searched) on the SWT feature table with ANOVA feature selection.

# Load the extracted features
input_csv_path = r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv"
df = pd.read_csv(input_csv_path)

# Separate features and labels
X = df.drop(columns=["category", "filename"])  # Drop non-feature columns
y = df["category"]  # Labels

# Encode the class labels into numeric form
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split FIRST. Feature selection is supervised (it looks at the labels), so
# fitting it on the full dataset would let test labels steer which features
# are kept and inflate the reported test accuracy.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Perform feature selection (top 50 features by ANOVA F-test) on the training
# split only; cap k so a table with fewer than 50 columns still works.
selector = SelectKBest(score_func=f_classif, k=min(50, X.shape[1]))
X_train_selected = selector.fit_transform(X_train, y_train)
X_test_selected = selector.transform(X_test)

# Standardize the selected features (statistics from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_selected)
X_test_scaled = scaler.transform(X_test_selected)

# Define the SVM model
svm = SVC(random_state=42, probability=True)

# Hyperparameter tuning using GridSearchCV
# NOTE(review): selection/scaling are fitted on the whole training split, so
# the inner CV scores are still slightly optimistic; wrapping the steps in a
# Pipeline would remove that as well.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 0.01, 0.001],
    'kernel': ['linear', 'rbf']
}

grid_search = GridSearchCV(svm, param_grid, scoring='accuracy', cv=5, verbose=1, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Get the best model from GridSearchCV
best_svm = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

# Evaluate the model on the training set (for training accuracy)
y_train_pred = best_svm.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"\nTraining Accuracy: {train_accuracy * 100:.2f}%")

# Evaluate the model on the test set (for testing accuracy)
y_test_pred = best_svm.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Generate and display the confusion matrix
conf_matrix = confusion_matrix(y_test, y_test_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Plot the confusion matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()

# Print the classification report
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred, target_names=label_encoder.classes_))


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Shallow 1D-CNN on the SWT feature table.
# Each sample's feature vector is fed to Conv1D as a length-n_features
# sequence with a single channel.

# Path to the feature CSV file
csv_path = r"C:\Users\123\OneDrive\Desktop\features extraction\SWT\SWT_features_with_labels.csv"

# Load the extracted features dataset
data = pd.read_csv(csv_path)

# Encode the class labels into numerical values
label_encoder = LabelEncoder()
data['category'] = label_encoder.fit_transform(data['category'])

# Separate features (X) and target labels (y)
X = data.drop(columns=['category', 'filename'])  # Drop target and metadata columns
y = data['category']

# One-hot encode the labels for CNN
num_classes = len(np.unique(y))
y_categorical = to_categorical(y, num_classes)

# Split the dataset into training and testing sets (80% train, 20% test).
# The split happens BEFORE scaling so the test rows do not influence the
# mean/variance used for standardization (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features with statistics from the training split only,
# then add a trailing channel dimension for Conv1D: (samples, features, 1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)[..., np.newaxis]
X_test = scaler.transform(X_test)[..., np.newaxis]

# Define the shallow CNN model
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model (one-hot targets -> categorical cross-entropy)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; 20% of the training split is held out for validation
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.2, verbose=2)

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

# Print training and testing accuracy
print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Make predictions on the test set and convert probabilities / one-hot rows
# back to integer class indices
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_true_classes = np.argmax(y_test, axis=1)

# Print classification report
print("\nClassification Report:")
print(classification_report(y_test_true_classes, y_test_pred_classes, target_names=label_encoder.classes_))

# Compute and plot confusion matrix
cm = confusion_matrix(y_test_true_classes, y_test_pred_classes)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()



**Models training on GLCM features**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# SVM (grid-searched) on the GLCM feature table.
# Trains on a stratified 80/20 split, reports accuracy and a per-class report,
# saves the fitted model and scaler, then plots the evaluation figures.

# Load the extracted features
features_csv_path = r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv"
data = pd.read_csv(features_csv_path)

# Separate features (X) and labels (y)
X = data.drop(columns=["filename", "category"])  # Drop non-feature columns
y = data["category"]

# Split the data into training and test sets (stratify keeps class proportions)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=120, stratify=y)

# Standardize features (fit on the training split only, then reuse on test)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the SVM parameter grid for optimization
param_grid = {
    'C': [0.1, 1, 10, 100],         # Regularization parameter
    'kernel': ['linear', 'rbf'],    # Linear and RBF kernels
    'gamma': ['scale', 'auto'],     # Kernel coefficient
}

# Initialize the SVM classifier and GridSearchCV
svm = SVC(probability=True)
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=3,                           # 3-fold cross-validation
    scoring='accuracy',
    verbose=2,
    n_jobs=-1
)

# Train the SVM model with grid search
print("Training the SVM model...")
grid_search.fit(X_train_scaled, y_train)

# Get the best model from grid search
best_svm = grid_search.best_estimator_
print("\nBest SVM Parameters:", grid_search.best_params_)

# Evaluate the model on both the training and the test set
print("\nEvaluating the model...")
y_pred_train = best_svm.predict(X_train_scaled)
y_pred_test = best_svm.predict(X_test_scaled)

# Calculate accuracy
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# The classifier's own (sorted) class order is used for every report and plot
# below so that rows, columns and tick labels always refer to the same class.
class_labels = best_svm.classes_

# Print classification report (dict form is kept for the per-class bar plot)
class_report = classification_report(y_test, y_pred_test, labels=class_labels, output_dict=True)
print("\nClassification Report:\n", classification_report(y_test, y_pred_test, labels=class_labels))

# Save the trained SVM model and scaler for future use
joblib.dump(best_svm, r"C:\Users\123\OneDrive\Desktop\glcm_svm_model.pkl")
joblib.dump(scaler, r"C:\Users\123\OneDrive\Desktop\glcm_scaler.pkl")

print("\nModel training and saving completed.")

# Step 5: Plot Confusion Matrix
# y.unique() returns labels in order of first appearance, which does not
# generally match the sorted order confusion_matrix uses; use class_labels
# for both the matrix and the ticks instead.
plt.figure(figsize=(8, 6))
conf_matrix = confusion_matrix(y_test, y_pred_test, labels=class_labels)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Step 6: Plot Training vs Testing Accuracy
plt.figure(figsize=(6, 4))
plt.bar(["Training Accuracy", "Testing Accuracy"], [train_accuracy, test_accuracy], color=['blue', 'orange'])
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.title('Training vs Testing Accuracy')
plt.show()

# Step 7: Plot Precision, Recall, and F1-score per class
# Keep only the per-class entries. The report dict also contains 'accuracy',
# 'macro avg' and 'weighted avg'; slicing off the last row would only drop
# 'weighted avg' and leave the other two summary rows in a "per class" plot.
class_rows = [str(label) for label in class_labels]
report_df = pd.DataFrame({name: class_report[name] for name in class_rows}).transpose()
report_df[['precision', 'recall', 'f1-score']].plot(kind='bar', figsize=(10, 6))
plt.title("Precision, Recall & F1-score per Class")
plt.xlabel("Classes")
plt.ylabel("Score")
plt.xticks(rotation=45)
plt.legend(loc='lower right')
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Random Forest on the GLCM feature table.
# Loads the extracted features, trains a depth-limited Random Forest and
# reports train/test accuracy, a per-class report and a confusion matrix.

# Step 1: Load extracted features
input_csv_path = r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv"
df = pd.read_csv(input_csv_path)

# Separate features and labels ("filename" is metadata, "category" is the target)
X = df.drop(columns=["category", "filename"])
y = df["category"]

# Step 2: Preprocess the data
# Encode the string labels as integers; classes_ keeps the original names
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split BEFORE fitting the scaler so that held-out rows never contribute to
# the min/max used for normalization (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded
)

# Normalize the features using statistics from the training split only
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=10)
rf_model.fit(X_train, y_train)

# Step 4: Evaluate the model on both splits to expose over-fitting
y_pred_train = rf_model.predict(X_train)
y_pred_test = rf_model.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

# Print accuracy scores
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Print classification report
print("\nClassification Report (Test Data):\n", classification_report(y_test, y_pred_test, target_names=label_encoder.classes_))

# Step 5: Plot Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred_test)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Step 6: Plot Training vs Testing Accuracy
plt.figure(figsize=(6, 4))
plt.bar(["Training Accuracy", "Testing Accuracy"], [train_accuracy, test_accuracy], color=['blue', 'orange'])
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.title('Training vs Testing Accuracy')
plt.show()


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Shallow 1D-CNN on the GLCM feature table.
# Each sample's feature vector is fed to Conv1D as a length-n_features
# sequence with a single channel.

# Path to the feature CSV file
csv_path = r"C:\Users\123\OneDrive\Desktop\custom\glcm_features.csv"

# Load the extracted features dataset
data = pd.read_csv(csv_path)

# Encode the class labels into numerical values
label_encoder = LabelEncoder()
data['category'] = label_encoder.fit_transform(data['category'])

# Separate features (X) and target labels (y)
X = data.drop(columns=['category', 'filename'])  # Drop target and metadata columns
y = data['category']

# One-hot encode the labels for CNN
num_classes = len(np.unique(y))
y_categorical = to_categorical(y, num_classes)

# Split the dataset into training and testing sets (80% train, 20% test).
# The split happens BEFORE scaling so the test rows do not influence the
# mean/variance used for standardization (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features with statistics from the training split only,
# then add a trailing channel dimension for Conv1D: (samples, features, 1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)[..., np.newaxis]
X_test = scaler.transform(X_test)[..., np.newaxis]

# Define the shallow CNN model
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model (one-hot targets -> categorical cross-entropy)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; 20% of the training split is held out for validation
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.2, verbose=2)

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

# Print training and testing accuracy
print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Make predictions on the test set and convert probabilities / one-hot rows
# back to integer class indices
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_true_classes = np.argmax(y_test, axis=1)

# Print classification report
print("\nClassification Report:")
print(classification_report(y_test_true_classes, y_test_pred_classes, target_names=label_encoder.classes_))

# Compute and plot confusion matrix
cm = confusion_matrix(y_test_true_classes, y_test_pred_classes)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()



**Models training on SWT+GLCM features**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Random Forest on the combined SWT+GLCM feature table.
# Loads the extracted features, trains a depth-limited Random Forest and
# reports train/test accuracy, a per-class report and a confusion matrix.

# Step 1: Load extracted features
input_csv_path = r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv"
df = pd.read_csv(input_csv_path)

# Separate features and labels ("filename" is metadata, "category" is the target)
X = df.drop(columns=["category", "filename"])
y = df["category"]

# Step 2: Preprocess the data
# Encode the string labels as integers; classes_ keeps the original names
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split BEFORE fitting the scaler so that held-out rows never contribute to
# the min/max used for normalization (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded
)

# Normalize the features using statistics from the training split only
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=10)
rf_model.fit(X_train, y_train)

# Step 4: Evaluate the model on both splits to expose over-fitting
y_pred_train = rf_model.predict(X_train)
y_pred_test = rf_model.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

# Print accuracy scores
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Print classification report
print("\nClassification Report (Test Data):\n", classification_report(y_test, y_pred_test, target_names=label_encoder.classes_))

# Step 5: Plot Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred_test)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Step 6: Plot Training vs Testing Accuracy
plt.figure(figsize=(6, 4))
plt.bar(["Training Accuracy", "Testing Accuracy"], [train_accuracy, test_accuracy], color=['blue', 'orange'])
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.title('Training vs Testing Accuracy')
plt.show()


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# SVM (grid-searched) on the combined SWT+GLCM feature table.
# Trains on a stratified 80/20 split, reports accuracy and a per-class
# report, plots the confusion matrix and saves the fitted artifacts.

# Read the feature table
input_csv_path = r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv"  # Path to the feature file
df = pd.read_csv(input_csv_path)

# Labels and feature matrix ("filename" is metadata only)
y = df["category"]
X = df.drop(columns=["filename", "category"])

# Integer-encode the labels; the original names stay in classes_
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Stratified hold-out split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=100,
    stratify=y_encoded,
)

# Standardize: learn mean/variance on the training split, apply to both
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate hyperparameters for the grid search
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

# Base estimator and 5-fold grid search scored on accuracy
svm = SVC(probability=True, random_state=100)
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

# Winning configuration and the estimator refitted with it
best_svm = grid_search.best_estimator_
best_params = grid_search.best_params_

# Predictions and accuracy on both splits
y_train_pred = best_svm.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, y_train_pred)

y_test_pred = best_svm.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_test_pred, target_names=label_encoder.classes_))

# Confusion matrix heatmap, labeled with the original category names
class_names = label_encoder.classes_
conf_matrix = confusion_matrix(y_test, y_test_pred)
plt.figure(figsize=(10, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

# Persist the model, the scaler and the label encoder (the encoder goes to
# the current working directory, as in the original)
for artifact, destination in (
    (best_svm, r"C:\Users\123\OneDrive\Desktop\custom\(svm+glcm)svm_model.pkl"),
    (scaler, r"C:\Users\123\OneDrive\Desktop\custom\(svm+glcm)scaler.pkl"),
    (label_encoder, "label_encoder.pkl"),
):
    joblib.dump(artifact, destination)

print("\nSVM model, scaler, and label encoder saved successfully.")


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Train and evaluate a shallow 1-D CNN on the SWT + GLCM feature table.

# Path to the feature CSV file
csv_path = r"C:\Users\123\OneDrive\Desktop\custom\swt_glcm_features.csv"

# Load the extracted features dataset
data = pd.read_csv(csv_path)

# Encode the class labels into integer ids (0 .. num_classes - 1)
label_encoder = LabelEncoder()
data['category'] = label_encoder.fit_transform(data['category'])

# Separate features (X) and target labels (y)
X = data.drop(columns=['category', 'filename'])  # Drop target and metadata columns
y = data['category']

# One-hot encode the labels for the softmax output layer
num_classes = len(label_encoder.classes_)
class_ids = np.arange(num_classes)
y_categorical = to_categorical(y, num_classes)

# Split into training and testing sets (80% train, 20% test) BEFORE scaling,
# so that no statistics from the test rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features: fit on the training rows only, then apply the
# same transformation to the held-out test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Reshape the features for CNN input (add a trailing channel dimension)
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Define the shallow CNN model
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; Keras holds out 20% of the training arrays for validation
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.2, verbose=2)

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

# Print training and testing accuracy
print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Make predictions on the test set and collapse one-hot / softmax rows to class ids
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_true_classes = np.argmax(y_test, axis=1)

# Print classification report. `labels` pins the row order to the encoder's
# classes so the names always line up, even if a class is absent from the test split.
print("\nClassification Report:")
print(classification_report(
    y_test_true_classes, y_test_pred_classes,
    labels=class_ids, target_names=label_encoder.classes_
))

# Compute and plot confusion matrix (same fixed label order as the tick labels)
cm = confusion_matrix(y_test_true_classes, y_test_pred_classes, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()



**Model training on DWT+GLCM features**

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Tune, evaluate and save an SVM classifier on the DWT + GLCM feature table.

# Load the dataset
input_csv_path = r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv"  # Path to the feature file
df = pd.read_csv(input_csv_path)

# Separate features and labels
X = df.drop(columns=["filename", "category"])  # Drop non-feature columns
y = df["category"]  # Labels

# Encode the labels into numeric form
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
class_ids = np.arange(len(label_encoder.classes_))

# Split the data into training and testing sets (stratified 80/20)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=100, stratify=y_encoded)

# Standardize the features (fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the SVM model; probability=True so the saved model offers predict_proba
svm = SVC(probability=True, random_state=100)

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Best parameters and the corresponding SVM model (refit on the whole training split)
best_params = grid_search.best_params_
best_svm = grid_search.best_estimator_

# Report what the search selected so the run can be reproduced without re-tuning
print(f"Best Parameters: {best_params}")
print(f"Best CV Accuracy: {grid_search.best_score_:.4f}")

# Evaluate the model on the training and test sets
y_train_pred = best_svm.predict(X_train_scaled)
y_test_pred = best_svm.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# `labels` pins the class order to the encoder's classes so the names always
# line up, even if a class happens to be missing from the test split.
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred, labels=class_ids, target_names=label_encoder.classes_))

# Plot confusion matrix (same fixed label order as the tick labels)
conf_matrix = confusion_matrix(y_test, y_test_pred, labels=class_ids)
plt.figure(figsize=(10, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

# Save the trained model, the scaler and the label encoder.
# NOTE: the label encoder path is relative, so it is written to the current
# working directory rather than next to the model and scaler.
joblib.dump(best_svm, r"C:\Users\123\OneDrive\Desktop\custom\dwt+glcm_svm_model.pkl")
joblib.dump(scaler, r"C:\Users\123\OneDrive\Desktop\custom\dwt+glcm_scaler.pkl")
joblib.dump(label_encoder, "label_encoder_dwt+glcm.pkl")

print("\nSVM model, scaler, and label encoder saved successfully.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Train and evaluate a Random Forest on the DWT + GLCM feature table.

# Step 1: Load extracted features
input_csv_path =  r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv"
df = pd.read_csv(input_csv_path)

# Separate features and labels
X = df.drop(columns=["category", "filename"])  # Drop non-numeric columns
y = df["category"]

# Step 2: Preprocess the data
# Encode the labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
class_ids = list(range(len(label_encoder.classes_)))

# Split the data into training and testing sets BEFORE scaling, so the
# min/max used for normalization come from the training rows only.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded
)

# Normalize the features: fit on train, reuse the same mapping on test
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Step 3: Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=10)
rf_model.fit(X_train, y_train)

# Step 4: Evaluate the model
y_pred_train = rf_model.predict(X_train)
y_pred_test = rf_model.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

# Print accuracy scores
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Print classification report. `labels` pins the class order to the encoder's
# classes so the names always line up with the rows.
print(
    "\nClassification Report (Test Data):\n",
    classification_report(y_test, y_pred_test, labels=class_ids, target_names=label_encoder.classes_),
)

# Step 5: Plot Confusion Matrix (same fixed label order as the tick labels)
conf_matrix = confusion_matrix(y_test, y_pred_test, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Step 6: Plot Training vs Testing Accuracy
plt.figure(figsize=(6, 4))
plt.bar(["Training Accuracy", "Testing Accuracy"], [train_accuracy, test_accuracy], color=['blue', 'orange'])
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.title('Training vs Testing Accuracy')
plt.show()


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Train and evaluate a shallow 1-D CNN on the DWT + GLCM feature table.

# Path to the feature CSV file
csv_path = r"C:\Users\123\OneDrive\Desktop\custom\dwt_glcm_features.csv"

# Load the extracted features dataset
data = pd.read_csv(csv_path)

# Encode the class labels into integer ids (0 .. num_classes - 1)
label_encoder = LabelEncoder()
data['category'] = label_encoder.fit_transform(data['category'])

# Separate features (X) and target labels (y)
X = data.drop(columns=['category', 'filename'])  # Drop target and metadata columns
y = data['category']

# One-hot encode the labels for the softmax output layer
num_classes = len(label_encoder.classes_)
class_ids = np.arange(num_classes)
y_categorical = to_categorical(y, num_classes)

# Split into training and testing sets (80% train, 20% test) BEFORE scaling,
# so that no statistics from the test rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features: fit on the training rows only, then apply the
# same transformation to the held-out test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Reshape the features for CNN input (add a trailing channel dimension)
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Define the shallow CNN model
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; Keras holds out 20% of the training arrays for validation
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.2, verbose=2)

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

# Print training and testing accuracy
print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Make predictions on the test set and collapse one-hot / softmax rows to class ids
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_true_classes = np.argmax(y_test, axis=1)

# Print classification report. `labels` pins the row order to the encoder's
# classes so the names always line up, even if a class is absent from the test split.
print("\nClassification Report:")
print(classification_report(
    y_test_true_classes, y_test_pred_classes,
    labels=class_ids, target_names=label_encoder.classes_
))

# Compute and plot confusion matrix (same fixed label order as the tick labels)
cm = confusion_matrix(y_test_true_classes, y_test_pred_classes, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()



**Model training on CNN features**

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Train and evaluate a Random Forest on the CNN-extracted feature table.

# Step 1: Load the Extracted Features and Labels
csv_file_path = r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv" # Update your file path
data = pd.read_csv(csv_file_path)

# Separate features and labels
X = data.drop(columns=['Label', 'Filename'])  # Drop class and image name columns
y = data['Label']  # Target labels (class)

# Fixed, sorted list of every class in the dataset; used for both the metrics
# and the plot so that rows/columns and tick labels can never disagree.
labels = sorted(y.unique())

# Step 2: Split Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Step 3: Initialize and Train Random Forest Model
print("Training Random Forest classifier...")
rf_model = RandomForestClassifier(
    n_estimators=100,  # Number of trees
    random_state=42,
    n_jobs=-1,  # Use all available cores
    class_weight="balanced"  # Handle class imbalances if any
)
rf_model.fit(X_train, y_train)

# Step 4: Evaluate the Model on the Training Set
y_train_pred = rf_model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Accuracy: {train_accuracy * 100:.2f}%")

# Step 5: Evaluate the Model on the Test Set
y_test_pred = rf_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred, labels=labels))

# Step 6: Compute and plot confusion matrix.
# Passing `labels` fixes the matrix order explicitly instead of relying on
# whichever classes happen to appear in the test split.
cm = confusion_matrix(y_test, y_test_pred, labels=labels)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()




In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Train, evaluate and save a shallow 1-D CNN on the CNN-extracted feature table.

# Path to the feature CSV file
csv_path = r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"

# Load the extracted features dataset
data = pd.read_csv(csv_path)

# Encode the class labels into integer ids (0 .. num_classes - 1)
label_encoder = LabelEncoder()
data['Label'] = label_encoder.fit_transform(data['Label'])

# Separate features (X) and target labels (y)
X = data.drop(columns=['Label', 'Filename'])  # Drop target and metadata columns
y = data['Label']

# One-hot encode the labels for the softmax output layer
num_classes = len(label_encoder.classes_)
class_ids = np.arange(num_classes)
y_categorical = to_categorical(y, num_classes)

# Split into training and testing sets (80% train, 20% test) BEFORE scaling,
# so that no statistics from the test rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features: fit on the training rows only, then apply the
# same transformation to the held-out test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Reshape the features for CNN input (add a trailing channel dimension)
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Define the shallow CNN model
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; Keras holds out 20% of the training arrays for validation
history = model.fit(X_train, y_train, epochs=250, batch_size=32, validation_split=0.2, verbose=2)

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

# Print training and testing accuracy
print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

# Make predictions on the test set and collapse one-hot / softmax rows to class ids
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_true_classes = np.argmax(y_test, axis=1)

# Print classification report. `labels` pins the row order to the encoder's
# classes so the names always line up, even if a class is absent from the test split.
print("\nClassification Report:")
print(classification_report(
    y_test_true_classes, y_test_pred_classes,
    labels=class_ids, target_names=label_encoder.classes_
))

# Compute and plot confusion matrix (same fixed label order as the tick labels)
cm = confusion_matrix(y_test_true_classes, y_test_pred_classes, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Save the model.
# NOTE: only the network is written here; `scaler` and `label_encoder` must be
# persisted separately if the model is to be reused on new data.
model_save_path = r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\shallow_cnn_model.h5"
model.save(model_save_path)
print("\nModel saved to:", model_save_path)


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Tune and evaluate a PCA + SVM pipeline on the CNN-extracted feature table.

# Load the extracted features
input_csv_path = r"C:\Users\123\OneDrive\Desktop\features extraction\cnn\CNN_fruit_features_filtered.csv"
df = pd.read_csv(input_csv_path)

# Separate features and labels
X = df.drop(columns=["Label", "Filename"])  # Drop non-feature columns
y = df["Label"]  # Labels

# Encode the class labels into numeric form
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
class_ids = list(range(len(label_encoder.classes_)))

# Split the data into training and testing sets FIRST, so that neither the PCA
# projection nor the scaler ever sees the held-out test rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Dimensionality reduction with PCA, fitted on the training rows only.
# PCA cannot return more components than min(n_samples, n_features), so the
# requested 200 is capped to what the training split can support.
n_components = min(200, X_train_raw.shape[0], X_train_raw.shape[1])
pca = PCA(n_components=n_components, random_state=42)
X_train = pca.fit_transform(X_train_raw)
X_test = pca.transform(X_test_raw)

# Standardize the principal components (fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the SVM model
svm = SVC(random_state=42, probability=True)

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 0.01, 0.001],
    'kernel': ['linear', 'rbf']
}

grid_search = GridSearchCV(svm, param_grid, scoring='accuracy', cv=5, verbose=1, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Get the best model from GridSearchCV
best_svm = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

# Evaluate the model on the training set
y_train_pred = best_svm.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"\nTraining Accuracy: {train_accuracy * 100:.2f}%")

# Evaluate the model on the test set
y_test_pred = best_svm.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Generate and display the confusion matrix. `labels` pins the row/column
# order to the encoder's classes so it always matches the tick labels below.
conf_matrix = confusion_matrix(y_test, y_test_pred, labels=class_ids)
print("\nConfusion Matrix:")
print(conf_matrix)

# Plot the confusion matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()

# Print the classification report
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred, labels=class_ids, target_names=label_encoder.classes_))
