New Code


In [None]:
!pip install --upgrade scikit-learn imbalanced-learn


In [None]:
!pip install imbalanced-learn==0.8.0


In [None]:
import cv2
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from imblearn.under_sampling import RandomUnderSampler

# Load an image and derive its equalized-grayscale and edge-map versions
def preprocess_image(image_path, target_size=(100, 100)):
    """Read an image from disk and return ``(equalized, edges)``.

    Pipeline: read -> resize to ``target_size`` -> grayscale -> histogram
    equalization; the edge map is Canny (thresholds 50/150) applied to a
    5x5 Gaussian-blurred copy of the equalized image.

    Parameters
    ----------
    image_path : str
        Path handed to ``cv2.imread``.
    target_size : tuple, optional
        Output size handed to ``cv2.resize`` (default ``(100, 100)``).

    Returns
    -------
    tuple
        ``(equalized, edges)`` on success, or ``(None, None)`` when the file
        cannot be read or decodes to an empty image (a message is printed).
    """
    loaded = cv2.imread(image_path)

    if loaded is not None and loaded.size != 0:
        # Normalise geometry first, then drop colour information.
        grayscale = cv2.cvtColor(cv2.resize(loaded, target_size), cv2.COLOR_BGR2GRAY)

        # Spread the intensity distribution over the full range.
        equalized = cv2.equalizeHist(grayscale)

        # Smooth before edge detection so noise does not produce spurious edges.
        smoothed = cv2.GaussianBlur(equalized, (5, 5), 0)
        return equalized, cv2.Canny(smoothed, 50, 150)

    print(f"Error loading image: {image_path}")
    return None, None

# Build the histogram-based feature vector for one grayscale image
def extract_features(image):
    """Return a flat feature list for a single-channel image.

    The vector is the 256-bin intensity histogram of ``image`` followed by the
    256-bin histogram of its Canny edge map (thresholds 50/150).

    Parameters
    ----------
    image : array-like
        Single-channel image accepted by ``cv2.calcHist`` / ``cv2.Canny``.

    Returns
    -------
    list
        Intensity-histogram values followed by edge-histogram values.
    """
    bins, value_range = [256], [0, 256]

    # Distribution of pixel intensities.
    intensity_hist = cv2.calcHist([image], [0], None, bins, value_range)

    # Distribution of the edge map's pixel values.
    # NOTE(review): a Canny map normally holds only 0 and 255, so most of these
    # bins are expected to be empty - confirm whether 256 bins are intended.
    edge_map = cv2.Canny(image, 50, 150)
    edge_hist = cv2.calcHist([edge_map], [0], None, bins, value_range)

    return [*intensity_hist.flatten(), *edge_hist.flatten()]

# Input dataset (one sub-directory per class) and the directory that receives
# the preprocessed copies, mirrored class by class.
dataset_path = 'dataset_full'
output_path = 'output_directory'

# Create the output directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)

# Preprocess every image and save its equalized grayscale version
for class_name in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, class_name)
    if not os.path.isdir(class_path):
        # Stray files next to the class folders (e.g. .DS_Store, README) would
        # otherwise make os.listdir(class_path) raise NotADirectoryError.
        continue

    output_class_dir = os.path.join(output_path, class_name)
    os.makedirs(output_class_dir, exist_ok=True)

    for image_name in sorted(os.listdir(class_path)):
        image_path = os.path.join(class_path, image_name)
        equalized, edges = preprocess_image(image_path)
        if equalized is None:
            # Unreadable file; preprocess_image has already printed a message.
            continue

        output_image_path = os.path.join(output_class_dir, image_name)
        # cv2.imwrite signals failure through its return value instead of
        # raising, so check it rather than silently losing the image.
        if not cv2.imwrite(output_image_path, equalized):
            print(f"Error saving image: {output_image_path}")

# Load the preprocessed images and extract features
# NOTE(review): the files under output_path were already resized/equalized when
# they were written; preprocess_image applies those steps a second time here.
# Confirm that double equalization is intended.
X = []
y = []
# os.listdir returns entries in arbitrary order; sorting fixes the row order so
# the seeded under-sampling and train/test split downstream are reproducible.
for class_name in sorted(os.listdir(output_path)):
    class_path = os.path.join(output_path, class_name)
    if not os.path.isdir(class_path):
        # Ignore stray files sitting next to the class folders.
        continue

    for image_name in sorted(os.listdir(class_path)):
        image_path = os.path.join(class_path, image_name)
        equalized, edges = preprocess_image(image_path)
        if equalized is None:
            # preprocess_image has already reported the unreadable file.
            continue

        # Extract features for the image; the class folder name is the label
        features = extract_features(equalized)
        X.append(features)
        y.append(class_name)

if not X:
    # Fail here with a clear message instead of an obscure error in the sampler.
    raise RuntimeError(f"No readable images found under {output_path!r}")

# Convert the list of feature vectors into a 2D array (one row per image)
X = np.array(X)
y = np.array(y)

# Perform random under-sampling to balance the class counts
under_sampler = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = under_sampler.fit_resample(X, y)

# Split the dataset into training and testing sets BEFORE fitting PCA.
# Fitting PCA on all samples would let test-set statistics shape the features
# the classifier is trained on (data leakage) and inflate the reported accuracy.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42
)

# Perform PCA for dimensionality reduction, fitted on the training split only.
# n_components may not exceed min(n_samples, n_features), so cap the default of 50;
# random_state keeps the (possibly randomized) SVD solver reproducible.
n_components = min(50, *X_train_raw.shape)  # Adjust the number of components as needed
pca = PCA(n_components=n_components, random_state=42)
X_train = pca.fit_transform(X_train_raw)
X_test = pca.transform(X_test_raw)

# Projection of the whole resampled set, kept so the name X_pca stays available.
X_pca = pca.transform(X_resampled)

# Train a Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate the classifier on the held-out split
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


!pip install lightgbm

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# Create a fresh Random Forest for cross-validation.
# It gets its own name on purpose: rebinding `clf` here would replace the
# already-fitted forest with an unfitted one (cross_val_score fits clones, not
# the object passed in), breaking any later `clf.predict(...)` call.
cv_clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Perform cross-validation
# X: Features, y: Target variable, cv: Number of folds
# scoring: Evaluation metric (e.g., accuracy, precision, recall, etc.)
# (The original passed `clf2`, which is not defined until a later cell.)
scores = cross_val_score(cv_clf, X, y, cv=5, scoring='accuracy')

# Print cross-validation scores
print("Cross-validation scores:", scores)

# Calculate and print the mean and standard deviation of cross-validation scores
print("Mean accuracy:", scores.mean())
print("Standard deviation of accuracy:", scores.std())


In [None]:
import numpy as np

# Tally how many samples carry each distinct label in y and list them,
# one "label: count" line per class.
unique_values, counts = np.unique(y, return_counts=True)
for position, value in enumerate(unique_values):
    count = counts[position]
    print(f"{value}: {count}")


In [None]:
import lightgbm as lgb
from sklearn.svm import SVC



In [None]:
# Fit two additional classifiers on the training split:
#   clf1 - LightGBM with its default hyper-parameters
#   clf2 - support-vector classifier with an RBF kernel
# Both fit() calls return the estimator itself, so construction and fitting chain.
clf1 = lgb.LGBMClassifier().fit(X_train, y_train)
clf2 = SVC(kernel='rbf').fit(X_train, y_train)

# Show the fitted SVC as the cell output.
clf2



In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Test-set predictions: y_pred1 comes from clf1, y_pred2 from clf2.
y_pred1, y_pred2 = (model.predict(X_test) for model in (clf1, clf2))


In [None]:
# Score the y_pred1 predictions against the held-out labels.
accuracy = accuracy_score(y_test, y_pred1)
accuracy_percentage = 100 * accuracy
print(f"Accuracy: {accuracy_percentage:.2f}%")

# Per-class metrics first, then the raw confusion counts.
for heading, build_report in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(heading)
    print(build_report(y_test, y_pred1))

In [None]:
# Score the y_pred2 predictions against the held-out labels.
accuracy = accuracy_score(y_test, y_pred2)
accuracy_percentage = 100 * accuracy
print(f"Accuracy: {accuracy_percentage:.2f}%")

# Per-class metrics first, then the raw confusion counts.
for heading, build_report in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(heading)
    print(build_report(y_test, y_pred2))

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Candidate values shared by the sub-grids below
c_values = [0.1, 1, 10, 100]          # Regularization parameter
gamma_values = [0.001, 0.01, 0.1, 1]  # Kernel coefficient

# Define the parameter grid to search.
# `gamma` has no effect on the linear kernel, so crossing it with 'linear'
# would fit every linear candidate four times for identical results. A list of
# sub-grids searches gamma only for the kernels that actually use it.
param_grid = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['rbf', 'poly'], 'C': c_values, 'gamma': gamma_values},
]

# Create an SVM classifier
svm = SVC()

# Create a GridSearchCV object (5-fold CV, ranked by accuracy)
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy')

# Perform grid search on the training split only
grid_search.fit(X_train, y_train)

# Print the best parameters found
print("Best parameters:", grid_search.best_params_)

# Print the best cross-validation score found
print("Best cross-validation score:", grid_search.best_score_)

# Get the best estimator (refitted on the whole training split by GridSearchCV)
best_svm = grid_search.best_estimator_

# Evaluate the best estimator on the test set
test_score = best_svm.score(X_test, y_test)
print("Test set accuracy:", test_score)


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate whichever fitted estimator is currently bound to `clf`.
# NOTE(review): `clf` must already be fitted when this cell runs - confirm that
# no earlier cell rebinds the name to an unfitted estimator.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
accuracy_percentage = 100 * accuracy
print(f"Accuracy: {accuracy_percentage:.2f}%")

# Per-class metrics first, then the raw confusion counts.
for heading, build_report in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(heading)
    print(build_report(y_test, y_pred))


In [None]:
from joblib import dump, load

# Persist the fitted clf2 model to disk, then read it straight back to confirm
# that the file round-trips.
# joblib files are pickles: only load() files that come from a trusted source.
model_file = "my_model.pkl"
dump(clf2, model_file)
load_mod = load(model_file)


import os
import shutil

# Folder to archive and the archive file that should result
folder_path = '/content/extracted_data'
zip_path = '/content/extracted_data.zip'

# shutil.make_archive appends the format's extension to base_name itself, so
# passing a name that already ends in ".zip" would create "...zip.zip".
# Strip the extension before handing the name over.
archive_base = os.path.splitext(zip_path)[0]
shutil.make_archive(archive_base, 'zip', folder_path)