In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import Model

In [None]:
# Attach Google Drive to this Colab runtime; later cells read the dataset from it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Make the dataset folder on Drive the current working directory.
import os

DATASET_DIR = "/content/drive/My Drive/ML/data/Dataset/"
os.chdir(DATASET_DIR)

In [None]:
# Function to load images and labels
def load_images_from_folder(folder, label, size=(224, 224)):
    """Read every decodable image in ``folder`` and tag it with ``label``.

    Args:
        folder: Directory whose entries are each passed to ``cv2.imread``.
        label: Value appended to the label list once per loaded image.
        size: ``(width, height)`` handed to ``cv2.resize``. Defaults to the
            224x224 this notebook uses for VGG16.

    Returns:
        ``(images, labels)``: two lists of equal length. The images are the
        resized arrays as returned by OpenCV (not preprocessed, so they can
        still be visualized). Entries that ``cv2.imread`` cannot decode are
        skipped.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split downstream) reproducible.
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:  # imread signals an unreadable / non-image entry with None
            continue
        images.append(cv2.resize(img, size))
        labels.append(label)
    return images, labels

In [None]:
# Load both classes: every image under test1 gets label 0, every image under train gets label 1.
# NOTE(review): the folder names do not reveal which animal they contain — confirm that
# test1 really holds only cats and train only dogs.
CAT_DIR = r'/content/drive/My Drive/ML/data/Dataset/test1'
DOG_DIR = r'/content/drive/My Drive/ML/data/Dataset/train'

cat_images, cat_labels = load_images_from_folder(CAT_DIR, 0)
dog_images, dog_labels = load_images_from_folder(DOG_DIR, 1)

In [None]:
# Merge the two classes into parallel lists (label-0 samples first, then label-1 samples).
images = [*cat_images, *dog_images]
labels = [*cat_labels, *dog_labels]

In [None]:
# VGG16 with ImageNet weights and without the classification head;
# pooling='avg' is passed so the model output is pooled before it is returned.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    pooling='avg',
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
def extract_features(image):
    """Return the VGG16 feature vector for a single image.

    Args:
        image: One BGR image as produced by ``cv2.imread`` / ``cv2.resize``
            in this notebook (channels on the last axis).

    Returns:
        1-D array holding the flattened output of ``base_model`` for the image.
    """
    img = img_to_array(image)
    # OpenCV delivers BGR, but the VGG16 ``preprocess_input`` expects RGB input
    # (it performs the RGB->BGR swap itself). Reverse the channel axis first so
    # the network does not see swapped colours. The copy also keeps any
    # in-place preprocessing away from the caller's array.
    img = np.ascontiguousarray(img[..., ::-1])
    img = np.expand_dims(img, axis=0)  # add the batch dimension
    img = preprocess_input(img)
    # verbose=0: this runs once per image, and the default per-call progress
    # bar floods the cell output.
    features = base_model.predict(img, verbose=0)
    return features.flatten()

In [None]:
# Feature matrix: one extract_features() vector per image, plus the matching label vector.
X = np.array(list(map(extract_features, images)))
y = np.array(labels)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from imblearn.over_sampling import SMOTE

# X and y are the VGG16 feature matrix and label vector built by the
# feature-extraction cell earlier in this notebook. They must NOT be replaced
# by random placeholder data here, otherwise every metric below is computed
# on noise instead of on the images.

# Stratified split to maintain class distribution
stratified_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
# n_splits=1, so the splitter yields exactly one (train, test) index pair.
train_index, test_index = next(stratified_split.split(X, y))
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]

print(f'Unique classes in y_train: {np.unique(y_train)}')
print(f'Unique classes in y_test: {np.unique(y_test)}')

# Check if balancing is needed: oversample only when the training classes have
# unequal counts. (SMOTE cannot synthesize anything from a single class, so
# that case is reported explicitly instead of being routed into SMOTE.)
train_classes, train_counts = np.unique(y_train, return_counts=True)
if len(train_classes) < 2:
    raise ValueError("y_train contains a single class; a binary classifier cannot be trained.")
if train_counts.min() < train_counts.max():
    print("Balancing the training set using SMOTE...")
    smote = SMOTE(random_state=42)
    X_train, y_train = smote.fit_resample(X_train, y_train)
    print(f'Unique classes in y_train after SMOTE: {np.unique(y_train)}')

# Pipeline for scaling and SVM. random_state makes the probability estimates
# enabled by probability=True repeatable between runs.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(probability=True, random_state=42))
])

# Parameter grid for GridSearch
param_grid = {
    'svm__C': [0.1, 1, 10, 100],
    'svm__gamma': [1, 0.1, 0.01, 0.001],
    'svm__kernel': ['rbf']
}

# Grid search with cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Evaluate the model
y_pred = grid_search.predict(X_test)
y_pred_prob = grid_search.predict_proba(X_test)[:, 1]  # probability of class 1

print(classification_report(y_test, y_pred))

# Best parameters
print("Best parameters found: ", grid_search.best_params_)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure()
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Class 0', 'Class 1'], yticklabels=['Class 0', 'Class 1'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# ROC Curve
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)
plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

# Grid Search Results
results = pd.DataFrame(grid_search.cv_results_)
# pivot() takes keyword arguments; positional use is rejected by pandas 2.x.
pivot_table = results.pivot(
    index="param_svm__C",
    columns="param_svm__gamma",
    values="mean_test_score",
)
plt.figure()
sns.heatmap(pivot_table, annot=True, fmt=".3f", cmap="viridis")
plt.title('Grid Search Results')
plt.xlabel('Gamma')
plt.ylabel('C')
plt.show()


NameError: name 'X_train' is not defined

In [None]:
# Score the held-out set with the already-fitted grid_search object and report plain accuracy.
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

In [None]:
def plot_predictions(original_images, predictions, true_labels):
    """Show up to ten images in a 2x5 grid, titled with predicted and true label.

    Args:
        original_images: Sequence of BGR images; each is converted to RGB
            for display.
        predictions: Predicted label per image, in the same order.
        true_labels: True label per image, in the same order.

    Only the leading samples present in all three sequences are drawn (at most
    one per grid cell); grid cells without a sample are left blank instead of
    raising ``IndexError``.
    """
    fig, axes = plt.subplots(2, 5, figsize=(12, 6))
    axes = axes.ravel()
    n_shown = min(len(axes), len(original_images), len(predictions), len(true_labels))
    for i, ax in enumerate(axes):
        ax.axis('off')
        if i >= n_shown:
            continue  # fewer samples than grid cells: keep the cell empty
        ax.imshow(cv2.cvtColor(original_images[i], cv2.COLOR_BGR2RGB))
        ax.set_title(f'Pred: {predictions[i]}\nTrue: {true_labels[i]}')
    plt.subplots_adjust(hspace=0.5)
    plt.show()

In [None]:
# y_test / y_pred follow the order of the shuffled held-out split, so the
# matching pictures must be picked through test_index (the index array from
# the StratifiedShuffleSplit cell); images[:N] would pair the labels with
# unrelated pictures. Ten samples are taken because the grid shows ten.
test_images = [images[i] for i in test_index[:10]]
plot_predictions(test_images, y_pred[:10], y_test[:10])