# <center><mark>TASK 3</mark></center>
### <li><mark>Implement a support vector machine to classify images of cats and dog from the kaggle dataset.</mark></li>

### <ol start="1"><li><mark>Testing and Training Data with 250 Images</li></mark>

### Import libraries &<br><br> Creating helper function to load and preprocess images

In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model

# Define paths to your data
train_folder = 'train1'
test_folder = 'test1'

# Load pre-trained VGG16 model + higher level layers
base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

# Helper function to load and preprocess images
def load_data(folder):
    images = []
    labels = []
    for filename in os.listdir(folder):
        if filename.endswith('.jpg'):
            img_path = os.path.join(folder, filename)
            img = image.load_img(img_path, target_size=(224, 224))
            img_data = image.img_to_array(img)
            img_data = np.expand_dims(img_data, axis=0)
            img_data = preprocess_input(img_data)
            features = model.predict(img_data)
            images.append(features.flatten())
            if 'cat' in filename:
                labels.append(0)  # Label for cat
            elif 'dog' in filename:
                labels.append(1)  # Label for dog
    return np.array(images), np.array(labels)

# Load training data
X_train, y_train = load_data(train_folder)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Train the SVM classifier
svm_clf = SVC(kernel='linear', C=1.0)
svm_clf.fit(X_train, y_train)

# Predict on the validation set
y_val_pred = svm_clf.predict(X_val)

# Evaluate the classifier
accuracy = accuracy_score(y_val, y_val_pred)
report = classification_report(y_val, y_val_pred)

print(f'Validation Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 416ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 244ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 244ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

### Load Test Data &<br><br> Create a dummy sample submission

In [2]:
X_test, _ = load_data(test_folder)  # Test folder does not have labels

# Predict on the test set
y_test_pred = svm_clf.predict(X_test)

# Create a dummy sample submission file with 100 entries
submission = pd.DataFrame({'id': range(1, 101), 'label': y_test_pred})
submission.to_csv('submission_250.csv', index=False)

print('Predictions saved to submission.csv')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 341ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 216ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 238ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 287ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 269ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 214ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 311ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

### <ol start="2"><li><mark>Testing and Training Data with 25000 Images</li></mark>

### Used pre-trained VGG16 model + higher level layers

In [4]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model
import pickle

# Define paths to your data
train_folder = 'train2'
test_folder = 'test2'

# Load pre-trained VGG16 model + higher level layers
base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

# Helper function to preprocess a single image
def preprocess_image(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_data = image.img_to_array(img)
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    return img_data

# Helper function to extract features in batches
def extract_features_in_batches(folder, batch_size=5000):
    filenames = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.jpg')]
    features = []
    labels = []
    for start in range(0, len(filenames), batch_size):
        end = min(start + batch_size, len(filenames))
        batch_files = filenames[start:end]
        batch_images = np.vstack([preprocess_image(f) for f in batch_files])
        batch_features = model.predict(batch_images)
        features.extend(batch_features)
        for filename in batch_files:
            if 'cat' in filename:
                labels.append(0)  # Label for cat
            elif 'dog' in filename:
                labels.append(1)  # Label for dog
        print(f"Processed {end} of {len(filenames)} images.")
    return np.array(features), np.array(labels)

# Extract and save training data features
X_train, y_train = extract_features_in_batches(train_folder)
with open('train_features.pkl', 'wb') as f:
    pickle.dump((X_train, y_train), f)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m384s[0m 2s/step
Processed 5000 of 25000 images.
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m363s[0m 2s/step
Processed 10000 of 25000 images.
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m355s[0m 2s/step
Processed 15000 of 25000 images.
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m351s[0m 2s/step
Processed 20000 of 25000 images.
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m339s[0m 2s/step
Processed 25000 of 25000 images.


### Extractig data and Loading data feature

In [None]:
X_test, _ = extract_features_in_batches(test_folder)
with open('test_features.pkl', 'wb') as f:
    pickle.dump(X_test, f)

# Load test data features
with open('test_features.pkl', 'rb') as f:
    X_test = pickle.load(f)

# Train the SVM classifier
svm_clf = SVC(kernel='linear', C=1.0)
svm_clf.fit(X_train, y_train)

# Predict on the test set
y_test_pred = svm_clf.predict(X_test)

# Create a submission DataFrame
submission = pd.DataFrame({'id': range(1, len(y_test_pred) + 1), 'label': y_test_pred})
submission.to_csv('submission_25000.csv', index=False)

print('Predictions saved to submission.csv')

### <ol start="3"><li><mark>Function to predict a single image from the Test Data</mark></li>

In [None]:
import os
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np
from tkinter import Tk
from tkinter.filedialog import askopenfilenames

def predict_image(image_path):
    img = image.load_img(image_path, target_size=(224, 224))
    img_data = image.img_to_array(img)
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    features = model.predict(img_data)
    features = features.flatten()
    prediction = svm_clf.predict([features])
    label = 'cat' if prediction[0] == 0 else 'dog'
    return label

# Initialize Tkinter and hide the root window
root = Tk()
root.withdraw()  # Hide the main window

# Define the initial directory
initial_dir = "test2"  # You can change this to "test2" or any other directory

# Open the file dialog and get the image paths
image_paths = askopenfilenames(initialdir=initial_dir, title="Select image files",
                               filetypes=(("Image files", "*.jpg;*.jpeg;*.png"), ("All files", "*.*")))

# Process each selected image
if image_paths:
    for image_path in image_paths:
        result = predict_image(image_path)
        print(f'The image {os.path.basename(image_path)} is a: {result}')
else:
    print("No image files selected.")

# Run the Tkinter main loop
root.mainloop()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256ms/step
The image 1.jpg is a: dog
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step
The image 2.jpg is a: dog
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 255ms/step
The image 3.jpg is a: dog
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273ms/step
The image 6.jpg is a: cat
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 292ms/step
The image 7.jpg is a: cat
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step
The image 9.jpg is a: cat
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 279ms/step
The image 12.jpg is a: dog


# <center><mark>Thankyou</mark></center>