In [1]:
import os
from PIL import Image
import cv2
import numpy as np

# Base directory containing subdirectories of images
base_dir = "sub_images"

# List to hold image data and labels
images = []
labels = []

# Iterate through each subdirectory in the base directory
for subdir in os.listdir(base_dir):
    subdir_path = os.path.join(base_dir, subdir)
    
    # Check if it's a directory
    if os.path.isdir(subdir_path):
        # Process each image in the subdirectory
        for filename in os.listdir(subdir_path):
            if filename.endswith(".png"):
                # Construct full file path
                file_path = os.path.join(subdir_path, filename)
                # Open the image with PIL
                with Image.open(file_path) as img:
                    # Convert PIL image to NumPy array
                    img_array = np.array(img)
                    # Convert BGR to RGB
                    img_rgb = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
                    # Convert RGB to Grayscale
                    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
                    
                    # Append the processed image and label
                    images.append(img_gray)
                    labels.append(subdir)

# Convert list to NumPy arrays for machine learning processing
images = np.array(images)
labels = np.array(labels)


In [2]:
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report, accuracy_score

# Flatten the image data into 1D vectors (each image must be a single feature vector)
n_samples = len(images)
data = images.reshape((n_samples, -1))  # Reshape from (n_samples, img_height, img_width) to (n_samples, img_height*img_width)

# Split data into 80% train and 20% test subsets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.20, random_state=42)

# Create a classifier: a support vector classifier
classifier = svm.SVC(kernel='rbf', gamma='scale')  # 'rbf' and 'scale' are default but explicitly mentioned here for clarity

# Train the classifier
classifier.fit(X_train, y_train)

# Predict the labels on the test data
y_pred = classifier.predict(X_test)

# Evaluate the classifier performance
print("Classification report for classifier %s:\n%s\n"
      % (classifier, classification_report(y_test, y_pred)))

Classification report for classifier SVC():
                                     precision    recall  f1-score   support

                              ASC-H       0.42      0.04      0.07       195
                             ASC-US       0.50      0.01      0.02       111
                               HSIL       0.58      0.78      0.67       325
                               LSIL       0.49      0.24      0.33       284
Negative for intraepithelial lesion       0.74      0.95      0.83      1225
                                SCC       0.00      0.00      0.00        34

                           accuracy                           0.69      2174
                          macro avg       0.46      0.34      0.32      2174
                       weighted avg       0.63      0.69      0.62      2174




  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
import joblib
joblib.dump(classifier, 'svm_model.joblib')

['svm_model.joblib']

In [4]:
loaded_model = joblib.load('svm_model.joblib')

# Use the loaded model to make predictions
predictions = loaded_model.predict(X_test)

In [5]:
import os
from PIL import Image
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
import joblib

# Base directory containing subdirectories of images
base_dir = "sub_images"

# List to hold image data and labels
images = []
labels = []

# Iterate through each subdirectory in the base directory
for subdir in os.listdir(base_dir):
    subdir_path = os.path.join(base_dir, subdir)
    
    # Check if it's a directory
    if os.path.isdir(subdir_path):
        # Process each image in the subdirectory
        for filename in os.listdir(subdir_path):
            if filename.endswith(".png"):
                # Construct full file path
                file_path = os.path.join(subdir_path, filename)
                # Open the image with PIL
                with Image.open(file_path) as img:
                    # Convert PIL image to NumPy array
                    img_array = np.array(img)
                    # Convert BGR to RGB
                    img_rgb = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
                    # Convert RGB to Grayscale
                    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
                    
                    # Append the processed image and label
                    images.append(img_gray)
                    # Assign new labels based on the directory
                    if subdir == "Negative for intraepithelial lesion":
                        labels.append("sem_doenca")
                    else:
                        labels.append("com_doenca")

# Convert list to NumPy arrays for machine learning processing
images = np.array(images)
labels = np.array(labels)

# Flatten the image data into 1D vectors
n_samples = len(images)
data = images.reshape((n_samples, -1))  # Reshape from (n_samples, img_height, img_width) to (n_samples, img_height*img_width)

# Split data into 80% train and 20% test subsets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.20, random_state=42)

# Optional: scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the SVM model with balanced class weights
model = svm.SVC(kernel='rbf', gamma='scale', class_weight='balanced')
model.fit(X_train_scaled, y_train)

# Save the model
joblib.dump(model, 'svm_model_binario.joblib')

# Load the model
loaded_model = joblib.load('svm_model_binario.joblib')

# Use the loaded model to make predictions
y_pred = loaded_model.predict(X_test_scaled)

# Print classification report
print("Classification report for classifier %s:\n%s\n"
    % (model, classification_report(y_test, y_pred)))


Classification report for classifier SVC(class_weight='balanced'):
              precision    recall  f1-score   support

  com_doenca       0.76      0.83      0.80       949
  sem_doenca       0.86      0.80      0.83      1225

    accuracy                           0.81      2174
   macro avg       0.81      0.82      0.81      2174
weighted avg       0.82      0.81      0.81      2174


