<a href="https://colab.research.google.com/github/AditiGothwal02/Skin-Cancer-detection-project-/blob/main/spare_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Copy kaggle.json (presumably the Kaggle API token) into ~/.kaggle.
# `-p` keeps the cell re-runnable: plain `mkdir` errors once the folder exists.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the copied file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the nightfury007/ham10000-isic2018-raw dataset with the Kaggle CLI.
!kaggle datasets download nightfury007/ham10000-isic2018-raw

Dataset URL: https://www.kaggle.com/datasets/nightfury007/ham10000-isic2018-raw
License(s): Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)


In [None]:
# Extract the downloaded archive into the working directory.
# -q suppresses the per-file listing; -n never overwrites existing files, so a
# re-run does not stop at unzip's interactive "replace?" prompt.
!unzip -q -n ham10000-isic2018-raw.zip

In [None]:
# Use the %pip magic so the package is installed into the environment of the
# running kernel (a `!pip` call may resolve to a different Python).
%pip install tensorflow



In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm
import matplotlib.pyplot as plt

# Directory the metadata file and the image folder are read from
dataset_dir = 'dataverse_files'

# Read the metadata table into a DataFrame
metadata_file = os.path.join(dataset_dir, 'HAM10000_metadata')
metadata = pd.read_csv(metadata_file)

In [None]:
# Count the missing entries in each metadata column and print the tally
missing_counts = metadata.isna().sum()
print("Missing values in metadata:\n", missing_counts)

Missing values in metadata:
 lesion_id        0
image_id         0
dx               0
dx_type          0
age             57
sex              0
localization     0
dataset          0
dtype: int64


In [None]:
# Discard every row that has at least one missing field.
# The report printed above lists `age` (57 rows) as the only column with gaps.
metadata = metadata.dropna(axis=0, how='any')

In [None]:
# Repeat the missing-value tally now that incomplete rows have been dropped
remaining_missing = metadata.isna().sum()
print("Missing values in metadata:\n", remaining_missing)

Missing values in metadata:
 lesion_id       0
image_id        0
dx              0
dx_type         0
age             0
sex             0
localization    0
dataset         0
dtype: int64


In [None]:
# Fit a label encoder on the `dx` column
le = LabelEncoder()
le.fit(metadata['dx'])

# Attach the integer codes as `encoded_labels`, then
# data cleaning: keep a single row per image_id
metadata = (
    metadata
    .assign(encoded_labels=le.transform(metadata['dx']))
    .drop_duplicates(subset='image_id')
)

In [None]:
# Data balancing: cap every class at the same maximum number of samples
def balance_data(df, target_col, n_samples):
    """Downsample each class of ``df`` to at most ``n_samples`` rows.

    Args:
        df: DataFrame holding one row per sample.
        target_col: Name of the column whose distinct values define the classes.
        n_samples: Maximum number of rows to keep for any single class.

    Returns:
        A DataFrame with the kept rows, grouped class by class in the order in
        which the classes first appear in ``df``. Classes with more than
        ``n_samples`` rows are randomly downsampled with a fixed seed (42), so
        repeated calls give the same draw; classes with ``n_samples`` rows or
        fewer are kept unchanged (they are NOT upsampled). When ``df`` contains
        no classes, an empty frame with the columns of ``df`` is returned.
    """
    class_frames = []
    for label in df[target_col].unique():
        label_df = df[df[target_col] == label]
        if len(label_df) > n_samples:
            label_df = label_df.sample(n_samples, random_state=42)
        class_frames.append(label_df)

    if not class_frames:
        # pd.concat rejects an empty list; keep the input schema instead.
        return df.iloc[0:0].copy()

    # Concatenate once rather than growing a frame inside the loop.
    return pd.concat(class_frames)

# Size of the rarest class; passed to balance_data as the per-class cap
label_counts = metadata['encoded_labels'].value_counts()
min_samples = label_counts.min()

In [None]:
# Downsample every class to the size of the smallest one.
# NOTE(review): the image-loading loop further down iterates `metadata`, not
# `metadata_balanced` — confirm which table is meant to feed the model.
metadata_balanced = balance_data(
    df=metadata,
    target_col='encoded_labels',
    n_samples=min_samples,
)

In [None]:

# Target image size used when loading images, and a batch size setting
img_size = (224, 224)
batch_size = 32

# Augmentation settings handed to ImageDataGenerator
augmentation_config = dict(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_config)

# Function to load and preprocess images
def load_and_preprocess_image(img_path, img_size):
    """Load one image file and return it as a preprocessed batch of one.

    Args:
        img_path: Path of the image file to read.
        img_size: Target size handed to ``load_img`` for resizing.

    Returns:
        The output of ``preprocess_input`` applied to the image array after a
        leading axis has been added.
    """
    resized = load_img(img_path, target_size=img_size)
    # Add a leading axis so the single image forms a batch
    batch = img_to_array(resized)[np.newaxis, ...]
    # NOTE(review): `preprocess_input` is imported from the resnet50 module,
    # while features are later extracted with VGG16 — confirm the two
    # preprocessing functions are interchangeable.
    return preprocess_input(batch)

# Load every image listed in `metadata` together with its `dx` label.
# Each entry appended to `images` is the batch-of-one array returned by
# load_and_preprocess_image.
images = []
labels = []
image_dir = os.path.join(dataset_dir, 'HAM10000_images_combined_600x450')
for image_id, dx in zip(metadata['image_id'], metadata['dx']):
    img_path = os.path.join(image_dir, image_id + '.jpg')
    images.append(load_and_preprocess_image(img_path, img_size))
    labels.append(dx)

In [None]:
# Convert lists to numpy arrays.
# Stack only while `images` is still the Python list built by the loading
# loop: re-running this cell on the already stacked array would make
# np.vstack iterate over the individual images and merge their rows.
if isinstance(images, list):
    images = np.vstack(images)
labels = np.array(labels)

# Encode labels: each distinct label (in np.unique's sorted order) gets its
# position as integer code
label_mapping = {label: idx for idx, label in enumerate(np.unique(labels))}
labels_encoded = np.array([label_mapping[label] for label in labels])


In [None]:
# Hold out 20% of the samples for testing; the split is stratified on the
# encoded labels and seeded so it is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels_encoded,
    test_size=0.2,
    random_state=42,
    stratify=labels_encoded,
)

In [None]:
# Import VGG16 through tensorflow.keras, the same namespace the rest of the
# notebook uses for preprocessing, save_model and load_model.
from tensorflow.keras.applications.vgg16 import VGG16
from sklearn.utils.class_weight import compute_class_weight
# Feature extraction using VGG16: ImageNet weights, no classification head,
# average pooling requested on the output
vgg_model = VGG16(weights='imagenet', include_top=False, pooling='avg')

def extract_features(model, data, batch_size=32):
    """Run ``model.predict`` over ``data`` and return its output.

    Args:
        model: Object exposing ``predict(data, batch_size=..., verbose=...)``.
        data: Inputs to predict on; converted with ``np.array`` first.
        batch_size: Batch size forwarded to ``model.predict``. The default of
            32 matches the value that used to be hard-coded here.

    Returns:
        Whatever ``model.predict`` returns for ``data``.
    """
    data = np.array(data)  # Ensure data is a NumPy array
    return model.predict(data, batch_size=batch_size, verbose=1)

# Push the training split, then the test split, through the VGG16 extractor
X_train_features, X_test_features = [
    extract_features(vgg_model, split) for split in (X_train, X_test)
]


In [None]:
# Compute class weights for balancing.
# The weights are derived from the training labels only, so the held-out test
# labels do not influence anything the classifier is fitted with.
train_classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=y_train
)
# Key each weight by its actual class label rather than by array position.
class_weights_dict = {int(label): weight for label, weight in zip(train_classes, class_weights)}

In [None]:
# Train a linear SVM on the extracted features, weighting classes with
# class_weights_dict.
# probability=True makes scikit-learn run an internal randomised
# cross-validation for the probability estimates; random_state pins it so
# repeated runs give the same probabilities.
svm_classifier = SVC(
    kernel='linear',
    probability=True,
    class_weight=class_weights_dict,
    random_state=42,
)
svm_classifier.fit(X_train_features, y_train)

In [None]:
# Classify the held-out test features
y_pred = svm_classifier.predict(X_test_features)

# Report overall accuracy as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Per-class metrics, with rows named after the keys of label_mapping
print('Classification Report:')
report = classification_report(y_test, y_pred, target_names=label_mapping.keys())
print(report)

# Save the Trained Models and Run Inference

In [None]:
!pip install tensorflow
import joblib
import pickle
from tensorflow.keras.models import save_model

# Output file names for the models, feature arrays and label arrays
svm_model_path = 'svm_classifier.pkl'
vgg_model_path = 'vgg16_model.h5'
vgg_features_train_path = 'X_train_features.npy'
vgg_features_test_path = 'X_test_features.npy'
y_train_path = 'y_train.npy'
y_test_path = 'y_test.npy'

# Persist the fitted SVM with joblib
joblib.dump(svm_classifier, svm_model_path)
print(f"SVM model saved to {svm_model_path}")

# Persist the VGG16 feature extractor
save_model(vgg_model, vgg_model_path)
print(f"VGG16 model saved to {vgg_model_path}")

# Persist the extracted features and the matching labels as .npy files
arrays_to_save = (
    (vgg_features_train_path, X_train_features),
    (vgg_features_test_path, X_test_features),
    (y_train_path, y_train),
    (y_test_path, y_test),
)
for target_path, array in arrays_to_save:
    np.save(target_path, array)
print("Feature arrays and labels saved successfully.")


In [None]:
from tensorflow.keras.models import load_model
# Image size applied when loading an input image
img_size = (224, 224)

# File names the models were saved under
svm_model_path = 'svm_classifier.pkl'
vgg_model_path = 'vgg16_model.h5'

# Restore both models.
# NOTE: joblib.load unpickles the file — only load files you trust.
svm_classifier = joblib.load(svm_model_path)
vgg_model = load_model(vgg_model_path)

# Function to preprocess input image
def preprocess_image(img_path):
    """Load the image at ``img_path`` and return it as a preprocessed batch of one.

    The image is resized to the module-level ``img_size``, converted to an
    array, given a leading axis and passed through ``preprocess_input``.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The array returned by ``preprocess_input``.
    """
    picture = load_img(img_path, target_size=img_size)
    # Leading axis added so the model receives a batch
    single_batch = img_to_array(picture)[np.newaxis, ...]
    return preprocess_input(single_batch)

# Function to predict class
def predict_image_class(img_path, label_mapping):
    """Predict the class label of the image stored at ``img_path``.

    The image is preprocessed, passed through the module-level ``vgg_model``
    to obtain features, and those features are classified by the module-level
    ``svm_classifier``.

    Args:
        img_path: Path of the image file to classify.
        label_mapping: Dict mapping class label -> integer code.

    Returns:
        The first key of ``label_mapping`` whose value equals the predicted
        code.

    Raises:
        ValueError: If the predicted code is not among the mapping's values.
    """
    features = vgg_model.predict(preprocess_image(img_path))
    predicted_code = svm_classifier.predict(features)[0]

    # Reverse lookup: find the position of the code, then the label at it
    class_names = list(label_mapping.keys())
    class_codes = list(label_mapping.values())
    position = class_codes.index(predicted_code)
    return class_names[position]


# Example usage: classify a single image file.
# `label_mapping` is the dict built when the labels were encoded earlier in
# the notebook.
img_path = 'ISIC_0024331.jpg'  # Provide path to the input image
predicted_class = predict_image_class(img_path=img_path, label_mapping=label_mapping)
print(f'Predicted Class: {predicted_class}')


In [None]:
# Use the %pip magic so streamlit is installed into the environment of the
# running kernel (a `!pip` call may resolve to a different Python).
%pip install streamlit