In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, multilabel_confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Rescaling
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical



In [None]:
# Loading the data
train_df = pd.read_csv('/kaggle/input/severstal-steel-defect-detection/train.csv')
train_images_path = '/kaggle/input/severstal-steel-defect-detection/train_images/'

# Fill NaN values with an empty string so that '' means "no encoded pixels".
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the column selection: that form is chained
# assignment, which pandas warns about and which stops modifying train_df
# once Copy-on-Write is the default behaviour.
train_df['EncodedPixels'] = train_df['EncodedPixels'].fillna('')


In [None]:
# Preprocessing
def preprocess_image(image_path):
    """Load an image from disk and return it as a small, normalized array.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (128, 128, 3) in RGB channel order with
        values scaled to the [0, 1] range.

    Raises
    ------
    FileNotFoundError
        If OpenCV could not read the file (missing, unreadable or not an
        image).
    """
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # cv2.imread reports failure by returning None rather than raising; stop
    # here with the offending path instead of an opaque error from resize.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # OpenCV decodes to BGR, while matplotlib and ImageNet-pretrained
    # backbones work with RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (128, 128))  # Reduce the image size
    # Dividing the uint8 array directly would yield float64; float32 halves
    # the memory needed to hold the whole dataset.
    return image.astype(np.float32) / 255.0


In [None]:
# Preparing images and labels

# Maps each image file name to its 4-element multi-hot label vector
image_dict = {}

# Classes are 1, 2, 3, 4 in the dataset, so they are shifted to 0-based slots
for image_file, class_number, encoded_pixels in zip(
    train_df['ImageId'], train_df['ClassId'], train_df['EncodedPixels']
):
    class_id = class_number - 1
    # First sighting of an image creates an all-zero label vector for it
    label_vector = image_dict.setdefault(image_file, np.zeros(4))
    # An empty encoding leaves the slot for this class at zero
    if encoded_pixels != '':
        label_vector[class_id] = 1

# Load every image once, in the same order as the label vectors
images = np.array([
    preprocess_image(os.path.join(train_images_path, file_name))
    for file_name in image_dict
])
labels = np.array(list(image_dict.values()))


In [None]:
# Splitting data: 20% of the samples are held out for validation, with a
# fixed random_state so the split is repeatable
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)


In [None]:
# ImageDataGenerator for batch processing
# Training batches get random horizontal/vertical flips; the validation
# generator is created without any augmentation options.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
)
val_datagen = ImageDataGenerator()

# Both generators yield (images, labels) batches of 32 samples
train_generator = train_datagen.flow(x=X_train, y=y_train, batch_size=32)
val_generator = val_datagen.flow(x=X_val, y=y_val, batch_size=32)


In [None]:
# Building the model using transfer learning
base_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=(128, 128, 3))

model = Sequential([
    # The images fed to this model are scaled to [0, 1], but the Keras
    # EfficientNet applications ship with their own input rescaling and
    # expect raw pixel values in [0, 255]. Undo the external scaling so the
    # pretrained weights see the range they were trained on.
    Rescaling(255.0),
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(4, activation='sigmoid')  # Using sigmoid for multi-label classification
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    # Use an explicit per-label binary accuracy. The 'accuracy' shortcut is
    # resolved by Keras heuristics and, depending on the Keras version, can
    # become categorical (argmax) accuracy for a 4-unit output, which is not
    # meaningful for multi-label targets. The metric keeps the name
    # 'accuracy' so the reported/history keys are unchanged.
    metrics=[BinaryAccuracy(name='accuracy')],
)


In [None]:
# Training the model for 25 epochs, validating after each one
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=25,
)

# Evaluating the model on the validation generator
loss, accuracy = model.evaluate(val_generator)
print(f'Validation Accuracy: {accuracy:.4f}')


In [None]:
# Plotting training history: loss on the left, accuracy on the right
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 4))

for ax, metric, title in (
    (loss_ax, 'loss', 'Loss'),
    (accuracy_ax, 'accuracy', 'Accuracy'),
):
    # Training curve first, then the matching validation curve
    ax.plot(history.history[metric], label=f'train_{metric}')
    ax.plot(history.history[f'val_{metric}'], label=f'val_{metric}')
    ax.legend()
    ax.set_title(title)

plt.show()


In [None]:
# Confusion Matrix and Classification Report
# Turn the model outputs into hard 0/1 labels with a 0.5 threshold
val_probabilities = model.predict(X_val)
y_pred = (val_probabilities > 0.5).astype(int)

# One confusion matrix per label column
conf_matrix = multilabel_confusion_matrix(y_val, y_pred)

# Visualize Confusion Matrices with Heatmap
def plot_confusion_matrices(conf_matrix, class_names=None):
    """Draw one annotated No/Yes heatmap per class in a two-column grid.

    Parameters
    ----------
    conf_matrix : sequence of 2x2 integer arrays
        Per-class confusion matrices, indexed as conf_matrix[i] for class i.
        Any number of classes is accepted; four classes produce the same
        2x2, 12x10-inch layout as before.
    class_names : sequence of str, optional
        Name used in each subplot title. Defaults to "Class 1", "Class 2", ...

    Raises
    ------
    ValueError
        If conf_matrix is empty or class_names does not have one entry per
        class.
    """
    n_classes = len(conf_matrix)
    if n_classes == 0:
        raise ValueError("conf_matrix must contain at least one class")
    if class_names is None:
        class_names = [f"Class {i+1}" for i in range(n_classes)]
    elif len(class_names) != n_classes:
        raise ValueError(
            f"Expected {n_classes} class names, got {len(class_names)}"
        )

    # Two columns at most; rows grow with the number of classes (ceil division)
    n_cols = min(2, n_classes)
    n_rows = (n_classes + n_cols - 1) // n_cols
    # squeeze=False keeps `axes` a 2-D array even for a single row or column
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    for i, ax in enumerate(axes):
        if i >= n_classes:
            # Odd class counts leave one unused cell in the grid; hide it
            ax.axis('off')
            continue
        sns.heatmap(conf_matrix[i], annot=True, fmt='d', cmap='Blues', ax=ax, cbar=False, xticklabels=["No", "Yes"], yticklabels=["No", "Yes"])
        ax.set_title(f'Confusion Matrix for {class_names[i]}')
        ax.set_xlabel('Predicted Label')
        ax.set_ylabel('True Label')

    plt.tight_layout()
    plt.show()

plot_confusion_matrices(conf_matrix)

# Classification Report, with one named row per defect class
report_labels = [f"Class {class_idx+1}" for class_idx in range(4)]
class_report = classification_report(
    y_val,
    y_pred,
    target_names=report_labels,
)
print(class_report)


In [None]:
# Visualizing some predictions
def plot_predictions(images, true_labels, pred_labels, n=5):
    """Show images in two rows: true labels on top, predicted labels below.

    Each selected image is drawn twice, once titled with its true label
    vector and once titled with its predicted label vector.

    Parameters
    ----------
    images : sequence of image arrays
        Images accepted by matplotlib's imshow.
    true_labels, pred_labels : sequence of numpy.ndarray
        Label vectors aligned with `images`; each entry must support
        `.astype(int)`.
    n : int, default 5
        Maximum number of images to show. It is clamped to the number of
        available samples so short inputs no longer raise IndexError.
    """
    n = min(n, len(images), len(true_labels), len(pred_labels))
    fig = plt.figure(figsize=(20, 10))
    for i in range(n):
        # Row 0 carries the ground truth, row 1 the prediction
        for row, (tag, label_set) in enumerate((("True", true_labels), ("Pred", pred_labels))):
            ax = fig.add_subplot(2, n, row * n + i + 1)
            ax.imshow(images[i])
            ax.set_title(f"{tag}: {label_set[i].astype(int)}")
            ax.axis("off")
    plt.show()

# Pick five distinct validation samples at random and display them
sample_count = 5
indices = np.random.choice(len(X_val), size=sample_count, replace=False)
plot_predictions(
    X_val[indices],
    y_val[indices],
    y_pred[indices],
)


In [None]:
# Saving the model to a file in the current working directory
model_path = 'steel_defect_detection_model.h5'
model.save(model_path)
print("Model saved successfully.")
