In [1]:
import cv2
import os
import numpy as np

def load_images_from_directory(directory, target_size=(100, 100)):
    """Load every ``.jpg`` / ``.png`` image in ``directory`` as a resized RGB array.

    Files are processed in sorted file-name order. A file that OpenCV cannot
    decode (``cv2.imread`` returns ``None``) is skipped, so the two returned
    arrays always stay aligned with each other.

    Args:
        directory: Path of the folder to scan (not recursive).
        target_size: Size handed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``.

    Returns:
        A tuple ``(images, file_names)`` of NumPy arrays. ``images[i]`` is the
        resized RGB image read from ``file_names[i]``. Both arrays are empty
        when no image could be loaded.
    """
    file_names = []
    images = []
    filenames = sorted(f for f in os.listdir(directory) if f.endswith((".jpg", ".png")))
    for filename in filenames:
        img_path = os.path.join(directory, filename)
        img = cv2.imread(img_path)
        # The None check must come before any further OpenCV call:
        # cvtColor raises on an empty image instead of returning None.
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV decodes to BGR
        images.append(cv2.resize(img, target_size))
        file_names.append(filename)
    return np.array(images), np.array(file_names)

In [None]:
# target_size is forwarded to cv2.resize as (width, height), so (384, 96)
# yields arrays of shape (96, 384, 3) -- the input shape the model below expects.
train_images, file_names = load_images_from_directory(
    directory='/kaggle/input/severstal-steel-defect-detection/train_images',
    target_size=(384, 96),
)

In [None]:
import pandas as pd
# Annotation table for the training images (read from the competition input folder).
train_csv = pd.read_csv(
    filepath_or_buffer='/kaggle/input/severstal-steel-defect-detection/train.csv'
)

In [None]:
# Order annotation rows by image file name (ascending is the sort_values default).
train_csv = train_csv.sort_values('ImageId')
# Preview the first rows; head() shows 5 by default.
train_csv.head()

In [None]:
# File names that were actually loaded from disk; a set gives O(1) lookups.
file_names_set = set(file_names)

# Keep only the annotation rows whose image was loaded.
mask = train_csv['ImageId'].isin(file_names_set)
filtered_train_csv = train_csv[mask]

# Image ids that have at least one annotation row. An id may appear more than
# once here; duplicates are only dropped by drop_duplicates in a later cell.
filtered_image_ids = filtered_train_csv['ImageId'].tolist()

# Build ONE boolean mask over the loaded files and reuse it for both arrays.
# Membership is tested against a set: scanning the id list for every file name
# (and doing so twice) made this step quadratic in the dataset size.
labelled_ids = set(filtered_image_ids)
keep = np.array([name in labelled_ids for name in file_names], dtype=bool)

# Boolean indexing preserves order, so names and images stay aligned.
filtered_file_names = file_names[keep]
filtered_train_images = train_images[keep]

# Release the unfiltered data; the image array in particular is large.
del labelled_ids, keep
del file_names
del train_csv
del train_images
del mask

In [None]:
# Keep a single annotation row per image (drop_duplicates keeps the first one).
filtered_train_csv = filtered_train_csv.drop_duplicates(subset=['ImageId'])

# Labels are later paired with images purely by position, so verify that row i
# of the table really describes image i before anything is trained on them.
assert filtered_train_csv['ImageId'].tolist() == filtered_file_names.tolist(), (
    "filtered_train_csv rows are not aligned with filtered_file_names"
)

print(len(filtered_train_csv), filtered_train_images.shape, len(filtered_file_names))

In [None]:
from tensorflow import keras
from keras.layers import Dense, Input, GlobalAveragePooling2D, Dropout, BatchNormalization, PReLU
from keras import Model
from keras.applications import Xception
def _dense_stage(tensor, units):
    """Apply Dense(units) -> PReLU -> BatchNormalization -> Dropout(0.2) to `tensor`."""
    tensor = Dense(units=units)(tensor)
    tensor = PReLU()(tensor)
    tensor = BatchNormalization()(tensor)
    return Dropout(0.2)(tensor)


# ImageNet-pretrained Xception without its classification head, used as the
# convolutional feature extractor for 96x384 RGB inputs.
conv_base = Xception(weights='imagenet', include_top=False, input_shape=(96, 384, 3))
input_layer = Input(shape=(96, 384, 3))

# Pool the feature maps to one vector per image and normalise it.
features = conv_base(input_layer)
pooled = GlobalAveragePooling2D()(features)
x = BatchNormalization()(pooled)

# Wide first stage, then stages of width 2048, 256 and 32
# (the width is divided by 8 until it reaches 4, which ends the loop).
x = _dense_stage(x, 8192)

nodes = 2048
while nodes > 4:
    x = _dense_stage(x, nodes)
    nodes //= 8

# Four-way softmax output.
x = Dense(units=4, activation='softmax')(x)

model = Model(inputs=input_layer, outputs=x)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [None]:
# Scale pixel values to [0, 1]. Casting to float32 first avoids the float64
# array that a plain `/ 255` on integer pixels would allocate (twice the memory).
# The dtype guard makes the cell safe to re-run: once the array is floating
# point it is treated as already scaled and left untouched.
if np.issubdtype(filtered_train_images.dtype, np.integer):
    filtered_train_images = filtered_train_images.astype(np.float32) / 255.0

# Class label for each remaining image, in table row order.
y = filtered_train_csv['ClassId'].to_numpy()

In [None]:
def one_hot_encode(labels, num_classes=4):
    """One-hot encode 1-based integer class labels.

    Args:
        labels: 1-D array (or sequence) of integer labels in ``1..num_classes``.
        num_classes: Number of classes, i.e. the width of the encoded rows.

    Returns:
        Float array of shape ``(len(labels), num_classes)`` in which row ``i``
        has a 1 in column ``labels[i] - 1`` and 0 everywhere else.

    Raises:
        ValueError: If any label lies outside ``1..num_classes``.
    """
    labels = np.asarray(labels)
    # Without this check a label of 0 would become index -1 and silently mark
    # the LAST class, while labels above num_classes fail with an opaque
    # IndexError.
    if ((labels < 1) | (labels > num_classes)).any():
        raise ValueError(f"labels must be in the range 1..{num_classes}")
    one_hot_labels = np.zeros((labels.size, num_classes))
    one_hot_labels[np.arange(labels.size), labels - 1] = 1
    return one_hot_labels
# Encode the class labels with the default of 4 classes.
y_one_hot = one_hot_encode(labels=y)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the images for evaluation. y_train / y_test stay one-hot encoded.
x_train, x_test, y_train, y_test = train_test_split(
    filtered_train_images,
    y_one_hot,
    test_size=0.2,
    random_state=42,  # fixed seed: the same split on every Restart & Run All
)
# The split arrays are the only copies needed from here on.
del filtered_train_images
del y_one_hot

In [None]:
from keras.callbacks import EarlyStopping
# Early stopping must watch a held-out loss. Monitoring the training loss keeps
# training while the model merely memorises the data, and restore_best_weights
# would then restore the most over-fitted epoch. validation_split reserves the
# last 10% of x_train / y_train (not trained on) to compute val_loss.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model.fit(
    x_train,
    y_train,
    epochs=500,  # upper bound; early stopping normally ends training sooner
    validation_split=0.1,
    callbacks=[early_stopping],
)

In [None]:
# Class probabilities for every test image, one row per image.
y_pred_one_hot = model.predict(x_test)

# Predicted class index per image as a 1-D array. Allocating the result with
# y_test.shape produced an (n, 4) array (each row filled with the same index),
# which cannot be compared with 1-D targets by accuracy_score.
y_pred = y_pred_one_hot.argmax(axis=1)

In [None]:
# True class index per test image, recovered from the one-hot rows.
# (Iterating over len(y_test) directly raised TypeError: an int is not iterable.)
y_test_actual = y_test.argmax(axis=1)

In [None]:
from sklearn.metrics import accuracy_score
# Fraction of test images whose predicted class index equals the true one.
accuracy = accuracy_score(y_true=y_test_actual, y_pred=y_pred)
print(accuracy)