In [None]:
# Import necessary libraries
import os
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Activation, Conv2D, MaxPooling2D, Lambda
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [None]:
# Load data and labels
path = "../input/"
train_path = os.path.join(path, 'train/')

# glob() yields files in arbitrary, filesystem-dependent order. Sorting makes
# the row order deterministic, so anything downstream that depends on it
# (taking the first N rows, a seeded train/validation split) is reproducible.
image_paths = sorted(glob(os.path.join(train_path, '*.tif')))
if not image_paths:
    raise FileNotFoundError(f"No .tif images found in {train_path!r}")

df = pd.DataFrame({'path': image_paths})
# The image id is the file name without directory or extension.
df['id'] = df.path.map(lambda x: os.path.splitext(os.path.basename(x))[0])
labels = pd.read_csv(os.path.join(path, "train_labels.csv"))
# Default (inner) merge on 'id': only images that have a label row are kept.
df = df.merge(labels, on="id")

In [None]:
# Load sample images
N = 10000
# head(N) caps the sample at N rows without depending on the DataFrame's
# index labels (and simply returns fewer rows if df is shorter than N).
sample_df = df.head(N)

X = []
for img_path in tqdm(sample_df['path'], total=len(sample_df)):
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising; fail here with the offending path.
        raise IOError(f"Could not read image: {img_path}")
    # cv2.imread returns channels in BGR order. Reverse the channel axis to
    # RGB so matplotlib renders true colours and channel index 0/1/2 really
    # is red/green/blue for the per-channel plots.
    X.append(img[:, :, ::-1])

X = np.array(X)
y = sample_df['label'].to_numpy()

In [None]:
# Show the first eight loaded images in a 2x4 grid, each titled with its label
fig = plt.figure(figsize=(10, 4), dpi=150)
for slot in range(1, 9):
    image_idx = slot - 1
    ax = fig.add_subplot(2, 4, slot)
    ax.imshow(X[image_idx])
    ax.set_title(f'Label: {y[image_idx]}')
    # Tick marks carry no information for image panels, so hide them.
    ax.set_xticks([])
    ax.set_yticks([])
fig.suptitle('Sample Images with Labels')
plt.show()

In [None]:
# Bar chart of how many loaded samples carry label 1 versus label 0
class_counts = {
    'Positive': (y == 1).sum(),
    'Negative': (y == 0).sum(),
}
fig, ax = plt.subplots(figsize=(4, 2), dpi=150)
ax.bar(list(class_counts.keys()), list(class_counts.values()))
ax.set_ylabel("# of samples")
ax.set_title("Class Distribution")
plt.show()

In [None]:
# Display pixel value distributions
def plot_pixel_value_distributions(samples, title):
    """Plot normalised pixel-value histograms for a stack of images.

    Draws a 2x2 figure: one 256-bin density histogram for each of the three
    channel slices ``samples[:, :, :, 0]``, ``[..., 1]`` and ``[..., 2]``
    (labelled Red, Green and Blue in that order), plus a fourth histogram
    over every value in ``samples``.

    Args:
        samples: Array indexed as ``samples[:, :, :, channel]``. The panel
            labels only match the data when channels are stored in RGB order.
        title: Text used as the figure's super-title.
    """
    fig = plt.figure(figsize=(8, 6), dpi=150)

    def draw_panel(position, values, color, panel_title):
        # One histogram panel; all four panels share the same axis labels.
        ax = fig.add_subplot(2, 2, position)
        ax.hist(values, bins=256, density=True, color=color)
        ax.set_ylabel("Relative frequency")
        ax.set_xlabel("Pixel value")
        ax.set_title(panel_title)

    channel_styles = (('Red', 'red'), ('Green', 'green'), ('Blue', 'blue'))
    for channel, (channel_name, channel_color) in enumerate(channel_styles):
        draw_panel(
            channel + 1,
            samples[:, :, :, channel].flatten(),
            channel_color,
            f"{channel_name} Channel",
        )

    # Fourth panel pools the values of all channels together.
    draw_panel(4, samples.flatten(), 'gray', "RGB Channel")

    fig.suptitle(title)
    fig.tight_layout()
    plt.show()

# Split the loaded images by label, then draw one histogram figure per class
negative_samples = X[y == 0]
positive_samples = X[y == 1]
for class_name, class_samples in (('Positive', positive_samples), ('Negative', negative_samples)):
    plot_pixel_value_distributions(class_samples, f'Pixel Value Distributions - {class_name} Samples')

In [None]:
# Load every labelled image into memory
N = df["path"].size
if N == 0:
    raise ValueError("No labelled images to load: `df` is empty")

# Fill one preallocated array rather than collecting a Python list and copying
# it with np.array(), which would briefly hold the whole dataset twice.
X = None
for idx, img_path in enumerate(tqdm(df["path"], total=N)):
    # Pixels are kept in cv2's native BGR channel order here; any image fed to
    # the model later must be loaded with the same channel order.
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for a missing/unreadable file instead of raising.
        raise IOError(f"Could not read image: {img_path}")
    if X is None:
        # Shape and dtype are taken from the first image; an image with a
        # different shape makes the assignment below raise.
        X = np.empty((N,) + img.shape, dtype=img.dtype)
    X[idx] = img

y = df["label"].to_numpy()

# Split data into training and validation sets using train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Model definition: three Conv -> MaxPool -> Dropout stages followed by a
# dense classifier head with a single sigmoid output (binary classification).
model = Sequential([
    # The images are passed in as raw pixel intensities (cv2.imread output).
    # Scale them to [0, 1] inside the model: this costs no extra memory for a
    # float copy of the dataset and is applied identically at training,
    # evaluation and prediction time.
    Lambda(lambda pixels: pixels / 255.0, input_shape=(96, 96, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])


In [None]:
# Model compilation
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model training
batch_size = 50
epochs = 3
# Monitor on the split already held out by train_test_split. Using
# validation_split here would carve a further 20% out of X_train, so the model
# would train on less data while X_val went unused during fitting.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
)

# Model evaluation on the held-out validation split
loss, acc = model.evaluate(X_val, y_val)
print("Validation Loss:", loss)
print("Validation Accuracy:", acc)