In [8]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import models

In [3]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [16]:
%cd /content/drive/My Drive/WNS_Triange

/content/drive/My Drive/WNS_Triange


In [21]:
# Load training data
train_df = pd.read_csv('train/train.csv')
train_images_path = 'train/images/'

# Data preprocessing
train_df['filename'] = train_images_path + train_df['filename']
train_df['label'] = train_df['label'].astype(str)
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)

# Image Data Generator for data augmentation
datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
train_generator = datagen.flow_from_dataframe(train_data, x_col='filename', y_col='label', target_size=(256, 256), batch_size=32, class_mode='binary')
val_generator = datagen.flow_from_dataframe(val_data, x_col='filename', y_col='label', target_size=(256, 256), batch_size=32, class_mode='binary')

# Build CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

# Train the model
history = model.fit(train_generator, epochs=1, validation_data=val_generator, callbacks=[early_stopping, model_checkpoint])


Found 6463 validated image filenames belonging to 2 classes.
Found 1616 validated image filenames belonging to 2 classes.




  7/202 [>.............................] - ETA: 10:13 - loss: 3.6917 - accuracy: 0.8929

KeyboardInterrupt: 

In [20]:
# Load the best model
best_model = models.load_model('best_model.h5')

# Load test data
test_df = pd.read_csv('test/test.csv')
test_images_path = 'test/images/'

# Data preprocessing for test set
test_df['filename'] = test_images_path + test_df['filename']
test_generator = datagen.flow_from_dataframe(test_df, x_col='filename', y_col=None, target_size=(256, 256), batch_size=32, class_mode=None, shuffle=False)

# Generate predictions
predictions = best_model.predict(test_generator)
test_df['label'] = (predictions > 0.5).astype(int)

# Save predictions to a submission file
submission = test_df[['image_id', 'label']]
submission.to_csv('submission.csv', index=False)

OSError: No file or directory found at best_model.h5

In [15]:
# Calculate F1 score on validation set
val_predictions = model.predict(train_generator)
val_labels = train_data['label'].values
val_predicted_labels = np.round(val_predictions).astype(int)
f1 = f1_score(val_labels, val_predicted_labels)
print(f"Validation F1 Score: {f1}")



TypeError: Labels in y_true and y_pred should be of the same type. Got y_true=['0' '1'] and y_pred=[0]. Make sure that the predictions provided by the classifier coincides with the true labels.

In [22]:
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import f1_score

# Load training data
train_df = pd.read_csv('train/train.csv')
train_images_folder = 'train/images/'

# Load test data
test_df = pd.read_csv('test/test.csv')
test_images_folder = 'test/images/'

# Data preprocessing
train_df['filename'] = train_df['filename'].apply(lambda x: train_images_folder + x)
test_df['filename'] = test_df['filename'].apply(lambda x: test_images_folder + x)

# Split the training set into training and validation sets
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42, stratify=train_df['label'])

# Calculate class weights to handle class imbalance
class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(train_data['label']), y=train_data['label'])
class_weight_dict = {0: class_weights[0], 1: class_weights[1]}

# Image data generator for data augmentation
datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Model architecture
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])


In [23]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Image data generator for training
train_generator = datagen.flow_from_dataframe(
    train_data,
    x_col='filename',
    y_col='label',
    class_mode='raw',
    target_size=(128, 128),
    batch_size=32,
    shuffle=True
)

# Image data generator for validation
val_generator = datagen.flow_from_dataframe(
    val_data,
    x_col='filename',
    y_col='label',
    class_mode='raw',
    target_size=(128, 128),
    batch_size=32,
    shuffle=False
)

# Train the model
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    class_weight=class_weight_dict
)


Found 6463 validated image filenames.
Found 1616 validated image filenames.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
# Predictions on the test set
test_generator = datagen.flow_from_dataframe(
    test_df,
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(128, 128),
    batch_size=32,
    shuffle=False
)

test_predictions = model.predict(test_generator)

# Convert probabilities to binary predictions
binary_predictions = (test_predictions > 0.5).astype(int)

# Create the submission DataFrame
submission_df = pd.DataFrame({'image_id': test_df['image_id'], 'label': binary_predictions.flatten()})

# Save the submission file
submission_df.to_csv('submission.csv', index=False)

Found 3462 validated image filenames.
