In [7]:
pip install tensorflow pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [8]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import MobileNetV2

In [9]:
base_dir = r"C:\Users\yogit\OneDrive\Desktop\sds---try"  
train_csv_path = os.path.join(base_dir, 'train.csv')
train_image_dir = os.path.join(base_dir, 'train_dataset')
test_image_dir = os.path.join(base_dir, 'test_dataset')

In [10]:
train_labels = pd.read_csv(train_csv_path)

train_imgs, val_imgs = train_test_split(train_labels, test_size=0.2, random_state=42)

In [11]:
train_datagen = ImageDataGenerator(rescale=1./255, 
                                   rotation_range=20, 
                                   width_shift_range=0.2, 
                                   height_shift_range=0.2, 
                                   shear_range=0.2, 
                                   zoom_range=0.2, 
                                   horizontal_flip=True, 
                                   fill_mode='nearest')

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(train_imgs, 
                                                    directory='train_dataset', 
                                                    x_col='File Name', 
                                                    y_col='Class', 
                                                    target_size=(224, 224), 
                                                    batch_size=32, 
                                                    class_mode='categorical')

val_generator = val_datagen.flow_from_dataframe(val_imgs, 
                                                directory='train_dataset', 
                                                x_col='File Name', 
                                                y_col='Class', 
                                                target_size=(224, 224), 
                                                batch_size=32, 
                                                class_mode='categorical')

Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.




In [12]:
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

x = base_model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(len(train_labels['Class'].unique()), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])


In [13]:
train_generator = train_datagen.flow_from_dataframe(
    train_imgs,
    directory=train_image_dir,
    x_col='File Name',  
    y_col='Class',       
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_dataframe(
    val_imgs,
    directory=train_image_dir,
    x_col='File Name',  
    y_col='Class',       
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

print("Classes in train_generator:", train_generator.class_indices)
print("Classes in val_generator:", val_generator.class_indices)

Found 4960 validated image filenames belonging to 8 classes.
Found 1241 validated image filenames belonging to 8 classes.
Classes in train_generator: {'bright dune': 0, 'crater': 1, 'dark dune': 2, 'impact ejecta': 3, 'other': 4, 'slope streak': 5, 'spider': 6, 'swiss cheese': 7}
Classes in val_generator: {'bright dune': 0, 'crater': 1, 'dark dune': 2, 'impact ejecta': 3, 'other': 4, 'slope streak': 5, 'spider': 6, 'swiss cheese': 7}


In [14]:
history = model.fit(train_generator, validation_data=val_generator, epochs=20)

Epoch 1/20


  self._warn_if_super_not_called()


[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 2s/step - accuracy: 0.7083 - loss: 1.6967 - val_accuracy: 0.8815 - val_loss: 0.3970
Epoch 2/20
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 1s/step - accuracy: 0.8388 - loss: 0.5225 - val_accuracy: 0.9049 - val_loss: 0.2991
Epoch 3/20
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 1s/step - accuracy: 0.8600 - loss: 0.4251 - val_accuracy: 0.9057 - val_loss: 0.2736
Epoch 4/20
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 1s/step - accuracy: 0.8679 - loss: 0.4077 - val_accuracy: 0.9154 - val_loss: 0.2650
Epoch 5/20
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 1s/step - accuracy: 0.8846 - loss: 0.3438 - val_accuracy: 0.9154 - val_loss: 0.2470
Epoch 6/20
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1465s[0m 9s/step - accuracy: 0.8835 - loss: 0.3430 - val_accuracy: 0.9081 - val_loss: 0.2658
Epoch 7/20
[1m155/155[0m [32m

In [15]:
model.save('model.h5')



In [16]:
test_image_files = [f for f in os.listdir(test_image_dir) if f.lower().endswith(('png', 'jpg', 'jpeg'))]

test_df = pd.DataFrame({'image_name': test_image_files})

print("Test DataFrame head:\n", test_df.head())

Test DataFrame head:
   image_name
0      1.jpg
1     10.jpg
2    100.jpg
3   1000.jpg
4   1001.jpg


In [17]:
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory=test_image_dir,
    x_col='image_name',
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False
)

Found 2000 validated image filenames.


In [21]:
predictions = model.predict(test_generator)
predicted_classes = predictions.argmax(axis=-1)


filenames = test_generator.filenames
results = pd.DataFrame({"Filename": filenames, "Predictions": predicted_classes})
results.to_csv("KBLD_submission.csv", index=False)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 622ms/step


In [22]:
output_path = 'KBLD_submission.csv'

results.to_csv(output_path, index=False)

if os.path.exists(output_path):
    print(f"File saved successfully at {output_path}")
else:
    print(f"Failed to save file at {output_path}")


File saved successfully at KBLD_submission.csv


In [23]:
for filename, prediction in zip(filenames[:10], predicted_classes[:10]):  
    print(f"Filename: {filename}, Prediction: {prediction}")


Filename: 1.jpg, Prediction: 5
Filename: 10.jpg, Prediction: 1
Filename: 100.jpg, Prediction: 4
Filename: 1000.jpg, Prediction: 4
Filename: 1001.jpg, Prediction: 4
Filename: 1002.jpg, Prediction: 4
Filename: 1003.jpg, Prediction: 4
Filename: 1004.jpg, Prediction: 4
Filename: 1005.jpg, Prediction: 4
Filename: 1006.jpg, Prediction: 5
