In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
import tensorflow as tf

from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt

In [7]:
# Read the label table (one row per ImageId/ClassId entry) and preview it.
data = pd.read_csv('../data/train.csv')
data.head()

Unnamed: 0,ImageId,ClassId
0,0002cc93b.jpg,1.0
1,0007a71bf.jpg,3.0
2,000a4bcdd.jpg,1.0
3,000f6bf48.jpg,4.0
4,0014fce06.jpg,3.0


In [9]:
# Show the column labels of the label table.
data.columns

Index(['ImageId', 'ClassId'], dtype='object')

In [31]:
import os

# Collect the file name of every .jpg under the training image directory
# (os.walk also descends into sub-directories; only the bare file name is kept).
# os.walk yields entries in an arbitrary, filesystem-dependent order, so the
# names are sorted to make the row order of new_csv reproducible across runs
# and machines.
train_images = sorted(
    filename
    for _dirpath, _dirnames, file_names in os.walk('../data/train_images/')
    for filename in file_names
    if filename.endswith('.jpg')
)

# One-column frame of image names; labels are attached in a later cell.
new_csv = pd.DataFrame({'ImageId': train_images}, columns=['ImageId'])

In [75]:
import os

# Collect the file name of every .jpg under the test image directory
# (os.walk also descends into sub-directories; only the bare file name is kept).
# os.walk yields entries in an arbitrary, filesystem-dependent order, so the
# names are sorted to give the test set a reproducible order.
test_images = sorted(
    filename
    for _dirpath, _dirnames, file_names in os.walk('../data/test_images/')
    for filename in file_names
    if filename.endswith('.jpg')
)

In [32]:
# Preview the first rows of the image-name frame built from the training directory.
new_csv.head()

Unnamed: 0,ImageId
0,fdd363326.jpg
1,fdd592bf5.jpg
2,fdd63753f.jpg
3,fde8b6a20.jpg
4,fde8d37fc.jpg


In [33]:
# Summary of the ImageId column (count, unique, top, freq).
new_csv.describe()

Unnamed: 0,ImageId
count,100
unique,100
top,fdd363326.jpg
freq,1


In [34]:
# (rows, columns) of the image-name frame.
new_csv.shape

(100, 1)

In [37]:
# Attach the label(s) from train.csv to every image found on disk.
# how='left' keeps images that have no row in train.csv (their ClassId is NaN);
# an image listed several times in train.csv yields several rows here.
# Merging from the de-duplicated ImageId column alone keeps this cell safe to
# re-run: merging an already-merged frame would otherwise add
# ClassId_x / ClassId_y columns and multiply rows.
# NOTE(review): if an image carries several ClassId values it will later be
# fed to the model once per label — confirm that is intended.
new_csv = (
    new_csv[['ImageId']]
    .drop_duplicates()
    .merge(data[['ImageId', 'ClassId']], on='ImageId', how='left')
)


In [38]:
# Preview the frame after the left merge; ClassId is NaN where train.csv has no row for the image.
new_csv.head()

Unnamed: 0,ImageId,ClassId
0,fdd363326.jpg,3.0
1,fdd592bf5.jpg,
2,fdd63753f.jpg,
3,fde8b6a20.jpg,
4,fde8d37fc.jpg,3.0


In [39]:
# (rows, columns) after the merge; more rows than images means some images
# matched several train.csv rows.
new_csv.shape

(103, 2)

In [None]:
# Keep only the labelled rows and store ClassId as an integer (the column is
# float while it still contains NaN).
# Written as one chain so the dtype conversion produces a new frame instead of
# assigning a column into the result of dropna(), which pandas can flag with a
# SettingWithCopyWarning.
new_csv = (
    new_csv
    .dropna(subset=['ClassId'])
    .astype({'ClassId': int})
)

In [55]:
import numpy as np
import os
from tensorflow.keras.utils import load_img, img_to_array

IMG_SIZE = (128, 128)  # target_size handed to load_img for every image

# Feature array: one resized image per row of new_csv (in row order), with
# pixel values divided by 255 so they lie in [0, 1].
X = np.array([
    img_to_array(
        load_img(os.path.join('../data/train_images/', image_id), target_size=IMG_SIZE)
    ) / 255.0
    for image_id in new_csv['ImageId']
])

# Label vector aligned with X: the ClassId of each row.
y = new_csv['ClassId'].values


In [57]:
from tensorflow.keras import models, layers

# Small CNN classifier: two convolution + max-pooling stages followed by a
# dense head.
model = models.Sequential([
    # Input shape is derived from IMG_SIZE so the network always matches the
    # size the images were resized to when loaded; 3 = RGB channels.
    layers.Input(shape=(*IMG_SIZE, 3)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # 5 softmax units: integer labels index directly into this layer, so
    # labels up to 4 are valid.
    # NOTE(review): the ClassId values previewed from train.csv start at 1, so
    # unit 0 is presumably never a target — confirm.
    layers.Dense(5, activation='softmax')
])


In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # y holds integer ClassId labels, not one-hot vectors
    metrics=['accuracy']
)

In [60]:
# Train for 10 epochs on the whole (X, y) set in mini-batches of 64 and keep
# the value returned by fit() in `history`.
# NOTE(review): no validation_split / validation_data is passed, so the
# accuracy logged per epoch is measured on the training data only.
history = model.fit(X, y, epochs=10, batch_size=64)


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step - accuracy: 0.8333 - loss: 0.6915
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step - accuracy: 0.8500 - loss: 0.6442
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step - accuracy: 0.8500 - loss: 0.6056
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363ms/step - accuracy: 0.8500 - loss: 0.5637
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 315ms/step - accuracy: 0.8500 - loss: 0.5507
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 305ms/step - accuracy: 0.8500 - loss: 0.4991
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312ms/step - accuracy: 0.8500 - loss: 0.4748
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 278ms/step - accuracy: 0.8667 - loss: 0.4599
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [None]:
# Test images are loaded one by one so that a single unreadable file is
# reported and skipped rather than aborting the whole cell.
X_test = []
valid_test_images = []  # names of the images that loaded, in the same order as X_test

for img_name in test_images:
    source_path = os.path.join('../data/test_images/', img_name)
    try:
        loaded = load_img(source_path, target_size=IMG_SIZE)
        scaled = img_to_array(loaded) / 255.0  # same [0, 1] scaling as the training images
    except Exception as e:
        print(f"Error loading image {img_name}: {e}")
    else:
        # Only reached when loading succeeded: record pixels and name together
        # so the two lists stay aligned.
        X_test.append(scaled)
        valid_test_images.append(img_name)

X_test = np.array(X_test)

# Report how many images made it into X_test.
print(f"Loaded {len(valid_test_images)} valid test images.")

Loaded 12 valid test images.


In [77]:
# Model output for every loaded test image: one row of class scores per image.
predictions = model.predict(X_test)
# Index of the highest score in each row.
predicted_classes = predictions.argmax(axis=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


In [80]:
# Number of prediction rows returned by model.predict.
len(predictions)

12

In [81]:
# Predicted class index for each test image that loaded successfully.
predicted_classes

array([3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], dtype=int64)

In [82]:
# Build the submission table from the images that were actually predicted.
# predicted_classes has one entry per successfully loaded image
# (valid_test_images), not per file found on disk (test_images); pairing it
# with test_images would raise on a length mismatch as soon as one image
# failed to load.
assert len(valid_test_images) == len(predicted_classes), (
    "number of predictions does not match number of loaded test images"
)

submission = pd.DataFrame({
    'ImageId': valid_test_images,
    'PredictedClass': predicted_classes
})

# index=False: write only the two named columns, not the row index.
submission.to_csv('submission.csv', index=False)

print("Predictions saved to submission.csv")

Predictions saved to submission.csv
