In [None]:
import os

# Root folder of the competition dataset
dataset_dir = '/kaggle/input/datathon-ai-confluence-iitg-24'

# Collect the names of everything stored directly under the root, then show them
dataset_entries = os.listdir(dataset_dir)
print("Contents of the dataset folder:", dataset_entries)

In [None]:
# The 'Dataset' sub-folder of the competition data
dataset_public_dir = '/kaggle/input/datathon-ai-confluence-iitg-24/Dataset'

# Collect the names of everything stored directly under it, then show them
dataset_public_entries = os.listdir(dataset_public_dir)
print("Contents of the dataset_public folder:", dataset_public_entries)


In [None]:
# Path to the train directory
train_dir = '/kaggle/input/datathon-ai-confluence-iitg-24/Dataset/train'

# Every sub-folder of train_dir is treated as one class by the loading cell.
# Keep directories only, so a stray file is not counted as a class (it would
# inflate the size of the model's output layer and break the per-class
# os.listdir call). Sort because os.listdir returns entries in arbitrary order.
categories = sorted(
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

print("Available categories:", categories)


In [None]:
import os
import cv2
import numpy as np

# Target (width, height) every training image is resized to
image_size = (64, 64)

# Accumulators for pixel data and the matching category name of each image
X = []
y = []
skipped_files = 0  # files OpenCV could not decode

# Walk every category folder and load its images
for category in categories:
    category_path = os.path.join(train_dir, category)
    if not os.path.isdir(category_path):
        # Not a class folder (e.g. a stray file next to the class folders)
        continue

    # Sorted so the sample order, and therefore the seeded train/val split,
    # is the same on every run (os.listdir order is arbitrary).
    for img_name in sorted(os.listdir(category_path)):
        img_path = os.path.join(category_path, img_name)

        # cv2.imread returns None instead of raising when a file cannot be
        # read or decoded; passing None to cv2.resize would crash the cell.
        img = cv2.imread(img_path)
        if img is None:
            skipped_files += 1
            continue

        # OpenCV decodes colour images in BGR order, while the prediction
        # cells load images through Keras' load_img, which yields RGB.
        # Convert here so training and inference see the same channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, image_size)  # Resize to uniform size

        X.append(img)
        y.append(category)

# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)

print(f"Loaded {len(X)} images from {len(categories)} categories.")
if skipped_files:
    print(f"Skipped {skipped_files} unreadable files.")

In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Turn every category name in y into an integer id; the fitted encoder is kept
# so predictions can be mapped back to names later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Expand the integer ids into one-hot rows, the target format expected by the
# categorical cross-entropy loss the model is compiled with.
y_one_hot = to_categorical(y_encoded)

# Preview the first five entries of each representation
print(f"Encoded labels: {y_encoded[:5]}")
print(f"One-hot encoded labels: {y_one_hot[:5]}")


In [None]:
# Scale pixel values into [0, 1].
# Cast to float32 before dividing: dividing the integer image array by a Python
# float would create a float64 copy (twice the memory), whereas the prediction
# cells feed the model float32 arrays produced by img_to_array.
X_normalized = X.astype('float32') / 255.0

print(f"Image shape after normalization: {X_normalized.shape}")

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation; the fixed random_state makes the
# shuffle repeatable for a given input order.
X_train, X_val, y_train, y_val = train_test_split(
    X_normalized,
    y_one_hot,
    test_size=0.2,
    random_state=42,
)

print(f"Training set size: {X_train.shape[0]}, Validation set size: {X_val.shape[0]}")

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# CNN: two convolution + max-pooling stages followed by a dense classifier head.
# The layers are handed to Sequential in order, which is equivalent to adding
# them one at a time.
model = Sequential([
    # Feature extraction
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # regularisation against overfitting
    Dense(len(categories), activation='softmax'),  # one probability per category
])

# Adam optimiser with categorical cross-entropy (targets are one-hot encoded)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

In [None]:
# Fit for 10 epochs in mini-batches of 32, scoring on the validation split
# after each epoch; the returned History object feeds the plots below.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
)

# Persist the trained network to disk
model.save('vehicle_classification_model.h5')

In [None]:
# Score the trained model on the held-out validation split.
# evaluate() returns the loss followed by each compiled metric (accuracy here).
val_metrics = model.evaluate(X_val, y_val)
val_loss, val_accuracy = val_metrics

print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")

In [None]:
import matplotlib.pyplot as plt

# Draw one figure per tracked quantity (accuracy, then loss), each comparing
# the training curve with the validation curve across epochs.
for train_key, val_key, plot_title, y_axis_label in (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(plot_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_axis_label)
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()

In [None]:
# Classify a single image from disk with the trained model
from keras.preprocessing import image

sample_path = '/kaggle/input/datathon-ai-confluence-iitg-24/Dataset/train/Cars/1.jpg'
img = image.load_img(sample_path, target_size=(64, 64))  # same size as the training images

# Image -> array -> batch of one -> pixel values scaled the same way as in training
img_array = np.expand_dims(image.img_to_array(img), axis=0) / 255.0

# The index of the highest score is mapped back to its category name
prediction = model.predict(img_array)
top_index = np.argmax(prediction)
predicted_class = label_encoder.inverse_transform([top_index])
print(f"Predicted class: {predicted_class}")

In [None]:
# Minimum top-class probability required before a prediction is reported
confidence_threshold = 0.7

# Score the image prepared in the previous cell
prediction = model.predict(img_array)
predicted_class = np.argmax(prediction)     # index of the best class
predicted_class_prob = np.max(prediction)   # its probability

# Report the label only when the model is confident enough
if not predicted_class_prob >= confidence_threshold:
    print(f"Prediction confidence is too low: {predicted_class_prob:.2f}")
else:
    predicted_label = label_encoder.inverse_transform([predicted_class])
    print(f"Predicted class: {predicted_label} with confidence: {predicted_class_prob:.2f}")

In [None]:
from sklearn.preprocessing import LabelEncoder
import pickle

# Rebuild the encoder from the labels the model was actually trained on.
# `y` holds the category (folder) name of every loaded training image, so the
# resulting class list cannot drift from the real folder names the way a
# hand-typed list can (e.g. 'Truck' vs 'Trucks', 'Bus' vs 'Buses'). A wrong
# name here would silently end up in every decoded prediction.
label_encoder = LabelEncoder()
label_encoder.fit(y)

# Class labels in the order of the model's output units
class_labels = list(label_encoder.classes_)

# Save the recreated LabelEncoder for future use
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)
print("LabelEncoder recreated and saved.")


In [None]:
from keras.models import load_model
from keras.preprocessing import image
import numpy as np
import pandas as pd
import os

# Restore the network saved by the training cell
model = load_model('vehicle_classification_model.h5')

# Restore the label encoder pickled earlier in this notebook.
# NOTE: pickle.load executes code embedded in the file; only load pickles you
# created yourself.
import pickle
with open('label_encoder.pkl', 'rb') as encoder_file:  # Adjust the path if necessary
    label_encoder = pickle.load(encoder_file)

# Minimum top-class probability for a prediction to be trusted
confidence_threshold = 0.7

In [None]:
import os
import pandas as pd
from keras.preprocessing import image
import numpy as np
from keras.models import load_model
from PIL import ImageFile, Image

# Allow loading of truncated images
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Paths
test_dir = '/kaggle/input/datathon-ai-confluence-iitg-24/Dataset/test'  # Update to your test dataset folder
model_path = 'vehicle_classification_model.h5'
model = load_model(model_path)

# Label encoder: derive the class names from the training folder names instead
# of a hand-typed list. The model was trained on those folder names, and a
# typed list can silently disagree with them ('Truck' vs 'Trucks', 'Bus' vs
# 'Buses'), which would put wrong class strings into the submission.
# Assumes every class folder contributed at least one training image; the
# width check below catches the case where that does not hold.
from sklearn.preprocessing import LabelEncoder
train_root = os.path.join(os.path.dirname(test_dir), 'train')
labels = sorted(
    entry
    for entry in os.listdir(train_root)
    if os.path.isdir(os.path.join(train_root, entry))
)
label_encoder = LabelEncoder()
label_encoder.fit(labels)

# Fail fast if the label set cannot correspond to the model's output units,
# rather than writing a submission with shifted or missing labels.
num_outputs = model.output_shape[-1]
if num_outputs != len(label_encoder.classes_):
    raise ValueError(
        f"Model has {num_outputs} outputs but {len(label_encoder.classes_)} "
        f"class folders were found in {train_root}"
    )

# Validate images to exclude corrupted files
valid_images = []
test_images = sorted(os.listdir(test_dir))
for img_name in test_images:
    img_path = os.path.join(test_dir, img_name)
    try:
        with Image.open(img_path) as img:
            img.verify()  # Verify image integrity
            valid_images.append(img_name)
    except Exception as e:
        print(f"Skipping corrupted image: {img_name} ({e})")

# Initialize results
results = []

for img_name in valid_images:
    img_path = os.path.join(test_dir, img_name)
    try:
        # Load and preprocess image exactly like the training data:
        # 64x64, batch dimension added, pixel values scaled to [0, 1]
        img = image.load_img(img_path, target_size=(64, 64))  # Match training image size
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = img_array / 255.0

        # Predict; verbose=0 avoids printing one progress bar per test image
        prediction = model.predict(img_array, verbose=0)
        predicted_class = np.argmax(prediction)
        predicted_label = label_encoder.inverse_transform([predicted_class])[0]

        # Remove the file extension
        image_id = os.path.splitext(img_name)[0]

        # Append result
        results.append({'ID': image_id, 'class': predicted_label})
    except Exception as e:
        # Best effort: report the failure and continue with the next image
        print(f"Error processing {img_name}: {e}")

# Create DataFrame
submission_df = pd.DataFrame(results)

# Save as CSV
submission_file = 'submission.csv'
submission_df.to_csv(submission_file, index=False)
print(f"Submission saved as {submission_file}")