In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import opendatasets as od
import cv2

In [25]:
# Download the dataset
od.download("https://www.kaggle.com/datasets/lsind18/gemstones-images")

Skipping, found downloaded files in ".\gemstones-images" (use force=True to force download)


In [26]:
# Define constants
BATCH_SIZE = 32
IMAGE_SIZE = (256, 256)

In [30]:
# Directories for training and testing data
train_data_dir = 'C:/Users/lasit/Desktop/Campus/4th year/1sem/DL/dl_assignment/gemstones-images/train'
test_data_dir = 'C:/Users/lasit/Desktop/Campus/4th year/1sem/DL/dl_assignment/gemstones-images/test'


In [32]:
# Load the datasets with validation split
train_data = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    subset='training',
    validation_split=0.1,
    seed=42
)

Found 2856 files belonging to 87 classes.
Using 2571 files for training.


In [34]:
validation_data = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    subset='validation',
    validation_split=0.1,
    seed=42
)


Found 2856 files belonging to 87 classes.
Using 285 files for validation.


In [36]:
test_data = tf.keras.utils.image_dataset_from_directory(
    test_data_dir,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE
)

Found 363 files belonging to 87 classes.


In [38]:
# Get class names
class_names = train_data.class_names
print("Class names:", class_names)

Class names: ['Alexandrite', 'Almandine', 'Amazonite', 'Amber', 'Amethyst', 'Ametrine', 'Andalusite', 'Andradite', 'Aquamarine', 'Aventurine Green', 'Aventurine Yellow', 'Benitoite', 'Beryl Golden', 'Bixbite', 'Bloodstone', 'Blue Lace Agate', 'Carnelian', 'Cats Eye', 'Chalcedony', 'Chalcedony Blue', 'Chrome Diopside', 'Chrysoberyl', 'Chrysocolla', 'Chrysoprase', 'Citrine', 'Coral', 'Danburite', 'Diamond', 'Diaspore', 'Dumortierite', 'Emerald', 'Fluorite', 'Garnet Red', 'Goshenite', 'Grossular', 'Hessonite', 'Hiddenite', 'Iolite', 'Jade', 'Jasper', 'Kunzite', 'Kyanite', 'Labradorite', 'Lapis Lazuli', 'Larimar', 'Malachite', 'Moonstone', 'Morganite', 'Onyx Black', 'Onyx Green', 'Onyx Red', 'Opal', 'Pearl', 'Peridot', 'Prehnite', 'Pyrite', 'Pyrope', 'Quartz Beer', 'Quartz Lemon', 'Quartz Rose', 'Quartz Rutilated', 'Quartz Smoky', 'Rhodochrosite', 'Rhodolite', 'Rhodonite', 'Ruby', 'Sapphire Blue', 'Sapphire Pink', 'Sapphire Purple', 'Sapphire Yellow', 'Scapolite', 'Serpentine', 'Sodalite',

In [40]:
# Print dataset info
print("Train Data:", train_data)
print("Validation Data:", validation_data)

Train Data: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>
Validation Data: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


In [48]:
# Data Augmentation
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal_and_vertical'),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.2),
    tf.keras.layers.RandomContrast(0.2),
])

In [50]:
# Apply augmentation to training data
train_data = train_data.map(lambda x, y: (data_augmentation(x, training=True), y))

In [52]:
# Normalize data
train_data = train_data.map(lambda x, y: (x / 255.0, y))
validation_data = validation_data.map(lambda x, y: (x / 255.0, y))
test_data = test_data.map(lambda x, y: (x / 255.0, y))

In [54]:
# Cache and Prefetch for performance optimization
train_data = train_data.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
validation_data = validation_data.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

In [56]:
# Create the model
base_model = tf.keras.applications.InceptionV3(input_shape=(256, 256, 3), include_top=False, weights='imagenet', pooling='avg')
base_model.trainable = True  # Fine-tune the model

In [58]:
# Freeze all layers except the last 50
for layer in base_model.layers[:-50]:
    layer.trainable = False

In [60]:
# Define the model architecture
model = tf.keras.models.Sequential([
    base_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),  # Increased Dropout to reduce overfitting
    tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),  # L2 regularization
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(len(class_names), activation='softmax')  # Output layer for classification
])

Epoch 1/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m331s[0m 4s/step - accuracy: 0.0342 - loss: 4.3917 - val_accuracy: 0.0140 - val_loss: 4.5998
Epoch 2/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m282s[0m 3s/step - accuracy: 0.1418 - loss: 3.8585 - val_accuracy: 0.0140 - val_loss: 5.3920
Epoch 3/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m280s[0m 3s/step - accuracy: 0.2542 - loss: 3.2322 - val_accuracy: 0.0105 - val_loss: 6.4201
Epoch 4/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m281s[0m 3s/step - accuracy: 0.3607 - loss: 2.6101 - val_accuracy: 0.0105 - val_loss: 7.1152
Epoch 5/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m285s[0m 4s/step - accuracy: 0.4428 - loss: 2.0942 - val_accuracy: 0.0105 - val_loss: 7.0500
Epoch 6/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 4s/step - accuracy: 0.5422 - loss: 1.6523 - val_accuracy: 0.0175 - val_loss: 7.7317
Epoch 7/50
[1m81/81[0m [32m━━━━

In [62]:
# Compile the model with a reduced learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 775ms/step - accuracy: 0.4866 - loss: 3.4633
Test accuracy: 0.4903581142425537


In [None]:
# Train the model
history = model.fit(
    train_data,
    epochs=100,  # Increase the number of epochs
    validation_data=validation_data
)

In [None]:
# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(test_data)
print(f'Test accuracy: {test_accuracy}')

In [None]:
# Plot accuracy and loss for training and validation
plt.figure(figsize=(12, 4))

In [None]:
# Accuracy plot
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy over epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

In [None]:
# Loss plot
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss over epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

In [None]:
plt.show()

In [69]:
# Load and preprocess a test image
image = cv2.imread('C:/Users/lasit/Desktop/Campus/4th year/1sem/DL/dl_assignment/amazonite_3.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
resized_image = cv2.resize(image, IMAGE_SIZE)  # Resize to (256, 256)

In [71]:
# Normalize and expand dimensions
scaled_image = resized_image / 255.0
scaled_image = np.expand_dims(scaled_image, axis=0)

In [73]:
# Predict
y_hat = model.predict(scaled_image)
predicted_class_index = np.argmax(y_hat, axis=-1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step


In [74]:
# Print the predicted class
print(f'Predicted class: {class_names[predicted_class_index[0]]}')

Predicted class: Tsavorite
