In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from imblearn.over_sampling import RandomOverSampler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import classification_report

# -----------------------------
# 1. Load Data and Define Inputs
# -----------------------------
df = pd.read_csv("/content/Final_shape_colorintensity_cut_clartyupdated_dataset.csv")

# Label columns (one per prediction target) and the column that is later read
# as the image path for each row.
target_columns = ["clarity", "cut","color_intensity","shape"]
image_column = "Image id"

# Every remaining column is used as a tabular feature (update as needed).
non_feature_columns = set(target_columns) | {image_column}
feature_columns = [name for name in df.columns if name not in non_feature_columns]

X_tab = df.loc[:, feature_columns]

X_tab.head()

Unnamed: 0,Contrast,Homogeneity,Energy,Correlation,Edge_Density,Intensity_Variance,Hue_Std,Saturation_Std,Aspect_Ratio,Perimeter,Area,Circularity,Convexity,Edge_Sharpness,Symmetry
0,3.261303,0.888582,0.74926,0.995556,2.170091,750.484431,47.795822,63.518461,1.002882,1373.254833,117885.0,0.785536,0.993444,22.581386,0.972512
1,8.984511,0.844536,0.772806,0.995882,5.50375,3341.218207,34.060722,25.861783,0.65678,1293.124012,105705.5,0.794378,0.995606,43.235618,0.925087
2,10.969107,0.845418,0.791677,0.995696,8.323262,4394.860524,45.950897,12.087048,0.623529,306.676186,1695.0,0.226475,0.662368,135.527886,0.926447
3,12.973305,0.85054,0.797043,0.989103,7.084543,2284.82049,43.15884,24.81212,1.092697,1175.928057,727.0,0.000475,0.007589,226.929642,0.935908
4,8.719243,0.873483,0.814791,0.996255,6.012195,3506.119206,39.389006,16.391353,1.935484,239.96551,501.0,0.109333,0.442971,106.968626,0.945912


In [None]:
# Mount Google Drive (if not already mounted)
# Colab-only step: the google.colab package is not available outside a Colab
# runtime. After this call Drive contents are reachable under /content/drive.
# NOTE(review): presumably the paths stored in the "Image id" column point into
# this mount — confirm against the CSV.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Per-row image path; split alongside the tabular features further down.
image_paths = df.loc[:, 'Image id']

In [None]:
import pickle
from sklearn.preprocessing import OneHotEncoder

# One independent one-hot encoder per target, all returning dense arrays.
encoder_clarity, encoder_cut, encoder_color_intensity, encoder_shape = (
    OneHotEncoder(sparse_output=False) for _ in range(4)
)

# Fit each encoder on its own target column (kept 2-D via the double brackets)
# and keep the resulting one-hot label matrices.
y_clarity = encoder_clarity.fit_transform(df[[target_columns[0]]])
y_cut = encoder_cut.fit_transform(df[[target_columns[1]]])
y_color_intensity = encoder_color_intensity.fit_transform(df[[target_columns[2]]])
y_shape = encoder_shape.fit_transform(df[[target_columns[3]]])

# Persist the fitted encoders, keyed by target name, so predictions can be
# mapped back to class labels later.
fitted_encoders = {
    'clarity': encoder_clarity,
    'cut': encoder_cut,
    'color_intensity': encoder_color_intensity,
    'shape': encoder_shape,
}
with open('encoders.pkl', 'wb') as encoder_file:
    pickle.dump(fitted_encoders, encoder_file)
# -----------------------------
# 2. Split Data (including image paths)
# -----------------------------
# Split into train and temp (test+validation)
# test_size=0.4 keeps 60% of the rows for training. train_test_split returns a
# (train, test) pair for every array passed in, in the order they were passed,
# so the names on the left must follow the argument order exactly:
# X_tab, image_paths, y_clarity, y_cut, y_color_intensity, y_shape.
# All arrays are shuffled together, so rows stay aligned across them.
X_tab_train, X_tab_temp, img_paths_train, img_paths_temp, \
    y_clarity_train, y_clarity_temp, y_cut_train, y_cut_temp , y_color_intensity_train, y_color_intensity_temp,y_shape_train , y_shape_temp = train_test_split(
    X_tab, image_paths, y_clarity, y_cut, y_color_intensity, y_shape, test_size=0.4, random_state=42
)

# Further split temp into validation and test sets
# test_size=0.5 halves the held-out 40%, i.e. 20% validation / 20% test overall.
# NOTE(review): neither split is stratified on any target.
X_tab_val, X_tab_test, img_paths_val, img_paths_test, \
    y_clarity_val, y_clarity_test, y_cut_val, y_cut_test,y_color_intensity_val, y_color_intensity_test, y_shape_val , y_shape_test  = train_test_split(
    X_tab_temp, img_paths_temp, y_clarity_temp, y_cut_temp, y_color_intensity_temp , y_shape_temp, test_size=0.5, random_state=42
)

In [None]:

# Attach the image path to the tabular features so both are resampled together.
train_df = X_tab_train.assign(img_path=img_paths_train)

# Random oversampling driven by the clarity labels only; the other targets are
# re-indexed with the same sampled rows below so every array stays aligned.
ros = RandomOverSampler(random_state=42)
train_df_resampled, y_clarity_resampled = ros.fit_resample(train_df, y_clarity_train)

# Split the resampled frame back into image paths and pure tabular features.
img_paths_train_resampled = train_df_resampled["img_path"].values
X_tab_train_resampled = train_df_resampled.drop(columns=["img_path"])  # img_path is not a model feature

# Positions (into the original training rows) chosen by the oversampler.
resample_indices = ros.sample_indices_

# Reuse those positions for the remaining targets and make sure everything is
# a plain NumPy array.
y_clarity_resampled = np.array(y_clarity_resampled)
y_color_intensity_resampled = np.array(y_color_intensity_train[resample_indices])
y_cut_resampled = np.array(y_cut_train[resample_indices])
y_shape_resampled = np.array(y_shape_train[resample_indices])

# Sanity check: all six lengths must be identical.
print(*(len(part) for part in (
    X_tab_train_resampled, img_paths_train_resampled,
    y_clarity_resampled, y_color_intensity_resampled,
    y_cut_resampled, y_shape_resampled,
)))

19952 19952 19952 19952 19952 19952


In [None]:
# Learn the scaling statistics from the (oversampled) training rows only, then
# apply the same transform to every split.
scaler = StandardScaler()
scaler.fit(X_tab_train_resampled)
X_tab_train_scaled = scaler.transform(X_tab_train_resampled)
X_tab_val_scaled, X_tab_test_scaled = (
    scaler.transform(split) for split in (X_tab_val, X_tab_test)
)

# Persist the fitted scaler for inference.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


# Define a helper function to load and preprocess images
def load_and_preprocess_image(image_path, target_size=(128, 128)):
    """Load one image from disk, resize it and scale pixels to [0, 1].

    Args:
        image_path: Path of the image file.
        target_size: (height, width) the image is resized to while loading.

    Returns:
        The image as an array divided by 255.0, or None (after printing a
        warning) when ``image_path`` does not exist.
    """
    if not os.path.exists(image_path):
        # Missing file: warn and let the caller decide how to skip the row.
        print(f"Warning: {image_path} does not exist. Skipping...")
        return None

    # No per-image progress print here: this runs once per row and would bury
    # the missing-file warnings under thousands of path lines.
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=target_size)
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    # Scale pixel values to [0, 1]
    return img_array / 255.0

# Load images for each split (this may take some time depending on dataset size)
def _load_images_with_mask(paths):
    """Load every image in ``paths``.

    Returns:
        (images, keep): ``images`` stacks the images that loaded, and ``keep``
        is a boolean array with one entry per path that is False wherever
        load_and_preprocess_image returned None (file missing).
    """
    loaded = [load_and_preprocess_image(path) for path in paths]
    keep = np.array([img is not None for img in loaded], dtype=bool)
    return np.array([img for img in loaded if img is not None]), keep


train_images, train_keep = _load_images_with_mask(img_paths_train_resampled)
val_images, val_keep = _load_images_with_mask(img_paths_val)
test_images, test_keep = _load_images_with_mask(img_paths_test)

# A skipped image shortens only the image array. Drop the same rows from the
# paths, the scaled tabular features and every label array so that row i still
# describes the same sample in all model inputs and targets. When nothing is
# missing the masks are all True and this changes nothing.
img_paths_train_resampled = img_paths_train_resampled[train_keep]
X_tab_train_scaled = X_tab_train_scaled[train_keep]
y_clarity_resampled = y_clarity_resampled[train_keep]
y_cut_resampled = y_cut_resampled[train_keep]
y_color_intensity_resampled = y_color_intensity_resampled[train_keep]
y_shape_resampled = y_shape_resampled[train_keep]

img_paths_val = img_paths_val[val_keep]
X_tab_val_scaled = X_tab_val_scaled[val_keep]
y_clarity_val = y_clarity_val[val_keep]
y_cut_val = y_cut_val[val_keep]
y_color_intensity_val = y_color_intensity_val[val_keep]
y_shape_val = y_shape_val[val_keep]

img_paths_test = img_paths_test[test_keep]
X_tab_test_scaled = X_tab_test_scaled[test_keep]
y_clarity_test = y_clarity_test[test_keep]
y_cut_test = y_cut_test[test_keep]
y_color_intensity_test = y_color_intensity_test[test_keep]
y_shape_test = y_shape_test[test_keep]

for split_name, keep in (("train", train_keep), ("val", val_keep), ("test", test_keep)):
    if not keep.all():
        print(f"{split_name}: dropped {int((~keep).sum())} rows whose image file is missing")


In [None]:
# Materialise every label set as its own NumPy array. Training labels come from
# the oversampled arrays so they line up with the resampled features.
(Y_clarity_train_array,
 Y_cut_train_array,
 Y_color_intensity_train_array,
 Y_shape_train_array) = (
    np.array(labels)
    for labels in (y_clarity_resampled, y_cut_resampled,
                   y_color_intensity_resampled, y_shape_resampled)
)

(Y_clarity_val_array,
 Y_cut_val_array,
 Y_color_intensity_val_array,
 Y_shape_val_array) = (
    np.array(labels)
    for labels in (y_clarity_val, y_cut_val, y_color_intensity_val, y_shape_val)
)

(Y_clarity_test_array,
 Y_cut_test_array,
 Y_color_intensity_test_array,
 Y_shape_test_array) = (
    np.array(labels)
    for labels in (y_clarity_test, y_cut_test, y_color_intensity_test, y_shape_test)
)

In [None]:
# -----------------------------
# 5. Build the Multi-Input Model
# -----------------------------
# Tabular (MLP) branch: 128 -> 256 -> 128 units, each followed by 30% dropout,
# then a 64-unit embedding.
tab_input = keras.Input(shape=(X_tab_train_scaled.shape[1],), name="tab_input")
t = tab_input
for hidden_units in (128, 256, 128):
    t = layers.Dense(hidden_units, activation="relu")(t)
    t = layers.Dropout(0.3)(t)
t = layers.Dense(64, activation="relu")(t)

# Image (CNN) branch – three conv + max-pool stages, flattened into a 64-unit
# embedding (adjust target_size and architecture as needed).
img_input = keras.Input(shape=(128, 128, 3), name="img_input")
c = img_input
for n_filters in (32, 64, 128):
    c = layers.Conv2D(n_filters, (3, 3), activation="relu")(c)
    c = layers.MaxPooling2D((2, 2))(c)
c = layers.Flatten()(c)
c = layers.Dense(64, activation="relu")(c)

# Combine both branches
combined = layers.concatenate([t, c])
combined = layers.Dense(128, activation="relu")(combined)


def _softmax_head(head_name, one_hot_labels):
    """Softmax classifier on the fused features, one unit per one-hot column."""
    return layers.Dense(one_hot_labels.shape[1], activation="softmax", name=head_name)(combined)


# Output layers for multi-output classification
clarity_output = _softmax_head("clarity", Y_clarity_train_array)
cut_output = _softmax_head("cut", Y_cut_train_array)
color_intensity_output = _softmax_head("color_intensity", Y_color_intensity_train_array)
shape_output = _softmax_head("shape", Y_shape_train_array)

# Define the model with two inputs; the output order here is the order in which
# predict() returns its arrays.
model = keras.Model(
    inputs=[tab_input, img_input],
    outputs=[clarity_output, cut_output, color_intensity_output, shape_output],
)

# Same loss and metric for every head, keyed by the head's layer name.
head_names = ("clarity", "cut", "color_intensity", "shape")
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss={head: "categorical_crossentropy" for head in head_names},
    metrics={head: "accuracy" for head in head_names},
)

model.summary()

In [None]:


# -----------------------------
# 6. Train the Model
# -----------------------------
# Targets are passed as dicts keyed by output-layer name.
train_targets = {
    "clarity": Y_clarity_train_array,
    "cut": Y_cut_train_array,
    "color_intensity": Y_color_intensity_train_array,
    "shape": Y_shape_train_array,
}
val_targets = {
    "clarity": Y_clarity_val_array,
    "cut": Y_cut_val_array,
    "color_intensity": Y_color_intensity_val_array,
    "shape": Y_shape_val_array,
}

history = model.fit(
    [X_tab_train_scaled, train_images],
    train_targets,
    validation_data=([X_tab_val_scaled, val_images], val_targets),
    epochs=10,
    batch_size=32,
    verbose=1,
)

# -----------------------------
# 7. Evaluate the Model
# -----------------------------
# Predict on the test set; one probability array per model output, in the order
# the outputs were declared (clarity, cut, color_intensity, shape).
predictions = model.predict([X_tab_test_scaled, test_images])

# (report title, one-hot ground truth, fitted encoder), in that same order.
report_specs = [
    ("Clarity", Y_clarity_test_array, encoder_clarity),
    ("Cut", Y_cut_test_array, encoder_cut),
    ("Color_intensity", Y_color_intensity_test_array, encoder_color_intensity),
    ("Shape", Y_shape_test_array, encoder_shape),
]

# Convert probabilities to predicted class indices
Y_test_pred = [np.argmax(pred, axis=1) for pred in predictions]

# Convert one-hot test labels back to class indices
Y_test_true = [np.argmax(one_hot, axis=1) for _, one_hot, _ in report_specs]

# Print classification reports for each output
for i, (name, _, label_encoder) in enumerate(report_specs):
    # One-hot column k corresponds to categories_[0][k], so the report can show
    # real class names instead of bare indices.
    class_names = [str(label) for label in label_encoder.categories_[0]]
    report = classification_report(
        Y_test_true[i],
        Y_test_pred[i],
        # Explicit labels keep the rows aligned with class_names even when a
        # class never appears in the test split.
        labels=np.arange(len(class_names)),
        target_names=class_names,
        # Never-predicted classes score 0 without an UndefinedMetricWarning.
        zero_division=0,
    )
    print(f"{name} Classification Report:\n", report)

# Save the trained model
model.save("gemstone_quality_model.h5")


Epoch 1/10
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m170s[0m 265ms/step - clarity_accuracy: 0.4630 - clarity_loss: 1.1067 - color_intensity_accuracy: 0.5376 - color_intensity_loss: 1.2689 - cut_accuracy: 0.8353 - cut_loss: 0.6768 - loss: 4.2027 - shape_accuracy: 0.6494 - shape_loss: 1.1503 - val_clarity_accuracy: 0.5494 - val_clarity_loss: 0.9704 - val_color_intensity_accuracy: 0.5578 - val_color_intensity_loss: 1.1673 - val_cut_accuracy: 0.8777 - val_cut_loss: 0.4945 - val_loss: 3.3576 - val_shape_accuracy: 0.8163 - val_shape_loss: 0.7398
Epoch 2/10
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 267ms/step - clarity_accuracy: 0.6628 - clarity_loss: 0.6989 - color_intensity_accuracy: 0.7030 - color_intensity_loss: 0.7904 - cut_accuracy: 0.9447 - cut_loss: 0.2287 - loss: 2.0692 - shape_accuracy: 0.9096 - shape_loss: 0.3512 - val_clarity_accuracy: 0.5793 - val_clarity_loss: 0.9144 - val_color_intensity_accuracy: 0.5454 - val_color_intensity_loss: 1.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Color_intensity Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.29      0.41        41
           1       0.33      0.25      0.29        51
           2       0.11      0.07      0.09        14
           3       0.52      0.61      0.56       898
           4       0.11      0.05      0.07        57
           5       0.28      0.31      0.29       213
           6       0.32      0.35      0.33       344
           7       0.22      0.40      0.28       121
           8       0.38      0.18      0.24        28
           9       0.04      0.04      0.04        27
          10       0.75      0.53      0.62       717

    accuracy                           0.48      2511
   macro avg       0.34      0.28      0.29      2511
weighted avg       0.50      0.48      0.48      2511

Shape Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.73      0.69       104
        

# Without Oversampling

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from imblearn.over_sampling import RandomOverSampler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import classification_report

# -----------------------------
# 1. Load Data and Define Inputs
# -----------------------------
df = pd.read_csv("/content/Final_shape_colorintensity_cut_clartyupdated_dataset.csv")

# Label columns (one per prediction target) and the column that is later read
# as the image path for each row.
target_columns = ["clarity", "cut","color_intensity","shape"]
image_column = "Image id"

# Every remaining column is used as a tabular feature (update as needed).
non_feature_columns = set(target_columns) | {image_column}
feature_columns = [name for name in df.columns if name not in non_feature_columns]

X_tab = df.loc[:, feature_columns]

X_tab.head()

Unnamed: 0,Contrast,Homogeneity,Energy,Correlation,Edge_Density,Intensity_Variance,Hue_Std,Saturation_Std,Aspect_Ratio,Perimeter,Area,Circularity,Convexity,Edge_Sharpness,Symmetry
0,3.261303,0.888582,0.74926,0.995556,2.170091,750.484431,47.795822,63.518461,1.002882,1373.254833,117885.0,0.785536,0.993444,22.581386,0.972512
1,8.984511,0.844536,0.772806,0.995882,5.50375,3341.218207,34.060722,25.861783,0.65678,1293.124012,105705.5,0.794378,0.995606,43.235618,0.925087
2,10.969107,0.845418,0.791677,0.995696,8.323262,4394.860524,45.950897,12.087048,0.623529,306.676186,1695.0,0.226475,0.662368,135.527886,0.926447
3,12.973305,0.85054,0.797043,0.989103,7.084543,2284.82049,43.15884,24.81212,1.092697,1175.928057,727.0,0.000475,0.007589,226.929642,0.935908
4,8.719243,0.873483,0.814791,0.996255,6.012195,3506.119206,39.389006,16.391353,1.935484,239.96551,501.0,0.109333,0.442971,106.968626,0.945912


In [None]:
# Per-row image path; split alongside the tabular features further down.
image_paths = df.loc[:, 'Image id']

In [None]:
import pickle
from sklearn.preprocessing import OneHotEncoder

# One independent one-hot encoder per target, all returning dense arrays.
encoder_clarity, encoder_cut, encoder_color_intensity, encoder_shape = (
    OneHotEncoder(sparse_output=False) for _ in range(4)
)

# Fit each encoder on its own target column (kept 2-D via the double brackets)
# and keep the resulting one-hot label matrices.
y_clarity = encoder_clarity.fit_transform(df[[target_columns[0]]])
y_cut = encoder_cut.fit_transform(df[[target_columns[1]]])
y_color_intensity = encoder_color_intensity.fit_transform(df[[target_columns[2]]])
y_shape = encoder_shape.fit_transform(df[[target_columns[3]]])

# Persist the fitted encoders, keyed by target name. Note the capitalised file
# name: it is a different file from the 'encoders.pkl' written earlier.
fitted_encoders = {
    'clarity': encoder_clarity,
    'cut': encoder_cut,
    'color_intensity': encoder_color_intensity,
    'shape': encoder_shape,
}
with open('Encoders.pkl', 'wb') as encoder_file:
    pickle.dump(fitted_encoders, encoder_file)

# -----------------------------
# 2. Split Data (including image paths)
# -----------------------------
# Split into train and temp (test+validation)
# test_size=0.4 keeps 60% of the rows for training. train_test_split returns a
# (train, test) pair for every array passed in, in the order they were passed,
# so the names on the left must follow the argument order exactly:
# X_tab, image_paths, y_clarity, y_cut, y_color_intensity, y_shape.
# All arrays are shuffled together, so rows stay aligned across them.
X_tab_train, X_tab_temp, img_paths_train, img_paths_temp, \
    y_clarity_train, y_clarity_temp, y_cut_train, y_cut_temp , y_color_intensity_train, y_color_intensity_temp,y_shape_train , y_shape_temp = train_test_split(
    X_tab, image_paths, y_clarity, y_cut, y_color_intensity, y_shape, test_size=0.4, random_state=42
)

# Further split temp into validation and test sets
# test_size=0.5 halves the held-out 40%, i.e. 20% validation / 20% test overall.
# NOTE(review): neither split is stratified on any target.
X_tab_val, X_tab_test, img_paths_val, img_paths_test, \
    y_clarity_val, y_clarity_test, y_cut_val, y_cut_test,y_color_intensity_val, y_color_intensity_test, y_shape_val , y_shape_test  = train_test_split(
    X_tab_temp, img_paths_temp, y_clarity_temp, y_cut_temp, y_color_intensity_temp , y_shape_temp, test_size=0.5, random_state=42
)

In [None]:
# -----------------------------
# 3. Preprocess Tabular Data and Load Images
# -----------------------------
# Learn the scaling statistics from the training rows only, then apply the same
# transform to every split.
scaler = StandardScaler()
scaler.fit(X_tab_train)
X_tab_train_scaled = scaler.transform(X_tab_train)
X_tab_val_scaled, X_tab_test_scaled = (
    scaler.transform(split) for split in (X_tab_val, X_tab_test)
)

# Persist the fitted scaler (capitalised name: distinct from 'scaler.pkl').
with open('Scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Define a helper function to load and preprocess images
def load_and_preprocess_image(image_path, target_size=(128, 128)):
    """Load one image from disk, resize it and scale pixels to [0, 1].

    Args:
        image_path: Path of the image file.
        target_size: (height, width) the image is resized to while loading.

    Returns:
        The image as an array divided by 255.0, or None (after printing a
        warning) when ``image_path`` does not exist.
    """
    if not os.path.exists(image_path):
        # Missing file: warn and let the caller decide how to skip the row.
        print(f"Warning: {image_path} does not exist. Skipping...")
        return None

    # No per-image progress print here: this runs once per row and would bury
    # the missing-file warnings under thousands of path lines.
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=target_size)
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    # Scale pixel values to [0, 1]
    return img_array / 255.0

# Load images for each split (this may take some time depending on dataset size)
def _load_images_with_mask(paths):
    """Load every image in ``paths``.

    Returns:
        (images, keep): ``images`` stacks the images that loaded, and ``keep``
        is a boolean array with one entry per path that is False wherever
        load_and_preprocess_image returned None (file missing).
    """
    loaded = [load_and_preprocess_image(path) for path in paths]
    keep = np.array([img is not None for img in loaded], dtype=bool)
    return np.array([img for img in loaded if img is not None]), keep


train_images, train_keep = _load_images_with_mask(img_paths_train)
val_images, val_keep = _load_images_with_mask(img_paths_val)
test_images, test_keep = _load_images_with_mask(img_paths_test)

# A skipped image shortens only the image array. Drop the same rows from the
# paths, the scaled tabular features and every label array so that row i still
# describes the same sample in all model inputs and targets. When nothing is
# missing the masks are all True and this changes nothing.
img_paths_train = img_paths_train[train_keep]
X_tab_train_scaled = X_tab_train_scaled[train_keep]
y_clarity_train = y_clarity_train[train_keep]
y_cut_train = y_cut_train[train_keep]
y_color_intensity_train = y_color_intensity_train[train_keep]
y_shape_train = y_shape_train[train_keep]

img_paths_val = img_paths_val[val_keep]
X_tab_val_scaled = X_tab_val_scaled[val_keep]
y_clarity_val = y_clarity_val[val_keep]
y_cut_val = y_cut_val[val_keep]
y_color_intensity_val = y_color_intensity_val[val_keep]
y_shape_val = y_shape_val[val_keep]

img_paths_test = img_paths_test[test_keep]
X_tab_test_scaled = X_tab_test_scaled[test_keep]
y_clarity_test = y_clarity_test[test_keep]
y_cut_test = y_cut_test[test_keep]
y_color_intensity_test = y_color_intensity_test[test_keep]
y_shape_test = y_shape_test[test_keep]

for split_name, keep in (("train", train_keep), ("val", val_keep), ("test", test_keep)):
    if not keep.all():
        print(f"{split_name}: dropped {int((~keep).sum())} rows whose image file is missing")

# Materialise every label set as its own NumPy array (no oversampling in this
# run, so training labels come straight from the split).
(Y_clarity_train_array,
 Y_cut_train_array,
 Y_color_intensity_train_array,
 Y_shape_train_array) = (
    np.array(labels)
    for labels in (y_clarity_train, y_cut_train, y_color_intensity_train, y_shape_train)
)

(Y_clarity_val_array,
 Y_cut_val_array,
 Y_color_intensity_val_array,
 Y_shape_val_array) = (
    np.array(labels)
    for labels in (y_clarity_val, y_cut_val, y_color_intensity_val, y_shape_val)
)

(Y_clarity_test_array,
 Y_cut_test_array,
 Y_color_intensity_test_array,
 Y_shape_test_array) = (
    np.array(labels)
    for labels in (y_clarity_test, y_cut_test, y_color_intensity_test, y_shape_test)
)

In [None]:
# -----------------------------
# 5. Build the Multi-Input Model
# -----------------------------
# Tabular (MLP) branch: 128 -> 256 -> 128 units, each followed by 30% dropout,
# then a 64-unit embedding.
tab_input = keras.Input(shape=(X_tab_train_scaled.shape[1],), name="tab_input")
t = tab_input
for hidden_units in (128, 256, 128):
    t = layers.Dense(hidden_units, activation="relu")(t)
    t = layers.Dropout(0.3)(t)
t = layers.Dense(64, activation="relu")(t)

# Image (CNN) branch – three conv + max-pool stages, flattened into a 64-unit
# embedding (adjust target_size and architecture as needed).
img_input = keras.Input(shape=(128, 128, 3), name="img_input")
c = img_input
for n_filters in (32, 64, 128):
    c = layers.Conv2D(n_filters, (3, 3), activation="relu")(c)
    c = layers.MaxPooling2D((2, 2))(c)
c = layers.Flatten()(c)
c = layers.Dense(64, activation="relu")(c)

# Combine both branches
combined = layers.concatenate([t, c])
combined = layers.Dense(128, activation="relu")(combined)


def _softmax_head(head_name, one_hot_labels):
    """Softmax classifier on the fused features, one unit per one-hot column."""
    return layers.Dense(one_hot_labels.shape[1], activation="softmax", name=head_name)(combined)


# Output layers for multi-output classification
clarity_output = _softmax_head("clarity", Y_clarity_train_array)
cut_output = _softmax_head("cut", Y_cut_train_array)
color_intensity_output = _softmax_head("color_intensity", Y_color_intensity_train_array)
shape_output = _softmax_head("shape", Y_shape_train_array)

# Define the model with two inputs; the output order here is the order in which
# predict() returns its arrays.
model = keras.Model(
    inputs=[tab_input, img_input],
    outputs=[clarity_output, cut_output, color_intensity_output, shape_output],
)

# Same loss and metric for every head, keyed by the head's layer name.
head_names = ("clarity", "cut", "color_intensity", "shape")
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss={head: "categorical_crossentropy" for head in head_names},
    metrics={head: "accuracy" for head in head_names},
)

model.summary()

In [None]:


# -----------------------------
# 6. Train the Model
# -----------------------------
# Targets are passed as dicts keyed by output-layer name.
train_targets = {
    "clarity": Y_clarity_train_array,
    "cut": Y_cut_train_array,
    "color_intensity": Y_color_intensity_train_array,
    "shape": Y_shape_train_array,
}
val_targets = {
    "clarity": Y_clarity_val_array,
    "cut": Y_cut_val_array,
    "color_intensity": Y_color_intensity_val_array,
    "shape": Y_shape_val_array,
}

history = model.fit(
    [X_tab_train_scaled, train_images],
    train_targets,
    validation_data=([X_tab_val_scaled, val_images], val_targets),
    epochs=10,
    batch_size=32,
    verbose=1,
)

# -----------------------------
# 7. Evaluate the Model
# -----------------------------
# Predict on the test set; one probability array per model output, in the order
# the outputs were declared (clarity, cut, color_intensity, shape).
predictions = model.predict([X_tab_test_scaled, test_images])

# (report title, one-hot ground truth, fitted encoder), in that same order.
report_specs = [
    ("Clarity", Y_clarity_test_array, encoder_clarity),
    ("Cut", Y_cut_test_array, encoder_cut),
    ("Color_intensity", Y_color_intensity_test_array, encoder_color_intensity),
    ("Shape", Y_shape_test_array, encoder_shape),
]

# Convert probabilities to predicted class indices
Y_test_pred = [np.argmax(pred, axis=1) for pred in predictions]

# Convert one-hot test labels back to class indices
Y_test_true = [np.argmax(one_hot, axis=1) for _, one_hot, _ in report_specs]

# Print classification reports for each output
for i, (name, _, label_encoder) in enumerate(report_specs):
    # One-hot column k corresponds to categories_[0][k], so the report can show
    # real class names instead of bare indices.
    class_names = [str(label) for label in label_encoder.categories_[0]]
    report = classification_report(
        Y_test_true[i],
        Y_test_pred[i],
        # Explicit labels keep the rows aligned with class_names even when a
        # class never appears in the test split.
        labels=np.arange(len(class_names)),
        target_names=class_names,
        # Never-predicted classes score 0 without an UndefinedMetricWarning.
        zero_division=0,
    )
    print(f"{name} Classification Report:\n", report)


# Save the trained model
model.save("Gemstone_quality_model.h5")

Epoch 1/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 290ms/step - clarity_accuracy: 0.6294 - clarity_loss: 0.9310 - color_intensity_accuracy: 0.4398 - color_intensity_loss: 1.5367 - cut_accuracy: 0.7703 - cut_loss: 0.9353 - loss: 5.0226 - shape_accuracy: 0.4679 - shape_loss: 1.6196 - val_clarity_accuracy: 0.6629 - val_clarity_loss: 0.8345 - val_color_intensity_accuracy: 0.5215 - val_color_intensity_loss: 1.2410 - val_cut_accuracy: 0.8546 - val_cut_loss: 0.5674 - val_loss: 3.5097 - val_shape_accuracy: 0.7606 - val_shape_loss: 0.8789
Epoch 2/10
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 276ms/step - clarity_accuracy: 0.6658 - clarity_loss: 0.8451 - color_intensity_accuracy: 0.5362 - color_intensity_loss: 1.2041 - cut_accuracy: 0.8701 - cut_loss: 0.4783 - loss: 3.2561 - shape_accuracy: 0.8075 - shape_loss: 0.7285 - val_clarity_accuracy: 0.6558 - val_clarity_loss: 0.8276 - val_color_intensity_accuracy: 0.5542 - val_color_intensity_loss: 1.14

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Color_intensity Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.61      0.61        41
           1       0.58      0.22      0.31        51
           2       0.00      0.00      0.00        14
           3       0.57      0.51      0.54       898
           4       0.22      0.04      0.06        57
           5       0.27      0.17      0.21       213
           6       0.32      0.31      0.31       344
           7       0.31      0.49      0.38       121
           8       0.30      0.29      0.29        28
           9       0.31      0.15      0.20        27
          10       0.64      0.84      0.72       717

    accuracy                           0.52      2511
   macro avg       0.37      0.33      0.33      2511
weighted avg       0.50      0.52      0.50      2511

Shape Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.70      0.77       104
        

# Model test

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import joblib
import cv2
import os
from skimage.feature import graycomatrix, graycoprops

# Load the trained model
# "gemstone_quality_model.h5" (lower-case g) is the file saved by the training
# run that uses oversampling; the run without oversampling saves
# "Gemstone_quality_model.h5".
model = load_model("gemstone_quality_model.h5")
print("Model loaded successfully!")

# Load the saved scaler for tabular data
# NOTE(review): scaler.pkl and encoders.pkl are written with pickle.dump in the
# training cells but read here with joblib.load — confirm this pairing is
# intended, or use the same library on both sides.
scaler = joblib.load("scaler.pkl")

# Load the saved encoder (if used)
# The pickled object is a dict of fitted OneHotEncoders keyed by
# 'clarity', 'cut', 'color_intensity' and 'shape'.
encoder = joblib.load("encoders.pkl")

# Load test tabular data (Assuming it's available as 'X_tab_test')
# Ensure X_tab_test is already loaded before applying transformations
# NOTE(review): X_tab_test is not defined anywhere in this cell; it must still
# be in the kernel from a training cell, so this line fails on a fresh kernel.
X_tab_test_scaled = scaler.transform(X_tab_test)

# ---------------------------
# Image Preprocessing & Feature Extraction
# ---------------------------

def load_and_preprocess_image(image_path, target_size=(128, 128)):
    """Read an image file, resize it to ``target_size`` and scale it to [0, 1].

    Returns None (after printing a warning) when ``image_path`` does not exist.
    """
    # Guard clause: nothing to load for a missing file.
    if not os.path.exists(image_path):
        print(f"Warning: {image_path} does not exist. Skipping...")
        return None

    pil_image = tf.keras.preprocessing.image.load_img(image_path, target_size=target_size)
    pixels = tf.keras.preprocessing.image.img_to_array(pil_image)
    return pixels / 255.0  # Normalize

def extract_clarity_features(gray, hsv):
    """Extract clarity-related features from grayscale and HSV images.

    Args:
        gray: 2-D grayscale image. Floating-point input is treated as lying in
            [0, 1] and rescaled to 0-255; integer input is taken to be on the
            0-255 scale already and is used as-is.
        hsv: HSV image; channel 0 (hue) and channel 1 (saturation) are used.

    Returns:
        [contrast, homogeneity, energy, correlation, edge_density,
        intensity_variance, hue_std, saturation_std]
    """
    gray = np.asarray(gray)
    if np.issubdtype(gray.dtype, np.floating):
        gray = np.uint8(gray * 255)
    else:
        # Already 8-bit (the caller passes cv2.cvtColor output). Multiplying by
        # 255 again would wrap around in uint8 arithmetic and corrupt the image.
        gray = gray.astype(np.uint8)

    # Texture statistics from a grey-level co-occurrence matrix
    # (distance 1, angle 0).
    glcm = graycomatrix(gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]

    # Sum of the Canny edge map divided by the pixel count.
    edges = cv2.Canny(gray, threshold1=50, threshold2=150)
    edge_density = np.sum(edges) / (gray.shape[0] * gray.shape[1])
    intensity_variance = np.var(gray)

    hue_std = np.std(hsv[:, :, 0])
    saturation_std = np.std(hsv[:, :, 1])

    return [contrast, homogeneity, energy, correlation, edge_density, intensity_variance, hue_std, saturation_std]

def extract_cut_features(gray):
    """Extract cut-related features from grayscale images.

    Args:
        gray: 2-D grayscale image. Floating-point input is treated as lying in
            [0, 1] and rescaled to 0-255; integer input is taken to be on the
            0-255 scale already and is used as-is.

    Returns:
        [aspect_ratio, perimeter, area, circularity, convexity,
        edge_sharpness, symmetry], or None (after printing a warning) when no
        contour is found.
    """
    gray = np.asarray(gray)
    if np.issubdtype(gray.dtype, np.floating):
        gray = np.uint8(gray * 255)
    else:
        # Already 8-bit (the caller passes cv2.cvtColor output). Multiplying by
        # 255 again would wrap around in uint8 arithmetic and corrupt the image.
        gray = gray.astype(np.uint8)

    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, threshold1=50, threshold2=150)

    contours, _ = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        print("Warning: No contours found. Skipping cut analysis.")
        return None

    # Shape descriptors are measured on the largest external contour only.
    contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(contour)
    aspect_ratio = float(w) / h if h != 0 else 0
    perimeter = cv2.arcLength(contour, True)
    area = cv2.contourArea(contour)
    circularity = (4 * np.pi * area) / (perimeter ** 2) if perimeter != 0 else 0
    hull = cv2.convexHull(contour)
    hull_area = cv2.contourArea(hull)
    convexity = area / hull_area if hull_area != 0 else 0
    edge_sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()

    # Symmetry: 1 minus the normalised mean absolute difference between the
    # image and its mirror, averaged over the horizontal and vertical flips.
    flipped_horizontal = cv2.flip(gray, 1)
    symmetry_horizontal = cv2.absdiff(gray, flipped_horizontal)
    horizontal_symmetry_score = 1 - (np.mean(symmetry_horizontal) / 255)

    flipped_vertical = cv2.flip(gray, 0)
    symmetry_vertical = cv2.absdiff(gray, flipped_vertical)
    vertical_symmetry_score = 1 - (np.mean(symmetry_vertical) / 255)

    symmetry = (horizontal_symmetry_score + vertical_symmetry_score) / 2

    return [aspect_ratio, perimeter, area, circularity, convexity, edge_sharpness, symmetry]

# ---------------------------
# Process All Test Samples
# ---------------------------

test_image_paths = ["/content/Blue Sapphire 2864.jpg"]  # Replace with actual test image paths

# Parallel lists: entry i of each list belongs to the same image.
image_features_list = []
clarity_features_list = []
cut_features_list = []

for image_path in test_image_paths:
    image = load_and_preprocess_image(image_path)
    if image is None:
        # Missing file: the loader has already printed a warning.
        continue

    # Back to 8-bit RGB, then grayscale & HSV views for feature extraction.
    rgb_uint8 = np.uint8(image * 255)
    gray = cv2.cvtColor(rgb_uint8, cv2.COLOR_RGB2GRAY)
    hsv = cv2.cvtColor(rgb_uint8, cv2.COLOR_RGB2HSV)

    clarity_features = extract_clarity_features(gray, hsv)
    cut_features = extract_cut_features(gray)
    if cut_features is None:
        # No contour found: leave the image out of all three lists.
        continue

    image_features_list.append(image)
    clarity_features_list.append(clarity_features)
    cut_features_list.append(cut_features)

# Convert lists to numpy arrays
image_features_array, clarity_features_array, cut_features_array = (
    np.array(collected)
    for collected in (image_features_list, clarity_features_list, cut_features_list)
)





Model loaded successfully!


In [None]:
# Build the tabular model input from the features extracted from the image(s)
# under test, not from an unrelated row of the training-time test split.
if not image_features_list:
    raise ValueError("No test image produced a complete feature set; nothing to predict on.")

# Convert extracted features to proper shape: (n_images, 8) and (n_images, 7)
clarity_features_array = np.array(clarity_features_list)
cut_features_array = np.array(cut_features_list)

# Clarity features followed by cut features gives 15 columns in the order
# Contrast ... Saturation_Std, Aspect_Ratio ... Symmetry.
# NOTE(review): assumes the scaler was fitted on exactly these 15 columns in
# this order (it matches the X_tab.head() preview in the training section) —
# confirm against the training CSV.
extracted_features = np.concatenate([clarity_features_array, cut_features_array], axis=1)

# Apply the scaling learned on the training data; the model was trained on
# scaled features, so raw values must not be fed in directly.
X_tab_final = scaler.transform(extracted_features)

# Ensure image shape is correct
X_image_final = np.array(image_features_list)

# Print final shapes before prediction
print("Tabular Data Shape:", X_tab_final.shape)   # Should be (n_images, 15)
print("Image Data Shape:", X_image_final.shape)   # Should be (n_images, 128, 128, 3)



Tabular Data Shape: (1, 30)
Image Data Shape: (1, 128, 128, 3)


In [None]:
import numpy as np

# Ensure X_tab_final has the correct number of features (should match model input)
# NOTE(review): this slice silently keeps only the leading 15 columns; make
# sure those are the columns the scaler and model were trained on.
if X_tab_final.shape[1] != 15:
    X_tab_final = X_tab_final[:, :15]  # Keep only the first 15 columns

# Make predictions
predictions = model.predict([X_tab_final, X_image_final])

# Extract predicted class indices.
# The model was built with outputs=[clarity, cut, color_intensity, shape] and
# predict() returns one array per output in that order, so index 1 is cut and
# index 2 is color_intensity.
clarity_pred = np.argmax(predictions[0], axis=1)
cut_pred = np.argmax(predictions[1], axis=1)
color_intensity_pred = np.argmax(predictions[2], axis=1)
shape_pred = np.argmax(predictions[3], axis=1)

# Convert predictions to class labels using .categories_
# (one-hot column k of each encoder corresponds to categories_[0][k])
clarity_labels = encoder["clarity"].categories_[0][clarity_pred]
color_intensity_labels = encoder["color_intensity"].categories_[0][color_intensity_pred]
cut_labels = encoder["cut"].categories_[0][cut_pred]
shape_labels = encoder["shape"].categories_[0][shape_pred]

# Print results
print("Clarity Predictions:", clarity_labels)
print("Color Intensity Predictions:", color_intensity_labels)
print("Cut Predictions:", cut_labels)
print("Shape Predictions:", shape_labels)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step
Clarity Predictions: ['Very Slightly Included']
Color Intensity Predictions: ['Medium Light']
Cut Predictions: ['Emerald Cut']
Shape Predictions: ['Round']
