In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import classification_report

# Load dataset
df = pd.read_csv("/content/Final_reverse_One_hot_encoded_Processed_data.csv")

# Identify target columns
target_columns = ['clarity', 'color', 'cut']

# Drop target columns to get feature set
X = df.drop(columns=target_columns)

# OneHotEncode target labels
encoder = OneHotEncoder(sparse_output=False)
y_clarity = encoder.fit_transform(df[["clarity"]])
y_color = encoder.fit_transform(df[["color"]])
y_cut = encoder.fit_transform(df[["cut"]])

# Train-test split
X_train, X_test, y_clarity_train, y_clarity_test, y_color_train, y_color_test, y_cut_train, y_cut_test = train_test_split(
    X, y_clarity, y_color, y_cut, test_size=0.4, random_state=42
)

X_val, X_test, y_clarity_val, y_clarity_test, y_color_val, y_color_test, y_cut_val, y_cut_test = train_test_split(
    X_test, y_clarity_test, y_color_test, y_cut_test, test_size=0.5, random_state=42
)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Convert labels to NumPy arrays
Y_clarity_train_array = np.array(y_clarity_train)
Y_color_train_array = np.array(y_color_train)
Y_cut_train_array = np.array(y_cut_train)

Y_clarity_val_array = np.array(y_clarity_val)
Y_color_val_array = np.array(y_color_val)
Y_cut_val_array = np.array(y_cut_val)

Y_clarity_test_array = np.array(y_clarity_test)
Y_color_test_array = np.array(y_color_test)
Y_cut_test_array = np.array(y_cut_test)


# Define the model with larger layers and added linear layers
input_layer = keras.Input(shape=(X_train_scaled.shape[1],))


# First linear layer before activation
x = layers.Dense(256, activation="linear")(input_layer)  # Linear transformation
x = layers.Dense(512, activation="relu")(x)  # Increased layer size
x = layers.Dropout(0.3)(x)
x = layers.Dense(1024, activation="relu")(x)  # Larger hidden layer
x = layers.Dense(512, activation="linear")(x)  # Another linear transformation
x = layers.Dropout(0.3)(x)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(128, activation="relu")(x)
# Add a 64-unit layer after the 128-unit layer
x = layers.Dense(64, activation="relu")(x)



# Output layers (remains the same)
clarity_output = layers.Dense(Y_clarity_train_array.shape[1], activation="softmax", name="clarity")(x)
color_output = layers.Dense(Y_color_train_array.shape[1], activation="softmax", name="color")(x)
cut_output = layers.Dense(Y_cut_train_array.shape[1], activation="softmax", name="cut")(x)


# Compile the model
model = keras.Model(inputs=input_layer, outputs=[clarity_output, color_output, cut_output])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss={
        'clarity': 'categorical_crossentropy',
        'color': 'categorical_crossentropy',
        'cut': 'categorical_crossentropy'
    },
    metrics={
        'clarity': 'accuracy',
        'color': 'accuracy',
        'cut': 'accuracy'
    }
)



model.summary()

# Train the model
history = model.fit(
    X_train_scaled,
    {"clarity": Y_clarity_train_array, "color": Y_color_train_array, "cut": Y_cut_train_array},
    validation_data=(X_val_scaled, {"clarity": Y_clarity_val_array, "color": Y_color_val_array, "cut": Y_cut_val_array}),
    epochs=50,
    batch_size=32,
    verbose=1
)

# Evaluate the model
Y_test_pred = model.predict(X_test_scaled)
Y_test_pred = [np.argmax(pred, axis=1) for pred in Y_test_pred]  # Convert probabilities to class labels

# Convert one-hot test labels back to class labels
Y_test_true = [
    np.argmax(Y_clarity_test_array, axis=1),
    np.argmax(Y_color_test_array, axis=1),
    np.argmax(Y_cut_test_array, axis=1)
]

# Generate classification reports
for i, name in enumerate(["Clarity", "Color", "Cut"]):
    print(f"{name} Classification Report:\n", classification_report(Y_test_true[i], Y_test_pred[i]))

# Save the model
model.save("DNN_gemstone_quality_model_large.h5")
print("Model with increased layers saved successfully.")

Epoch 1/50
[1m630/630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 29ms/step - clarity_accuracy: 0.4888 - clarity_loss: 1.0285 - color_accuracy: 0.3082 - color_loss: 2.0638 - cut_accuracy: 0.3673 - cut_loss: 1.5526 - loss: 4.6450 - val_clarity_accuracy: 0.6127 - val_clarity_loss: 0.7669 - val_color_accuracy: 0.3898 - val_color_loss: 1.5824 - val_cut_accuracy: 0.4172 - val_cut_loss: 1.3845 - val_loss: 3.7345
Epoch 2/50
[1m630/630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 31ms/step - clarity_accuracy: 0.5999 - clarity_loss: 0.7873 - color_accuracy: 0.4004 - color_loss: 1.5878 - cut_accuracy: 0.4243 - cut_loss: 1.3694 - loss: 3.7445 - val_clarity_accuracy: 0.6300 - val_clarity_loss: 0.7467 - val_color_accuracy: 0.4021 - val_color_loss: 1.5211 - val_cut_accuracy: 0.4360 - val_cut_loss: 1.3566 - val_loss: 3.6252
Epoch 3/50
[1m630/630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 28ms/step - clarity_accuracy: 0.6220 - clarity_loss: 0.7486 - color_accuracy: 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Clarity Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.81      0.81      1665
           1       1.00      1.00      1.00      1708
           2       0.90      0.96      0.93      1631
           3       0.77      0.72      0.75      1710

    accuracy                           0.87      6714
   macro avg       0.87      0.87      0.87      6714
weighted avg       0.87      0.87      0.87      6714

Color Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.66      0.67       949
           1       0.51      0.60      0.55       965
           2       0.50      0.86      0.63       987
           3       0.00      0.00      0.00      1001
           4       0.52      0.91      0.66       972
           5       0.64      0.25      0.36       482
           7       0.00      0.00      0.00        24
           8       0.22      0.09      0.12        23
          10    