In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import EarlyStopping

# Load the dataset
data = pd.read_csv("alltogether2.4v.csv")

# Separate features and labels
X = data.drop(columns=["Material_Type"])
y = data["Material_Type"]


In [2]:
# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

print(label_encoder.classes_)

# Normalize features
X_train_normalized = (X_train - X_train.min()) / (X_train.max() - X_train.min())
X_test_normalized = (X_test - X_train.min()) / (X_train.max() - X_train.min())



# Define the model architecture
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(len(label_encoder.classes_), activation="softmax")
])

['ALU' 'HDPE' 'LDPE' 'PP']


In [3]:
# Compile the model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(X_train_normalized, y_train, epochs=200, batch_size=64, validation_split=0.2, callbacks=[early_stopping])
# history = model.fit(X_train_normalized, y_train, epochs=50, batch_size=64, validation_split=0.2)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test_normalized, y_test)




Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [4]:
# Make predictions
predictions = model.predict(X_test_normalized)
predicted_labels = np.argmax(predictions, axis=1)
predicted_plastic_types = label_encoder.inverse_transform(predicted_labels)



# Convert predictions to percentage likelihood
percentage_likelihood = predictions.max(axis=1) * 100

# Display test accuracy and example predictions
print(f"Test Accuracy: {test_accuracy}")
for i in range(15):
    if y_test[i] == predicted_labels[i]:
        accuracy = "Correct"
    else:
        accuracy = "False"
    print(f"Sample {i+1}: {accuracy} Predicted {predicted_plastic_types[i]} with {percentage_likelihood[i]:.2f}% likelihood")


# Save the model
model.save("plastic_classifier_model.h5")

total_params = model.count_params()
total_size_bytes = total_params * 4  # Each parameter is usually a 32-bit float
total_size_mb = total_size_bytes / (1024)  # Convert bytes to megabytes

print(f"Total size of the model: {total_size_mb:.2f} KB")


Test Accuracy: 0.9210526347160339
Sample 1: Correct Predicted ALU with 98.69% likelihood
Sample 2: Correct Predicted LDPE with 77.81% likelihood
Sample 3: Correct Predicted LDPE with 64.19% likelihood
Sample 4: Correct Predicted PP with 89.57% likelihood
Sample 5: Correct Predicted ALU with 99.99% likelihood
Sample 6: Correct Predicted HDPE with 99.48% likelihood
Sample 7: Correct Predicted LDPE with 59.81% likelihood
Sample 8: Correct Predicted ALU with 99.99% likelihood
Sample 9: Correct Predicted PP with 57.04% likelihood
Sample 10: Correct Predicted ALU with 99.97% likelihood
Sample 11: Correct Predicted ALU with 99.99% likelihood
Sample 12: Correct Predicted HDPE with 85.33% likelihood
Sample 13: Correct Predicted ALU with 99.96% likelihood
Sample 14: Correct Predicted HDPE with 87.02% likelihood
Sample 15: Correct Predicted LDPE with 95.59% likelihood
Total size of the model: 74.52 KB


  saving_api.save_model(


In [12]:
# Make predictions
predictions = model.predict(X_test_normalized)

# Get the top 4 predictions for each sample
top_predictions = np.argsort(predictions, axis=1)[:, -4:]
top_labels = top_predictions[:, ::-1]

print(top_predictions)

# Reshape the top_labels array to be 1D
top_labels_1d = top_labels.reshape(-1)

# Inverse transform the labels
top_plastic_types = label_encoder.inverse_transform(top_labels_1d)

# Reshape the top_plastic_types array back to 2D
top_plastic_types = top_plastic_types.reshape(top_labels.shape)

# Get the percentage likelihoods for the top 4 predictions
percentage_likelihoods = np.take_along_axis(predictions, top_predictions, axis=1) * 100

# Display test accuracy and example predictions
print(f"Test Accuracy: {test_accuracy}")
for i in range(5):
    num_predictions = min(4, len(np.unique(top_labels[i])))
    print(f"Sample {i+1}:")
    for j in range(num_predictions):
        print(f"  Prediction {j+1}: {top_plastic_types[i][j]} with {percentage_likelihoods[i][j]:.2f}% likelihood")

# Save the model
model.save("plastic_classifier_model.h5")

[[3 2 1 0]
 [0 1 3 2]
 [0 1 3 2]
 [0 1 2 3]
 [3 1 2 0]
 [0 2 3 1]
 [0 1 3 2]
 [3 1 2 0]
 [1 0 2 3]
 [3 1 2 0]
 [3 1 2 0]
 [0 2 3 1]
 [3 1 2 0]
 [0 3 2 1]
 [1 0 3 2]
 [3 0 2 1]
 [3 1 2 0]
 [3 1 2 0]
 [1 0 2 3]
 [0 2 3 1]
 [0 1 2 3]
 [0 1 3 2]
 [0 3 2 1]
 [3 2 1 0]
 [0 2 3 1]
 [3 2 1 0]
 [0 2 3 1]
 [1 0 2 3]
 [1 0 2 3]
 [3 1 2 0]
 [0 2 3 1]
 [3 1 2 0]
 [0 1 2 3]
 [0 1 3 2]
 [0 1 3 2]
 [0 3 2 1]
 [0 1 2 3]
 [3 2 1 0]]
Test Accuracy: 0.9210526347160339
Sample 1:
  Prediction 1: ALU with 0.00% likelihood
  Prediction 2: HDPE with 0.16% likelihood
  Prediction 3: LDPE with 1.15% likelihood
  Prediction 4: PP with 98.69% likelihood
Sample 2:
  Prediction 1: LDPE with 0.10% likelihood
  Prediction 2: PP with 0.13% likelihood
  Prediction 3: HDPE with 21.96% likelihood
  Prediction 4: ALU with 77.81% likelihood
Sample 3:
  Prediction 1: LDPE with 0.02% likelihood
  Prediction 2: PP with 0.77% likelihood
  Prediction 3: HDPE with 35.02% likelihood
  Prediction 4: ALU with 64.19% likelihood
Sampl

  saving_api.save_model(


In [7]:
# Get manual input from the user
manual_input = np.array([[524161,1534725,2819437,3403538,4467703,2420628,2047009,1183458,860611,1677583,2394933,3207380,2819927,1889676,1374559]])  # Replace with your input values

# Normalize the manual input if needed
normalized_manual_input = ((manual_input - 0) / (8388608 - 0))

# Make predictions on manual input
predictions = model.predict(normalized_manual_input)
print(predictions)

# Get the top prediction
top_prediction = np.argmax(predictions)
print(top_prediction)

top_plastic_type = label_encoder.inverse_transform([top_prediction])[0]
print(top_plastic_type)
likelihood = predictions[0][top_prediction] * 100

# Display the prediction for manual input
print(f"Manual Input Prediction:")
print(f"  Prediction: {top_plastic_type} with {likelihood:.2f}% likelihood")

[[6.1377513e-01 2.1493343e-04 5.9797116e-02 3.2621279e-01]]
0
ALU
Manual Input Prediction:
  Prediction: ALU with 61.38% likelihood


In [9]:
# Make predictions from manual
manual_input = np.array([[594978, 1846783, 3119048, 3857789, 5733774, 3304025, 2417774, 1433893, 947344, 2020815, 2890104, 4187639, 3930384, 2542075, 1737344]])

# Calculate the column-wise min and max values from your original dataset
min_values = [74296, 1151721, 2379300, 2748695, 3681840, 1997402, 1742243, 942818, 392145, 795677, 1406647, 2482140, 2630495, 1748806, 1249297]
max_values = [1773075, 3755206, 7787954, 8401566, 8405071, 8401566, 7232265, 8185313, 2080604, 4455589, 8402979, 8405071, 8405071, 8403086, 6084492]

# Convert the lists to numpy arrays
min_values = np.array(min_values)
max_values = np.array(max_values)

# Normalize the manual input using the calculated min and max values
normalized_manual_input = (manual_input - min_values) / (max_values - min_values)

predictions = model.predict(normalized_manual_input)
predicted_labels = np.argmax(predictions, axis=1)
predicted_plastic_types = label_encoder.inverse_transform(predicted_labels)

    
## Calculate the percentage likelihoods
percentage_likelihoods = predictions * 100

# Get the indices of the top 4 predicted classes based on percentage likelihoods
top_indices = np.argsort(percentage_likelihoods[0])[::-1][:4]

# Get the labels for the top predicted classes
top_labels = label_encoder.inverse_transform(top_indices)

# Get the predicted label
predicted_label = top_labels[0]

# Get the true label from the original data (replace with your true label)
true_label = 'ALU'  # Replace with the true label

# Calculate the accuracy
if predicted_label == true_label:
    accuracy = "Correct"
else:
    accuracy = "False"

# Print the prediction, label, and accuracy
print(f"Predicted label: {predicted_label}")
print(f"True label: {true_label}")
print(f"Accuracy: {accuracy}")

# Print the percentages for each type
for label, likelihood in zip(top_labels, percentage_likelihoods[0][top_indices]):
    print(f"{label}: {likelihood:.2f}% likelihood")

# ... (rest of the code)

Predicted label: PP
True label: ALU
Accuracy: False
PP: 97.26% likelihood
LDPE: 2.16% likelihood
HDPE: 0.51% likelihood
ALU: 0.07% likelihood
