### Train a model out of the previous outputs.

In [7]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib

# Load features and labels
features = np.load('features.npy')
labels = np.load('labels.npy')

# Split the dataset
X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=42)

# Define a simple model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(features.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(len(np.unique(labels)), activation='softmax')  # Output layer with softmax for classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Use sparse_categorical_crossentropy for integer labels
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

# Save the model
model.save('model.h5')

# Save the label encoder if not already saved
label_encoder = LabelEncoder()
label_encoder.fit(labels)
joblib.dump(label_encoder, 'label_encoder.pkl')

print("Model training complete and saved to 'model.h5'.")


ValueError: Found input variables with inconsistent numbers of samples: [900, 131187]

### Generating Predictions (Post Training)

In [8]:
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib

# Load the model
model = tf.keras.models.load_model('model.h5')

# Load features
features = np.load('features.npy')

# Load the label encoder
label_encoder = joblib.load('label_encoder.pkl')

# Generate predictions
predictions = model.predict(features)

# Get the predicted class indices
predicted_class_indices = np.argmax(predictions, axis=1)

# Decode the labels
decoded_labels = label_encoder.inverse_transform(predicted_class_indices)

# Create a DataFrame for the predictions
predictions_df = pd.DataFrame({
    'entity_name': np.repeat(['height', 'width', 'depth', 'item_weight', 'maximum_weight_recommendation', 'voltage', 'wattage', 'item_volume'], len(features) // 8),  # Adjust as necessary
    'prediction': decoded_labels
})

# Save to CSV
predictions_df.to_csv('formatted_predictions.csv', index=False)

print("Predictions have been generated and saved to 'formatted_predictions.csv'.")


OSError: No file or directory found at model.h5