# In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Read the raw records from disk
data = pd.read_csv('new_dataset_filled.csv')

# Replace the two categorical columns with integer ids. Each column gets its
# own encoder, bound to a named variable so its fitted classes_ remain
# available to the rest of the script.
encoder_product_type, encoder_module = LabelEncoder(), LabelEncoder()
for column_name, column_encoder in (
    ('Product Type', encoder_product_type),
    ('Module', encoder_module),
):
    data[column_name] = column_encoder.fit_transform(data[column_name])

# Additional Features
# Defect quantity per unit of Total SMV. A zero or missing 'Total SMV' yields
# +/-inf or NaN here; such rows are filtered out below before training.
data['Defect_Rate'] = data['Defect Qty'] / data['Total SMV']
# Difference between the actual and the average per-day figures
data['Avg_Diff'] = data['Actual Per Day'] - data['Avg Per Day']
# NOTE(review): 'Defect_Ratio' was computed with exactly the same formula as
# 'Defect_Rate', so it adds no information. It is kept (as an explicit alias)
# only so the column layout of `data` and `X` stays unchanged -- confirm which
# denominator was actually intended.
data['Defect_Ratio'] = data['Defect_Rate']

# Split the data into features and labels
feature_columns = ['Product Type', 'Defect_Rate', 'Avg_Diff', 'Defect_Ratio']
feature_values = data[feature_columns].to_numpy(dtype=float)

# A single inf/NaN feature is enough to turn the training loss into NaN, so
# keep only rows whose features are all finite. `data` itself is left intact.
usable_rows = np.isfinite(feature_values).all(axis=1)
dropped_rows = int((~usable_rows).sum())
if dropped_rows:
    print(f'Dropping {dropped_rows} row(s) with non-finite feature values')

X = feature_values[usable_rows]
y = data['Module'].values[usable_rows]

# Split the data into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the number of unique product types and modules
# (taken from the fitted encoders, so ids of filtered-out rows stay valid)
num_product_types = len(encoder_product_type.classes_)
num_modules = len(encoder_module.classes_)

# Build the RNN model
# X mixes one categorical id (column 0, 'Product Type') with continuous
# columns. Only the id may pass through the Embedding: feeding the whole row,
# as a plain Sequential stack would, truncates the continuous values to
# integers and looks them up as product-type ids (negative or too-large values
# are invalid indices). The model therefore splits the row internally while
# still accepting the same single (batch, n_features) input.
num_features = X_train.shape[1]
features = tf.keras.Input(shape=(num_features,), name='features')

# Column 0 -> embedding -> LSTM over a length-1 sequence.
# The Embedding layer casts its (float) input to integer ids itself.
product_type_ids = features[:, :1]
embedded_product_type = Embedding(input_dim=num_product_types, output_dim=32)(product_type_ids)
encoded_product_type = LSTM(64)(embedded_product_type)

# Remaining columns are continuous and join the network as-is.
# NOTE(review): they are not scaled anywhere; consider normalising them.
numeric_features = features[:, 1:]
merged = tf.keras.layers.Concatenate()([encoded_product_type, numeric_features])

# One softmax unit per encoded module class
module_probabilities = Dense(num_modules, activation='softmax')(merged)
model = tf.keras.Model(inputs=features, outputs=module_probabilities)

# Compile the model
# Sparse categorical cross-entropy matches the integer class ids in y.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
# NOTE(review): the held-out test split doubles as validation data here, so the
# per-epoch val_* metrics and the final test metrics are computed on the same rows.
model.fit(X_train, y_train, epochs=200, batch_size=64, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_accuracy}')

# Save the model for future use
# An extension-less path is written as a SavedModel directory by TF 2.x Keras,
# but Keras 3 rejects it with ValueError and requires '.keras' or '.h5'. Keep
# the original artifact where it is supported and fall back to the native
# '.keras' format otherwise, so a finished training run is never lost.
try:
    model.save('module_prediction_rnn_model')
except ValueError:
    model.save('module_prediction_rnn_model.keras')
