### Preprocessing

In [4]:
## Import all necessary libraries
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
import pickle
import warnings
warnings.filterwarnings("ignore")


In [5]:
## Read the QA test-case table from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='qa_test_cases.csv')

In [6]:
## Preview the first five rows of the loaded data
df.head(n=5)

Unnamed: 0,Test_Case_ID,Functionality,Story_Type,Core_Function,Affects_Workflow,Has_Workaround,Impact_Level,Severity_Score,Tester_Name,Time_Taken_Minutes,Priority_Label
0,TC001,Login,React Story,No,No,Yes,,2,Charlie,22,No Issues
1,TC002,Payment Gateway,AEM Story,Yes,No,Yes,Low,2,Alice,30,P3
2,TC003,Search,React Story,Yes,Yes,Yes,,0,Charlie,19,No Issues
3,TC004,Checkout,React Story,No,Yes,Yes,,5,Charlie,22,No Issues
4,TC005,Profile Management,React Story,Yes,No,Yes,,0,Alice,28,No Issues


In [7]:
## Drop columns that are not used as model inputs.
## `df` is rebound instead of using inplace=True, so the statement stays
## chainable and does not mutate the frame object an earlier cell displayed.
df = df.drop(columns=['Test_Case_ID', 'Impact_Level'])

In [8]:
## Check the frame after removing Test_Case_ID and Impact_Level
df.head(n=5)

Unnamed: 0,Functionality,Story_Type,Core_Function,Affects_Workflow,Has_Workaround,Severity_Score,Tester_Name,Time_Taken_Minutes,Priority_Label
0,Login,React Story,No,No,Yes,2,Charlie,22,No Issues
1,Payment Gateway,AEM Story,Yes,No,Yes,2,Alice,30,P3
2,Search,React Story,Yes,Yes,Yes,0,Charlie,19,No Issues
3,Checkout,React Story,No,Yes,Yes,5,Charlie,22,No Issues
4,Profile Management,React Story,Yes,No,Yes,0,Alice,28,No Issues


In [9]:
## Label-encode categorical variables
## Each column gets its OWN LabelEncoder. Refitting a single shared encoder
## overwrites its classes_ on every iteration, so only the last column's
## mapping would survive and the Priority_Label mapping (needed to turn
## predicted class ids back into labels) would be lost.
categorical_cols = ['Priority_Label', 'Core_Function', 'Affects_Workflow', 'Has_Workaround']
label_encoders = {}
for col in categorical_cols:
    label_encoder = LabelEncoder()
    df[col] = label_encoder.fit_transform(df[col])
    label_encoders[col] = label_encoder

## After the loop `label_encoder` is the encoder fitted on the last column,
## which is the same state later cells saw before, so they keep working.

## Persist every per-column encoder (keyed by column name) in one file.
with open('label_encoders.pkl', 'wb') as f:
    pickle.dump(label_encoders, f)

In [10]:
## Inspect the frame after label-encoding the categorical columns
df.head(n=5)

Unnamed: 0,Functionality,Story_Type,Core_Function,Affects_Workflow,Has_Workaround,Severity_Score,Tester_Name,Time_Taken_Minutes,Priority_Label
0,Login,React Story,0,0,1,2,Charlie,22,0
1,Payment Gateway,AEM Story,1,0,1,2,Alice,30,3
2,Search,React Story,1,1,1,0,Charlie,19,0
3,Checkout,React Story,0,1,1,5,Charlie,22,0
4,Profile Management,React Story,1,0,1,0,Alice,28,0


In [11]:
## One-hot encode the 'Functionality' column

onehot_encoder_func = OneHotEncoder()
onehot_encoder_func.fit(df[['Functionality']])
## Despite its name, func_encoder holds the encoded matrix, not an encoder object
func_encoder = onehot_encoder_func.transform(df[['Functionality']])

In [12]:
## List the one-hot column names generated for 'Functionality'
onehot_encoder_func.get_feature_names_out(input_features=['Functionality'])

array(['Functionality_Checkout', 'Functionality_Login',
       'Functionality_Payment Gateway',
       'Functionality_Profile Management', 'Functionality_Search'],
      dtype=object)

In [13]:
## Wrap the encoded matrix in a DataFrame with readable column names.
## index=df.index keeps the rows aligned with `df`: pd.concat(axis=1) joins on
## the index, and without it this frame gets a fresh 0..n-1 index that would
## misalign rows if `df` ever carried a non-default index.
func_encoded_df = pd.DataFrame(
    func_encoder.toarray(),
    columns=onehot_encoder_func.get_feature_names_out(['Functionality']),
    index=df.index,
)
func_encoded_df.head()

Unnamed: 0,Functionality_Checkout,Functionality_Login,Functionality_Payment Gateway,Functionality_Profile Management,Functionality_Search
0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0


In [14]:
## Replace the raw 'Functionality' column with its one-hot encoded columns
df = pd.concat(
    [df.drop(columns='Functionality'), func_encoded_df],
    axis='columns',
)
df.head(n=5)

Unnamed: 0,Story_Type,Core_Function,Affects_Workflow,Has_Workaround,Severity_Score,Tester_Name,Time_Taken_Minutes,Priority_Label,Functionality_Checkout,Functionality_Login,Functionality_Payment Gateway,Functionality_Profile Management,Functionality_Search
0,React Story,0,0,1,2,Charlie,22,0,0.0,1.0,0.0,0.0,0.0
1,AEM Story,1,0,1,2,Alice,30,3,0.0,0.0,1.0,0.0,0.0
2,React Story,1,1,1,0,Charlie,19,0,0.0,0.0,0.0,0.0,1.0
3,React Story,0,1,1,5,Charlie,22,0,1.0,0.0,0.0,0.0,0.0
4,React Story,1,0,1,0,Alice,28,0,0.0,0.0,0.0,1.0,0.0


In [15]:
## One-hot encode the 'Story_Type' column

onehot_encoder_ST = OneHotEncoder()
ST_encoder = onehot_encoder_ST.fit_transform(df[['Story_Type']])

## Build a labelled frame from the encoded matrix. index=df.index keeps its
## rows aligned with `df`, because pd.concat(axis=1) joins on the index.
ST_encoded_df = pd.DataFrame(
    ST_encoder.toarray(),
    columns=onehot_encoder_ST.get_feature_names_out(['Story_Type']),
    index=df.index,
)

## Swap the raw column for its one-hot columns
df = pd.concat([df.drop('Story_Type', axis=1), ST_encoded_df], axis=1)

df.head()

Unnamed: 0,Core_Function,Affects_Workflow,Has_Workaround,Severity_Score,Tester_Name,Time_Taken_Minutes,Priority_Label,Functionality_Checkout,Functionality_Login,Functionality_Payment Gateway,Functionality_Profile Management,Functionality_Search,Story_Type_AEM Story,Story_Type_API Gateway Changes,Story_Type_Database Update,Story_Type_React Story,Story_Type_UI Enhancement
0,0,0,1,2,Charlie,22,0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,1,0,1,2,Alice,30,3,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1,1,1,0,Charlie,19,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,0,1,1,5,Charlie,22,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1,0,1,0,Alice,28,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [16]:
## One-hot encode the 'Tester_Name' column

onehot_encoder_TN = OneHotEncoder()
TN_encoder = onehot_encoder_TN.fit_transform(df[['Tester_Name']])

## Build a labelled frame from the encoded matrix. index=df.index keeps its
## rows aligned with `df`, because pd.concat(axis=1) joins on the index.
TN_encoded_df = pd.DataFrame(
    TN_encoder.toarray(),
    columns=onehot_encoder_TN.get_feature_names_out(['Tester_Name']),
    index=df.index,
)

## Swap the raw column for its one-hot columns
df = pd.concat([df.drop('Tester_Name', axis=1), TN_encoded_df], axis=1)

df.head()

Unnamed: 0,Core_Function,Affects_Workflow,Has_Workaround,Severity_Score,Time_Taken_Minutes,Priority_Label,Functionality_Checkout,Functionality_Login,Functionality_Payment Gateway,Functionality_Profile Management,Functionality_Search,Story_Type_AEM Story,Story_Type_API Gateway Changes,Story_Type_Database Update,Story_Type_React Story,Story_Type_UI Enhancement,Tester_Name_Alice,Tester_Name_Bob,Tester_Name_Charlie
0,0,0,1,2,22,0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,1,0,1,2,30,3,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,1,1,1,0,19,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,0,1,1,5,22,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4,1,0,1,0,28,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0


In [17]:
## Pickle each fitted encoder to its own file (written in the order listed)
## NOTE(review): label_encoder only carries the mapping from its most recent
## fit, so label_encoder.pkl does not describe every label-encoded column.
encoder_files = {
    'label_encoder.pkl': label_encoder,
    'onehot_encoder_func.pkl': onehot_encoder_func,
    'onehot_encoder_ST.pkl': onehot_encoder_ST,
    'onehot_encoder_TN.pkl': onehot_encoder_TN,
}
for encoder_path, fitted_encoder in encoder_files.items():
    with open(encoder_path, 'wb') as f:
        pickle.dump(fitted_encoder, f)

In [18]:
## Separate the target column from the model inputs

y = df['Priority_Label']
X = df.drop(columns=['Priority_Label'])

## Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=32
)

## Standardize the features: the scaler is fitted on the training rows only
## and then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [19]:
## Pickle the fitted scaler to scaler.pkl

with open('scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [20]:
## Distinct encoded class ids present in the test split, in order of appearance
pd.unique(y_test)

array([3, 0, 2, 1])

### ANN Implementation

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime




In [27]:
## Build the ANN model: two ReLU hidden layers followed by a 4-way softmax output

model = Sequential()
## First hidden layer (64 units); input width equals the number of feature columns
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
## Second hidden layer (32 units)
model.add(Dense(32, activation='relu'))
## Output layer: one softmax probability per class (4 classes, multi-class)
model.add(Dense(4, activation='softmax'))

In [28]:
## Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 64)                1216      
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dense_5 (Dense)             (None, 4)                 132       
                                                                 
Total params: 3428 (13.39 KB)
Trainable params: 3428 (13.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [29]:
## Adam optimizer with a learning rate of 1e-3
opt = tf.keras.optimizers.Adam(learning_rate=1e-3)

In [36]:
## The sparse variant of categorical cross-entropy takes integer class ids as
## labels (not one-hot vectors), matching the label-encoded Priority_Label target
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [37]:
## Early stopping: halt once val_loss has not improved for 10 epochs and
## roll the model back to the best weights seen
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor='val_loss',
)

In [38]:
## Train the model
## Validation data is carved out of the training set instead of reusing the
## test split. Early stopping (with best-weight restoration) picks the model
## by val_loss, so validating on X_test/y_test would select the model on test
## data and leave no untouched set for a final evaluation.
## validation_split holds out the last 20% of the samples passed to fit;
## y_train is converted to a NumPy array so both inputs are plain arrays.
history = model.fit(X_train, np.asarray(y_train),
                    validation_split=0.2,
                    epochs=100,
                    callbacks=[early_stopping])

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100


In [39]:
## Write the trained model to model.h5
model.save(filepath='model.h5')