In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from fastapi import FastAPI
from pydantic import BaseModel

import pandas as pd

# Load the dataset
dataset = pd.read_json('dataset.json')

# Data preprocessing: drop every row that has a missing value in any column,
# then lower-case the external status text.
dataset.dropna(inplace=True)
dataset['externalStatus'] = dataset['externalStatus'].str.lower()

# Persist the cleaned data. `index=False` is deliberately not passed: with the
# default orient ('columns') the row labels are part of the layout, and older
# pandas releases raise ValueError for that combination.
dataset.to_json('cleaned_dataset.json')


# Features are the external status descriptions; targets are the internal status labels
X, y = dataset['externalStatus'], dataset['internalStatus']

# Map each internal status label to an integer class id
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows for validation (fixed seed so the split is repeatable)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Bag-of-words encoding: the vocabulary is fitted on the training split only
# and then reused unchanged for the validation split
vectorizer = CountVectorizer()
X_train_encoded = vectorizer.fit_transform(X_train)
X_val_encoded = vectorizer.transform(X_val)

# Define the model architecture: a small feed-forward classifier over the
# bag-of-words vector. The input width is declared with an explicit Input
# object instead of `input_shape=` on the first Dense layer, which Keras warns
# against for Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_encoded.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # Output layer: one softmax unit per encoded internal status class
    tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # Labels are integer class ids, not one-hot vectors
    metrics=['accuracy'],
)

# Train the model
# The vectorizer output is converted to dense arrays once; the same validation
# array is reused for both the fit-time validation and the final evaluation.
X_train_encoded_dense = X_train_encoded.toarray()
X_val_encoded_dense = X_val_encoded.toarray()

history = model.fit(
    X_train_encoded_dense,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val_encoded_dense, y_val),
)


# Evaluate the model on the held-out validation split
loss, accuracy = model.evaluate(X_val_encoded_dense, y_val)
print(f'Validation Accuracy: {accuracy}')


# FastAPI app
# Application object that the /predict route is registered on.
app = FastAPI()

# Request body schema for the prediction endpoint.
class InputData(BaseModel):
    # Free-text external status description to classify.
    externalStatus: str

# accept external status descriptions and return predicted internal status labels
#
# Input:  request body with a single `externalStatus` string.
# Output: dict with the key "predictedInternalStatus" holding the decoded
#         label of the highest-scoring class.
@app.post("/predict")
def predict_internal_status(data: InputData):
    # Encode the single description with the vocabulary fitted at training
    # time. The model was trained on dense arrays (`.toarray()`), so the
    # sparse vectorizer output is densified the same way before predicting.
    input_text = [data.externalStatus]
    input_encoded = vectorizer.transform(input_text).toarray()

    # prediction: one row of class scores; pick the best-scoring class
    prediction = model.predict(input_encoded)
    predicted_label_index = np.argmax(prediction)
    predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]

    # inverse_transform returns a NumPy array element; unwrap NumPy scalars to
    # the equivalent plain Python value so the response is always serializable.
    if isinstance(predicted_label, np.generic):
        predicted_label = predicted_label.item()

    return {"predictedInternalStatus": predicted_label}


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.2450 - loss: 2.6404 - val_accuracy: 0.4449 - val_loss: 2.3766
Epoch 2/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4932 - loss: 2.1899 - val_accuracy: 0.5673 - val_loss: 1.8193
Epoch 3/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6113 - loss: 1.5868 - val_accuracy: 0.6163 - val_loss: 1.3220
Epoch 4/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8041 - loss: 1.0505 - val_accuracy: 0.8082 - val_loss: 0.9079
Epoch 5/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8608 - loss: 0.6575 - val_accuracy: 0.8327 - val_loss: 0.6372
Epoch 6/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8825 - loss: 0.4548 - val_accuracy: 0.8327 - val_loss: 0.4796
Epoch 7/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━