In [7]:
import json
import os
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [8]:
# Load the labelled intent examples.
# The encoding is given explicitly so the file is decoded as UTF-8 regardless
# of the platform's default locale encoding.
with open('data/intent_data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Create a DataFrame from the JSON data
df = pd.DataFrame(data)

# Fail early, with a readable message, if the columns used below are absent
# (the lookups further down would otherwise raise a bare KeyError).
missing_columns = {'statement', 'intent'} - set(df.columns)
if missing_columns:
    raise KeyError(
        f"intent data is missing required column(s): {sorted(missing_columns)}")

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Extract the text and labels from each split as plain Python lists
train_texts = train_df['statement'].tolist()
train_labels = train_df['intent'].tolist()
test_texts = test_df['statement'].tolist()
test_labels = test_df['intent'].tolist()


In [9]:
# Build the two pipeline stages: TF-IDF features feeding a logistic-regression
# classifier (both with library defaults).
vectorizer = TfidfVectorizer()
classifier = LogisticRegression()

# The vocabulary/IDF weights are learned from the training texts only; the
# test texts are merely transformed with the already-fitted vectorizer.
train_vectors = vectorizer.fit_transform(train_texts)
test_vectors = vectorizer.transform(test_texts)

# Fit the classifier on the training features and their intent labels
classifier.fit(train_vectors, train_labels)

In [10]:
# Predict an intent for every held-out statement
predicted_labels = classifier.predict(test_vectors)

# Print per-class precision / recall / F1 against the true test labels
print(classification_report(y_true=test_labels, y_pred=predicted_labels))


                precision    recall  f1-score   support

     calculate       1.00      1.00      1.00        15
 check_weather       1.00      1.00      1.00        10
get_directions       1.00      1.00      1.00         8
      get_time       1.00      1.00      1.00         5
      greeting       1.00      0.75      0.86         8
      open_app       1.00      1.00      1.00        19
    send_email       1.00      1.00      1.00         8
  send_message       1.00      1.00      1.00         7
     set_alarm       1.00      1.00      1.00         8
     set_timer       1.00      1.00      1.00         4
     translate       0.75      1.00      0.86         6
turn_on_device       1.00      1.00      1.00        20

      accuracy                           0.98       118
     macro avg       0.98      0.98      0.98       118
  weighted avg       0.99      0.98      0.98       118



In [11]:
# Re-evaluate the fitted classifier on the held-out split and keep the
# textual report in `report`.
predicted_labels = classifier.predict(test_vectors)
report = classification_report(test_labels, predicted_labels)
print("Classification Report:\n", report)

# Make sure the output directory exists; open(..., 'wb') does not create
# missing parent directories and would raise FileNotFoundError otherwise.
os.makedirs('models', exist_ok=True)

# Save the classifier to disk.
# NOTE: pickle files can execute arbitrary code when loaded, so only ever
# unpickle these artifacts from a trusted location.
with open('models/intent_classifier_model.pkl', 'wb') as file:
    pickle.dump(classifier, file)
# Save the vectorizer to disk (it is needed to featurize text at inference)
with open('models/intent_classifier_vectorizer_model.pkl', 'wb') as file:
    pickle.dump(vectorizer, file)

Classification Report:
                 precision    recall  f1-score   support

     calculate       1.00      1.00      1.00        15
 check_weather       1.00      1.00      1.00        10
get_directions       1.00      1.00      1.00         8
      get_time       1.00      1.00      1.00         5
      greeting       1.00      0.75      0.86         8
      open_app       1.00      1.00      1.00        19
    send_email       1.00      1.00      1.00         8
  send_message       1.00      1.00      1.00         7
     set_alarm       1.00      1.00      1.00         8
     set_timer       1.00      1.00      1.00         4
     translate       0.75      1.00      0.86         6
turn_on_device       1.00      1.00      1.00        20

      accuracy                           0.98       118
     macro avg       0.98      0.98      0.98       118
  weighted avg       0.99      0.98      0.98       118



In [None]:
# Fit a Keras tokenizer on every statement in the full DataFrame, then encode
# each statement as a list of integer word indices.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(df["statement"])
train_x = tokenizer.texts_to_sequences(df["statement"])

# Raw intent labels for the same rows (still strings at this point)
train_y = df["intent"]

In [None]:
# Pad the input sequences so every row has the length of the longest one
max_length = max(len(seq) for seq in train_x)
train_x = tf.keras.preprocessing.sequence.pad_sequences(
    train_x, maxlen=max_length)

# Convert the output labels to one-hot encoding.
# tf.keras.preprocessing.text has no LabelEncoder, so scikit-learn's
# LabelEncoder is used to map the string intents to integer class ids, which
# to_categorical then expands into one-hot rows.
label_encoder = LabelEncoder()
train_y = tf.keras.utils.to_categorical(label_encoder.fit_transform(train_y))


In [None]:
# Sizes derived from the fitted preprocessing objects.
# One extra embedding row is allocated beyond the number of known words.
vocab_size = len(tokenizer.word_index) + 1
num_classes = len(label_encoder.classes_)

# Define the model architecture layer by layer:
# embedding -> 1D convolution -> global max pooling -> dense -> softmax
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, 128, input_length=max_length))
model.add(tf.keras.layers.Conv1D(64, 5, activation='relu'))
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(64, activation='relu'))
# One output unit per intent class
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))


In [None]:
# Configure training: Adam optimizer, categorical cross-entropy on the
# one-hot targets, and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 10 epochs in mini-batches of 16
model.fit(x=train_x, y=train_y, epochs=10, batch_size=16)


In [None]:
# Example inference: push one statement through the same tokenize -> pad
# pipeline used for training, then decode the model's top class.
text = "What's the time?"
input_seq = tokenizer.texts_to_sequences([text])
input_seq = tf.keras.preprocessing.sequence.pad_sequences(
    input_seq, maxlen=max_length)
predictions = model.predict(input_seq)

# Take the argmax per row (axis=1) so the result is a 1-D array of class ids,
# one per input. A bare np.argmax() returns a single scalar index into the
# flattened array, which an array-based inverse_transform cannot decode.
predicted_ids = np.argmax(predictions, axis=1)
# Only one statement was submitted, so take the first decoded label.
predicted_label = label_encoder.inverse_transform(predicted_ids)[0]
print(f"Predicted label: {predicted_label}")
