In [None]:
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras import Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# 1. Dataset
# Hand-written training examples: each record pairs a free-text prompt with the
# list of target labels it mentions. Records are built from (prompt, labels)
# pairs so the two field names are spelled out only once.
data = [
    {"prompt": prompt_text, "labels": label_list}
    for prompt_text, label_list in [
        ("Predict next month’s revenue and profit", ["revenue", "profit", "month"]),
        ("Forecast sales for each quarter", ["sales", "quarter"]),
        ("How are expenses changing over the year?", ["expenses", "year"]),
        ("Show profit and cost trends by month", ["profit", "cost", "month"]),
        ("Predict unit sales per week", ["units", "sales", "week"]),
        ("Calculate revenue from last year", ["revenue", "year"]),
        ("Estimate price fluctuation over months", ["price", "month"]),
        ("Analyze cost and revenue in Q1", ["cost", "revenue", "quarter"]),
        (
            "Give me the profit, cost and revenue details for each week",
            ["profit", "cost", "revenue", "week"],
        ),
        (
            "Generate a report of unit sales in the second quarter",
            ["units", "sales", "quarter"],
        ),
    ]
]

# 2. Prepare data
# One row per example; the prompt column is the model input and the labels
# column (a list per row) is the target.
df = pd.DataFrame(data)
X, y = (df[column].values for column in ("prompt", "labels"))

# 3. Tokenize prompts
# Learn the vocabulary from the prompts, then map every prompt to a list of
# integer word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)
# Pad every sequence to the length of the longest prompt so the batch is rectangular.
max_len = max(map(len, X_seq))
X_pad = pad_sequences(X_seq, maxlen=max_len)

# 4. Encode labels
# Convert each label list into a binary indicator row (one column per distinct label).
mlb = MultiLabelBinarizer()
y_enc = mlb.fit_transform(y)

# 5. Train/Test split
# Hold out 20% of the examples for evaluation; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_pad,
    y_enc,
    test_size=0.2,
    random_state=42,
)

# 6. Build the model
# Architecture: learned word embeddings averaged over the sequence, followed by
# a small dense stack and one sigmoid unit per label, so every label gets its
# own independent probability.
model = Sequential([
    # Declare the input shape with an explicit Input layer. Embedding's
    # `input_length` argument is deprecated in Keras 3 and only raises a warning.
    Input(shape=(max_len,)),
    # +1 because Tokenizer word ids start at 1; id 0 is the padding value.
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64),
    GlobalAveragePooling1D(),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='tanh'),
    Dense(len(mlb.classes_), activation='sigmoid')  # multi-label
])

# The plain string 'accuracy' is resolved from the output shape: with more than
# one output unit Keras selects categorical (argmax) accuracy, which is
# meaningless for multi-label targets. BinaryAccuracy scores each label
# independently at a 0.5 threshold; name='accuracy' keeps the existing
# log / history keys ('accuracy', 'val_accuracy') unchanged.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[BinaryAccuracy(name='accuracy', threshold=0.5)],
)

# 7. Train the model
# Stop once the monitored validation metric has not improved for 3 consecutive
# epochs and roll the model back to the best weights seen.
early_stopping = EarlyStopping(patience=3, restore_best_weights=True)

# 20% of the training rows are set aside by Keras for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=8,
    epochs=30,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# 8. Evaluate
# The model emits one probability per label; a label counts as predicted when
# its probability exceeds 0.5.
y_pred = model.predict(X_test)
y_pred_bin = (y_pred > 0.5).astype(int)

print("📊 Classification Report:")
# zero_division=0: labels with no predicted or no true samples in the small
# test split get a score of 0 (the same value the default reports) without
# emitting an UndefinedMetricWarning for each affected metric.
print(
    classification_report(
        y_test,
        y_pred_bin,
        target_names=mlb.classes_,
        zero_division=0,
    )
)




Epoch 1/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.1667 - loss: 0.6925 - val_accuracy: 0.0000e+00 - val_loss: 0.6903
Epoch 2/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step - accuracy: 0.1667 - loss: 0.6882 - val_accuracy: 0.0000e+00 - val_loss: 0.6875
Epoch 3/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step - accuracy: 0.1667 - loss: 0.6839 - val_accuracy: 0.0000e+00 - val_loss: 0.6845
Epoch 4/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step - accuracy: 0.1667 - loss: 0.6790 - val_accuracy: 0.0000e+00 - val_loss: 0.6812
Epoch 5/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step - accuracy: 0.1667 - loss: 0.6735 - val_accuracy: 0.0000e+00 - val_loss: 0.6774
Epoch 6/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - accuracy: 0.1667 - loss: 0.6670 - val_accuracy: 0.0000e+00 - val_loss: 0.6731
Epoch 7/30
[1m1/1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:

# 🔒 Save the model + tokenizer + label binarizer
# All three artefacts are needed together at inference time: the tokenizer maps
# text to word ids, the model scores them, and the binarizer maps output
# columns back to label names.
# NOTE(review): ".h5" is the legacy HDF5 format; the file name is kept so any
# existing loader keeps working — consider migrating to ".keras".
model.save("predict_model.h5")

# `pickle` is imported in the notebook's top import cell.
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

with open("label_binarizer.pkl", "wb") as f:
    pickle.dump(mlb, f)