In [4]:
pip install flask-cors

Collecting flask-cors
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl (14 kB)
Installing collected packages: flask-cors
Successfully installed flask-cors-5.0.0
Note: you may need to restart the kernel to use updated packages.


In [10]:
from flask import Flask, request, jsonify
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import joblib
from datetime import datetime

app = Flask(__name__)


def _load_artifact(loader, path, description):
    """Return loader(path), turning a missing file into an actionable error.

    Args:
        loader: One-argument callable that reads the artifact (e.g. load_model).
        path: Location of the artifact on disk.
        description: Human-readable artifact name used in the error message.

    Raises:
        FileNotFoundError: If the loader reports that `path` does not exist.
            The exception type is unchanged; only the message is extended.
            Any other exception raised by the loader propagates untouched.
    """
    try:
        return loader(path)
    except FileNotFoundError as exc:
        raise FileNotFoundError(
            exc.errno,
            f"{description} not found (relative paths are resolved against the "
            "current working directory; save the artifact from the training run first)",
            path,
        ) from exc


# Load the BiLSTM model
model = _load_artifact(load_model, 'trained_bidirectional_lstm_model.h5', 'BiLSTM model')

# Load TF-IDF vectorizer and LabelEncoder (these must have been saved after training).
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code -
# only load artifacts produced by a trusted training run.
tfidf_vectorizer = _load_artifact(joblib.load, 'tfidf_vectorizer.pkl', 'TF-IDF vectorizer')
label_encoder = _load_artifact(joblib.load, 'label_encoder.pkl', "LabelEncoder for 'subject'")

# Function to process the title and content using TF-IDF
def get_tfidf_embeddings(text, tfidf_vectorizer):
    """Vectorize one document and return its features as a 1-D array.

    Args:
        text: The document to vectorize; it is passed to the vectorizer as a
            one-element list.
        tfidf_vectorizer: Object whose ``transform`` result exposes ``toarray()``.

    Returns:
        A 1-D array with one entry per vectorizer feature.
    """
    dense = tfidf_vectorizer.transform([text]).toarray()
    # ravel() instead of squeeze(): squeeze() would collapse a single-feature
    # (1, 1) result to a 0-d array, which np.concatenate rejects downstream.
    return dense.ravel()

# Function to process the subject with LabelEncoder
def encode_subject(subject, label_encoder):
    """Return the encoded value of a single subject label.

    The label is wrapped in a one-element list, handed to
    ``label_encoder.transform``, and the first element of the result is
    returned. Any exception raised by ``transform`` propagates to the caller.
    """
    encoded_batch = label_encoder.transform([subject])
    first_code = encoded_batch[0]
    return first_code

@app.route('/predict', methods=['POST'])
def predict():
    """Classify one article described by a JSON request body.

    Reads the keys 'title', 'content', 'subject' and 'date' (format
    'YYYY-MM-DD') from the JSON body, builds a single feature row from them
    and returns the index of the highest-scoring model output.

    Returns:
        ``{'target': <int>}`` on success, or ``({'error': <message>}, 400)``
        when the body is not a JSON object, 'title'/'content' are not strings,
        'date' does not parse, or 'subject' cannot be encoded.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so a bad request gets a clear 400 rather than failing on None.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    title = data.get('title', '')
    content = data.get('content', '')
    subject = data.get('subject', '')
    date = data.get('date', '')

    if not isinstance(title, str) or not isinstance(content, str):
        return jsonify({'error': "Fields 'title' and 'content' must be strings."}), 400

    # Validate the cheap fields before doing any vectorization work.
    try:
        date_obj = datetime.strptime(date, '%Y-%m-%d')
    except (TypeError, ValueError):
        return jsonify({'error': "Field 'date' must be a string in YYYY-MM-DD format."}), 400

    try:
        encoded_subject = encode_subject(subject, label_encoder)
    except (TypeError, ValueError):
        # The encoder rejects labels it cannot map (e.g. not seen when it was fitted).
        return jsonify({'error': "Field 'subject' is not a known subject."}), 400

    # Process the text using TF-IDF and join title and content features
    title_tfidf = get_tfidf_embeddings(title, tfidf_vectorizer)
    content_tfidf = get_tfidf_embeddings(content, tfidf_vectorizer)
    combined_text_features = np.concatenate([title_tfidf, content_tfidf])

    # Split the date into year, month, day and weekday components
    date_features = np.array([date_obj.year, date_obj.month, date_obj.day, date_obj.weekday()])

    # Combine all features into a single row: text, subject code, date parts
    input_features = np.concatenate([combined_text_features, [encoded_subject], date_features])

    # NOTE(review): assumes the model takes a flat (1, n_features) row laid out
    # exactly like this - TODO confirm against the training pipeline.
    input_features = input_features.reshape(1, -1)

    # Make a prediction and pick the class with the highest score
    prediction = model.predict(input_features)
    predicted_label = np.argmax(prediction, axis=1)[0]

    return jsonify({'target': int(predicted_label)})

if __name__ == '__main__':
    # debug=True also turns on the auto-reloader, which re-executes the process
    # and therefore fails when the server is started from a notebook kernel;
    # keep debug output but disable the reloader.
    # NOTE: debug mode exposes the interactive debugger - development use only.
    app.run(debug=True, use_reloader=False)




FileNotFoundError: [Errno 2] No such file or directory: 'tfidf_vectorizer.pkl'