In [32]:
import json
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import re
import joblib

In [33]:
def clean_text(text):
    """Normalize a raw description string before tokenization.

    Every run of non-word characters (anything matched by ``\\W+``) is
    collapsed into a single space, the result is lowercased, and leading and
    trailing whitespace is removed. Underscores count as word characters and
    are therefore kept.

    Args:
        text: The raw description string.

    Returns:
        The normalized string.
    """
    spaced = re.sub(r'\W+', ' ', text)
    lowered = spaced.lower()
    return lowered.strip()

# --- Load inference artifacts (each one exactly once) ----------------------

# Tokenizer. The encoding is given explicitly so reading the file does not
# depend on the platform's default text encoding.
with open('tokenizer.json', encoding='utf-8') as f:
    data = json.load(f)
# NOTE(review): tokenizer_from_json receives whatever json.load returned;
# presumably tokenizer.json stores the tokenizer's JSON as a JSON-encoded
# string -- verify against how the file was written during training.
tokenizer = tokenizer_from_json(data)

# Label binarizer (a MultiLabelBinarizer per the training setup); only its
# `classes_` attribute is used below to map output columns to genre names.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load 'mlb.pkl' from a trusted source.
mlb = joblib.load('mlb.pkl')

# Trained model. Loaded a single time; loading it again before predicting
# would only repeat the disk read and rebuild the same model.
model = load_model('movie_genre_classifier.h5')

# --- Inference settings -----------------------------------------------------

# Sequence length fed to the model. It has to equal the length used when the
# model was trained.
# NOTE(review): 78 is the value carried in this notebook -- confirm it against
# the training code.
max_length = 78

# A genre is reported only when its predicted score is strictly above this.
GENRE_THRESHOLD = 0.55

# --- Input descriptions -----------------------------------------------------

new_movie_descriptions = ["Swathi dreams of owning a restaurant. But when murder derails her plan, she and her lover take an outrageous step - to replace him with her husband.",
                          "Drea and Eleanor agree to go after one another's bullies."]

# --- Preprocess: clean -> token ids -> fixed-length padded sequences --------

new_movie_descriptions_clean = [clean_text(desc) for desc in new_movie_descriptions]
sequences = tokenizer.texts_to_sequences(new_movie_descriptions_clean)
# padding='post' appends the padding after the tokens.
# NOTE(review): padding side should match what was used in training -- confirm.
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

# --- Predict ----------------------------------------------------------------

predictions = model.predict(padded_sequences)

# --- Decode: keep every genre whose score clears the threshold --------------

for idx, desc in enumerate(new_movie_descriptions):
    print(f"Description: {desc}")
    print("Predicted genres:")
    predicted_labels = [
        mlb.classes_[i]
        for i, prob in enumerate(predictions[idx])
        if prob > GENRE_THRESHOLD
    ]
    print(predicted_labels if predicted_labels else "No genres predicted with high confidence.")
    print("\n")




Description: Swathi dreams of owning a restaurant. But when murder derails her plan, she and her lover take an outrageous step - to replace him with her husband.
Predicted genres:
['Comedy', 'Crime', 'Drama']


Description: Drea and Eleanor agree to go after one another's bullies.
Predicted genres:
['Comedy']


