In [1]:
import base64
import getpass
import io
import os
import threading
from datetime import datetime

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from flasgger import Swagger
from flask import Flask, render_template, request
from flask_cors import CORS
from pyngrok import ngrok, conf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences


2025-02-13 13:13:27.228206: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Enable Flask debug mode for this session. FLASK_DEBUG is a boolean switch
# ("1"/"0"); the previous value "development" belonged to the older FLASK_ENV
# variable and only enabled debug mode because Flask treats any value other
# than 0/false/no as true.
# NOTE(review): pyngrok is imported above -- if this app is exposed through a
# public tunnel, confirm that debug mode is really wanted.
os.environ["FLASK_DEBUG"] = "1"

# Flask application object shared by the rest of the notebook.
app = Flask(__name__)
# Attach flasgger (Swagger/OpenAPI documentation support) to the app.
swagger = Swagger(app)
# Apply flask_cors to the app with its default settings.
CORS(app)
# Port number for the service; not used in this cell -- presumably consumed
# by a later cell that starts the server.
port = 5001

In [None]:
# `code_path` (the directory holding the saved model) is not assigned in any
# earlier cell, so a fresh kernel would raise NameError here. Keep an existing
# value if one is already defined; otherwise fall back to the CODE_PATH
# environment variable, then to the current working directory.
# NOTE(review): confirm the real model directory and set it explicitly.
if "code_path" not in globals():
    code_path = os.environ.get("CODE_PATH", os.getcwd())

# Load the trained Keras classifier from its .keras file.
model = load_model(os.path.join(code_path, 'hashtag_classification_Model_98ACC.keras'))

In [None]:

def predict_popularity_category2(new_df, model, caption_tokenizer, hashtag_tokenizer, scaler, structured_columns,
                                 caption_maxlen=50, hashtag_maxlen=3) -> list:
    """
    Predict the popularity category ('Low', 'Medium' or 'High') for each row of ``new_df``.

    The frame is split into three model inputs: a structured feature matrix
    (one-hot categoricals plus scaled image dimensions), padded caption token
    ids, and padded hashtag token ids. The class with the highest predicted
    probability is mapped to its label.

    Parameters:
    - new_df (pd.DataFrame): New data with the columns
         'caption', 'dimensionsHeight', 'dimensionsWidth',
         'hashtags/0', 'hashtags/1', 'hashtags/2',
         'productType', 'type', 'day_of_week', 'season', 'month_of_year'.
         The frame itself is not modified.
    - model: Object whose ``predict`` accepts the list
         ``[structured, caption_ids, hashtag_ids]`` and returns one row of
         class scores per sample.
    - caption_tokenizer: Object exposing ``texts_to_sequences`` for captions.
    - hashtag_tokenizer: Object exposing ``texts_to_sequences`` for hashtags.
    - scaler: Object exposing ``transform`` for the two dimension columns.
    - structured_columns (list or pd.Index): Column layout of the structured
         matrix. Columns absent from ``new_df`` after one-hot encoding are
         filled with 0; columns not listed here are discarded.
    - caption_maxlen (int): Padded length of caption sequences (default 50).
    - hashtag_maxlen (int): Padded length of hashtag sequences (default 3).

    Returns:
    - list of str: one label per input row, in row order. An empty frame
      yields an empty list without calling the scaler, tokenizers or model.

    Raises:
    - KeyError: if a caption, hashtag or categorical column is missing.
    - ValueError: if the model predicts a class index outside 0..2, so the
      result can never silently end up shorter than the input.
    """
    text_columns = ['caption', 'hashtags/0', 'hashtags/1', 'hashtags/2']
    hashtag_columns = text_columns[1:]
    categorical_columns = ['productType', 'type', 'day_of_week', 'season', 'month_of_year']
    numeric_columns = ['dimensionsHeight', 'dimensionsWidth']
    class_labels = ('Low', 'Medium', 'High')

    # Fail early with a single message naming every missing column instead of
    # whichever pandas call happens to trip first.
    missing = [name for name in text_columns + categorical_columns if name not in new_df.columns]
    if missing:
        raise KeyError(f"new_df is missing required column(s): {missing}")

    # Nothing to predict.
    if len(new_df) == 0:
        return []

    # --- Structured branch ---
    # Drop the text columns, one-hot encode the categoricals, then force the
    # column layout given by `structured_columns`.
    structured = pd.get_dummies(new_df.drop(columns=text_columns), columns=categorical_columns)
    structured = structured.reindex(columns=structured_columns, fill_value=0)
    structured[numeric_columns] = scaler.transform(structured[numeric_columns])
    X_structured = structured.to_numpy(dtype='float32')

    # --- Caption branch ---
    # Values are cast with str(), so a missing caption becomes the text "nan".
    captions = new_df['caption'].astype(str).tolist()
    X_text = pad_sequences(
        caption_tokenizer.texts_to_sequences(captions), maxlen=caption_maxlen
    ).astype('int32')

    # --- Hashtag branch ---
    # The three hashtag columns are joined into one space-separated string per row.
    hashtags = new_df[hashtag_columns].astype(str).agg(" ".join, axis=1).tolist()
    X_hashtag = pad_sequences(
        hashtag_tokenizer.texts_to_sequences(hashtags), maxlen=hashtag_maxlen
    ).astype('int32')

    # --- Predict and label ---
    probs = model.predict([X_structured, X_text, X_hashtag])
    predicted_indices = np.argmax(probs, axis=1)

    answer = []
    for row_number, class_index in enumerate(predicted_indices):
        class_index = int(class_index)
        if not 0 <= class_index < len(class_labels):
            raise ValueError(
                f"model predicted class index {class_index} for row {row_number}; "
                f"expected a value in 0..{len(class_labels) - 1}"
            )
        answer.append(class_labels[class_index])

    print('Scale: Low(0-100), Medium(101-600), High(601 or More)')
    return answer

# -----------------------------
# Example usage
# -----------------------------
# Two hand-written sample posts carrying every column that
# predict_popularity_category2 reads:
# 'caption', 'dimensionsHeight', 'dimensionsWidth',
# 'hashtags/0', 'hashtags/1', 'hashtags/2',
# 'productType', 'type', 'day_of_week', 'season', 'month_of_year'
new_data = pd.DataFrame({
    'caption': ["Amazing product, works great!", "Terrible product, not recommended."],
    'dimensionsHeight': [1500, 200],
    'dimensionsWidth': [1000, 120],
    'hashtags/0': ["Wedding", "bad"],
    'hashtags/1': ["trendy", "worse"],
    'hashtags/2': ["sale", "cheap"],
    'productType': ["clips", "TypeB"],
    'type': ["Video", "SubType2"],
    'day_of_week': ["Monday", "Tuesday"],
    'season': ["Summer", "Winter"],
    'month_of_year': [6, 12]
})

# Use the classifier loaded above as `model`; the name
# `hashtag_classification_model` is never defined in this notebook.
# NOTE(review): `caption_tokenizer`, `hashtag_tokenizer`, `scaler` and
# `structured_columns` are not created in any visible cell -- they have to be
# the objects fitted during training, loaded before this cell runs.
predicted_categories = predict_popularity_category2(
    new_data,
    model,
    caption_tokenizer,
    hashtag_tokenizer,
    scaler,
    structured_columns,
)
print("Predicted popularity categories:", predicted_categories)
