In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Concatenate, TextVectorization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import GlobalAveragePooling1D

# Load the balanced dataset
df = pd.read_csv("balanced_liked_dataset.csv")

# Handle missing titles (fill with empty string) so every title is a str
df['title'] = df['title'].fillna('')

# Numeric columns that feed the dense branch of the model
numerical_columns = ['comment_count', 'view_count', 'exit_count', 'rating_count',
                     'average_rating', 'post_count', 'follower_count', 'following_count']
numerical_features = df[numerical_columns].values

titles = df['title'].values
labels = df['liked'].values

# Split into train/test BEFORE scaling. Fitting the scaler on the full dataset
# would let the test rows influence the mean/std applied to the training rows
# (data leakage) and make the reported test accuracy optimistic.
# The same random_state and row count are used as before, so the row-to-split
# assignment is unchanged; only the scaling statistics differ.
X_num_train_raw, X_num_test_raw, X_title_train, X_title_test, y_train, y_test = train_test_split(
    numerical_features, titles, labels, test_size=0.2, random_state=42)

# Scale numerical features: statistics come from the training rows only,
# and the held-out rows are transformed with those same statistics.
scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_num_train_raw)
X_num_test = scaler.transform(X_num_test_raw)

# Full scaled matrix, kept for any later cell that still reads this name.
numerical_features_scaled = scaler.transform(numerical_features)

# Title vectorization settings: vocabulary cap and fixed token count per title
max_tokens = 5000
output_sequence_length = 20

# Build the text-to-token-id layer from the settings above
title_vectorizer = TextVectorization(
    max_tokens=max_tokens,
    output_sequence_length=output_sequence_length,
)

# Learn the vocabulary from the training titles only (test titles stay unseen)
title_vectorizer.adapt(X_title_train)

# Model inputs
# The numeric input width is taken from the training matrix instead of being
# hard-coded, so it stays in sync if the list of numeric columns changes.
num_feature_count = X_num_train.shape[1]
numerical_input = Input(shape=(num_feature_count,), name="numerical_input")

# Raw title strings enter the model directly; vectorization is part of the graph.
title_input = Input(shape=(), dtype="string", name="title_input")
title_vector = title_vectorizer(title_input)

# One 16-dim vector per token id, then averaged over the sequence positions.
# The Embedding is created without masking, so padded positions are part of
# the average. This also keeps all-padding (empty) titles well-defined.
title_embed = Embedding(input_dim=max_tokens, output_dim=16)(title_vector)
title_pooled = GlobalAveragePooling1D()(title_embed)

# Concatenate the scaled numeric features with the pooled title representation
merged = Concatenate()([numerical_input, title_pooled])

# Dense layers; the single sigmoid unit outputs the probability of `liked`
x = Dense(64, activation="relu")(merged)
x = Dense(32, activation="relu")(x)
output = Dense(1, activation="sigmoid")(x)

# Build model
model = Model(inputs=[numerical_input, title_input], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss="binary_crossentropy", metrics=["accuracy"])

# Summary
model.summary()

# Optional early stopping, currently disabled:
# from keras.callbacks import EarlyStopping
# early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


# Train the model. Inputs are keyed by the names given to the Input layers.
train_inputs = {
    "numerical_input": X_num_train,
    "title_input": X_title_train,
}
history = model.fit(
    train_inputs,
    y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.1,  # part of the training data is held out for validation
    # callbacks=[early_stop]
)

# Evaluate on the held-out test split, using the same input names as training
test_inputs = {
    "numerical_input": X_num_test,
    "title_input": X_title_test,
}
loss, accuracy = model.evaluate(test_inputs, y_test)

# Report accuracy as a percentage
print(f"Test Accuracy: {accuracy * 100:.2f}%")


In [None]:
# from tensorflow.keras import backend as K
# K.clear_session()
# Write the trained model to 'my_model1.keras' in the current working directory.
# The title vectorizer is applied inside the model graph, so raw title strings
# are the expected text input.
# NOTE(review): the fitted StandardScaler (`scaler`) is not saved here; anything
# that loads this model needs the same scaling for the numeric input. Confirm it
# is persisted elsewhere.
model.save('my_model1.keras')