In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
import tensorflow as tf

# Load your dataset
df = pd.read_csv("train_dataset/event_dataset.csv")  # Replace with actual path

# Seed Python, NumPy and TensorFlow in one call so weight initialisation,
# dropout masks and batch shuffling repeat across "Restart & Run All".
# The split below was already seeded with 42; this keeps training consistent with it.
tf.keras.utils.set_random_seed(42)

# Feature & target separation
X = df[['karma', 'event_fomo_score']]
y = df['should_nudge_event']

# Train-test split (stratified so both parts keep the label ratio of `y`)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# NOTE(review): MinMaxScaler is imported at the top of this cell but never
# applied, so the network sees the raw feature values. If scaling is added,
# the identical transform must also be applied everywhere `model.predict`
# is called with raw frames later in the notebook.

# Define model
model = tf.keras.Sequential([
    # An explicit Input object replaces `input_shape=` on the first Dense layer,
    # which Keras reports as a warning when the layer is constructed.
    tf.keras.Input(shape=(2,)),

    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Dense(1, activation='sigmoid')  # Output layer: P(label == 1)
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

# Early stopping: halt after 5 epochs without a val_loss improvement and
# roll the weights back to the best epoch seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

# Train (validation_split holds out 20% of the training rows for val_* metrics)
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate
y_pred_prob = model.predict(X_test)
# predict() returns one column per output unit; flatten to a 1-D label
# vector so it has the same shape as y_test.
y_pred = (y_pred_prob > 0.5).astype(int).ravel()

print(classification_report(y_test, y_pred))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.5583 - loss: 0.8547 - precision_1: 0.6876 - recall_1: 0.6334 - val_accuracy: 0.6469 - val_loss: 0.6488 - val_precision_1: 0.7026 - val_recall_1: 0.8393
Epoch 2/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7239 - loss: 0.5723 - precision_1: 0.7778 - recall_1: 0.8307 - val_accuracy: 0.6237 - val_loss: 0.6513 - val_precision_1: 0.7211 - val_recall_1: 0.7342
Epoch 3/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8202 - loss: 0.4120 - precision_1: 0.8438 - recall_1: 0.9055 - val_accuracy: 0.7113 - val_loss: 0.6113 - val_precision_1: 0.7035 - val_recall_1: 0.9991
Epoch 4/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8390 - loss: 0.3583 - precision_1: 0.8612 - recall_1: 0.9151 - val_accuracy: 0.7156 - val_loss: 0.5703 - val_precision_1: 0.7481 - val_reca

In [10]:
# Row count per target class, most frequent first
df.value_counts(subset='should_nudge_event')

should_nudge_event
1    6791
0    3209
Name: count, dtype: int64

In [11]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Read the separate test CSV and split it into the same two feature
# columns and label column used for training.
test_df = pd.read_csv('test_dataset/test_event_dataset.csv')
feature_columns = ['karma', 'event_fomo_score']
X_test_new = test_df[feature_columns]
y_test_new = test_df['should_nudge_event']

# Sigmoid outputs above 0.5 are mapped to class 1, everything else to class 0.
new_probabilities = model.predict(X_test_new)
y_pred_new = (new_probabilities > 0.5).astype(int)

# Compute every metric first, then print them in a fixed order.
report_new = classification_report(y_test_new, y_pred_new)
matrix_new = confusion_matrix(y_test_new, y_pred_new)
accuracy_new = accuracy_score(y_test_new, y_pred_new)

print("\nMetrics on Test Dataset:")
print("\nClassification Report:")
print(report_new)

print("\nConfusion Matrix:")
print(matrix_new)

print("\nAccuracy Score:", accuracy_new)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Metrics on Test Dataset:

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.89      0.84      1207
           1       0.80      0.65      0.72       793

    accuracy                           0.80      2000
   macro avg       0.80      0.77      0.78      2000
weighted avg       0.80      0.80      0.79      2000


Confusion Matrix:
[[1079  128]
 [ 274  519]]

Accuracy Score: 0.799


In [15]:
# Write the trained Keras model to disk (the .h5 suffix selects the HDF5 format)
event_model_path = 'models/model_event.h5'
model.save(event_model_path)



In [13]:
# Random Forest on the same split: fit once, then score both test sets
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# fit() returns the estimator itself, so construction and training are chained.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predictions for the split test set and for the separate test CSV.
rf_pred = rf_model.predict(X_test)
rf_pred_new = rf_model.predict(X_test_new)

# --- Split test set -----------------------------------------------------
print("\nRandom Forest Results on Original Test Set:")
print("\nClassification Report:")
print(classification_report(y_test, rf_pred))

# --- Separate test CSV --------------------------------------------------
print("\nRandom Forest Results on New Test Dataset:")
print("\nClassification Report:")
print(classification_report(y_test_new, rf_pred_new))

rf_matrix_new = confusion_matrix(y_test_new, rf_pred_new)
print("\nConfusion Matrix:")
print(rf_matrix_new)

rf_accuracy_new = accuracy_score(y_test_new, rf_pred_new)
print("\nAccuracy Score:", rf_accuracy_new)


Random Forest Results on Original Test Set:

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       642
           1       0.97      0.98      0.97      1358

    accuracy                           0.96      2000
   macro avg       0.96      0.96      0.96      2000
weighted avg       0.96      0.96      0.96      2000


Random Forest Results on New Test Dataset:

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.90      0.85      1207
           1       0.82      0.66      0.73       793

    accuracy                           0.81      2000
   macro avg       0.81      0.78      0.79      2000
weighted avg       0.81      0.81      0.80      2000


Confusion Matrix:
[[1089  118]
 [ 272  521]]

Accuracy Score: 0.805


In [14]:
# Save Random Forest model using joblib
import os

from joblib import dump

rf_model_path = 'models/rf_model_event.joblib'

# joblib opens the target file directly and does not create missing parent
# directories, so make sure the output folder exists before writing.
os.makedirs(os.path.dirname(rf_model_path), exist_ok=True)

# dump() returns the list of files it wrote; leaving it as the last
# expression keeps that list as the cell output.
dump(rf_model, rf_model_path)


['models/rf_model_event.joblib']