In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model

In [2]:
campaign_df = pd.read_csv("../data/campaigns.csv").rename(columns={"id": "campaign_id"})
event_df = pd.read_csv("../data/events.csv")
order_df = pd.read_csv("../data/orders.csv")

In [3]:
campaign_df["to_date"] = pd.to_datetime(campaign_df["to_date"])
campaign_df["from_date"] = pd.to_datetime(campaign_df["from_date"])
campaign_df["duration"] = (campaign_df["to_date"] - campaign_df["from_date"]).dt.days

In [4]:
event_df["id"] = event_df["campaign_id"].astype(str) + "-" + event_df["uid"] + "-" + event_df["timestamp"].astype(str)
event_df = event_df.groupby("campaign_id")["id"].agg(["count", "nunique"]).reset_index()
event_df = event_df.rename(columns={"count": "impression", "nunique": "reach"})

In [5]:
order_df = order_df.merge(
    campaign_df[["campaign_id", "discount"]],
    how="left",
    on="campaign_id"
).fillna(0)
order_df["total"] = (order_df["subtotal"] - order_df["discount"]).apply(abs)
order_df = order_df.groupby("campaign_id").agg(revenue=("total", "sum")).reset_index()

In [6]:
df = campaign_df.merge(
    event_df,
    "left",
    "campaign_id"
).merge(
    order_df,
    "left",
    on="campaign_id"
)
df = df[["campaign_id", 'message', 'discount', 'channel', 'duration', 'reach', 'impression', 'revenue']].set_index("campaign_id")
df

Unnamed: 0_level_0,message,discount,channel,duration,reach,impression,revenue
campaign_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Mauris lacinia sapien quis libero.,208336,2,13,36,36,7874026.0
2,"Morbi odio odio, elementum eu, interdum eu, ti...",168279,1,13,34,34,8626189.0
3,Suspendisse potenti.,121208,1,18,31,31,8535064.0
4,Maecenas pulvinar lobortis est.,51187,2,6,30,30,13376904.0
5,Aenean sit amet justo.,321561,1,13,34,34,5225609.0
6,Ut tellus.,205002,1,20,36,36,7361549.0
7,"In tempor, turpis nec euismod scelerisque, qua...",85811,1,9,34,34,11034949.0
8,Cum sociis natoque penatibus et magnis dis par...,255076,2,16,43,43,5901392.0
9,In hac habitasse platea dictumst.,144461,1,30,31,31,9539272.0
10,Etiam pretium iaculis justo.,261198,2,18,37,37,10461180.0


In [7]:
X = df[['message', 'discount', 'channel', 'duration']]
y = df[['reach', 'impression', 'revenue']]

tfidf = TfidfVectorizer(max_features=10)
X_message = tfidf.fit_transform(X['message']).toarray()

scaler = StandardScaler()
X_numerical = scaler.fit_transform(X[['discount', 'channel', 'duration']])

X_processed = np.hstack([X_message, X_numerical])

X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))

model.add(Dense(3))
model.compile(optimizer=Adam(), loss='mean_squared_error')
model.fit(X_train, y_train, epochs=50, batch_size=8, validation_data=(X_test, y_test))

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 114ms/step - loss: 29482123526144.0000 - val_loss: 33173197029376.0000
Epoch 2/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - loss: 29221107793920.0000 - val_loss: 33173194932224.0000
Epoch 3/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 28924289482752.0000 - val_loss: 33173194932224.0000
Epoch 4/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 24450267348992.0000 - val_loss: 33173194932224.0000
Epoch 5/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 24649492594688.0000 - val_loss: 33173192835072.0000
Epoch 6/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 24449510277120.0000 - val_loss: 33173188640768.0000
Epoch 7/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 24880573579264.0000 - val_loss: 33173188640768.0000
Epoch 8/5

<keras.src.callbacks.history.History at 0x22b372ea9f0>

In [8]:
model.save('../model/simulator.keras')
joblib.dump(tfidf, '../model/tfidf_vectorizer.pkl')
joblib.dump(scaler, '../model/scaler.pkl')

['../model/scaler.pkl']