In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense




In [2]:
# 1. Randomly generate data for 25k accounts with 75 features, event flag.
num_accounts = 25000
num_features = 75
num_timesteps = 36

In [3]:
# Generating random data
np.random.seed(42)  # for reproducibility
data = {
    'Account': np.repeat(np.arange(1, num_accounts+1), num_timesteps),
    'Time Step': np.tile(np.arange(1, num_timesteps+1), num_accounts)
}
for i in range(1, num_features + 1):
    data[f'Feature_{i}'] = np.random.rand(num_accounts * num_timesteps)


In [4]:
# Introducing events randomly
event_probability = 0.05  # Adjust as needed
data['Event'] = np.random.choice([0, 1], size=num_accounts*num_timesteps, p=[1-event_probability, event_probability])

In [5]:
# Convert to DataFrame
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Account,Time Step,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,...,Feature_67,Feature_68,Feature_69,Feature_70,Feature_71,Feature_72,Feature_73,Feature_74,Feature_75,Event
0,1,1,0.37454,0.17722,0.240509,0.658199,0.652684,0.007886,0.299669,0.431278,...,0.195509,0.739435,0.594085,0.936477,0.719711,0.120982,0.36268,0.540876,0.100523,0
1,1,2,0.950714,0.425793,0.61016,0.193327,0.379541,0.08057,0.453735,0.109143,...,0.940149,0.207561,0.66108,0.762089,0.02247,0.857024,0.366775,0.459732,0.54322,0
2,1,3,0.731994,0.433131,0.440874,0.852763,0.980179,0.797293,0.408622,0.414723,...,0.089135,0.495945,0.792619,0.633261,0.98224,0.163241,0.710392,0.338058,0.739751,0
3,1,4,0.598658,0.382641,0.291918,0.264948,0.511499,0.28984,0.687179,0.704581,...,0.485015,0.309159,0.65737,0.675822,0.472532,0.065485,0.731821,0.481697,0.632003,0
4,1,5,0.156019,0.018041,0.078053,0.926069,0.325941,0.679429,0.907369,0.008192,...,0.951703,0.892886,0.295252,0.745551,0.990639,0.917855,0.610486,0.138153,0.295509,0


In [6]:
# 2. Create a machine learning model and train the generated data
# Separate features and target variable
X = df.drop(['Account', 'Time Step', 'Event'], axis=1)
y = df['Event']

In [7]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Build the neural network model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])




In [9]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])




In [10]:
# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x216b20025d0>

In [11]:
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.9500722289085388


In [12]:
# Generate random data for 100 accounts
num_accounts_new = 100

In [13]:
new_data = {
    'Account': np.repeat(np.arange(1, num_accounts_new+1), num_timesteps),
    'Time Step': np.tile(np.arange(1, num_timesteps+1), num_accounts_new),
}
for i in range(1, num_features + 1):
    new_data[f'Feature_{i}'] = np.random.rand(num_accounts_new * num_timesteps)


In [14]:
# Introducing events randomly
new_data['Event'] = np.random.choice([0, 1], size=num_accounts_new*num_timesteps, p=[1-event_probability, event_probability])


In [15]:
# Convert to DataFrame
new_df = pd.DataFrame(new_data)

# Separate features and target variable
X_new = new_df.drop(['Account', 'Time Step', 'Event'], axis=1)
y_new = new_df['Event']

In [16]:
# Make predictions on the new data
y_pred_new = (model.predict(X_new) > 0.5).astype("int32")



In [17]:
# Calculate accuracy on new data
accuracy_new = accuracy_score(y_new, y_pred_new)
print("Accuracy on new data:", accuracy_new)

Accuracy on new data: 0.9533333333333334
