In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense

In [None]:
data = pd.read_excel('model_input.xlsx')

In [None]:
data.head()

Unnamed: 0,mob_mtched_lylty_id,age,gender,overall_recency,overall_ats,overall_frequency,smart_recency,smart_ats,smart_frequency,tagger
0,700000001722,55,MALE,899.0,5397.5,2.0,899.0,4995.0,1.0,0
1,700000007935,42,MALE,187.0,8303.5714,7.0,187.0,3495.0,1.0,0
2,700000008891,23,,1343.0,4625.8333,6.0,1347.0,2195.0,1.0,0
3,700000013693,51,MALE,1327.0,3018.6363,11.0,2567.0,1995.0,1.0,0
4,700000015706,33,MALE,576.0,3291.1111,9.0,1873.0,1595.0,1.0,0


In [6]:
data['gender'].value_counts()

Unnamed: 0_level_0,count
gender,Unnamed: 1_level_1
MALE,463246
FEMALE,164430


In [7]:
data = data.drop(['gender','mob_mtched_lylty_id'], axis = 1)

In [8]:
data.head()

Unnamed: 0,age,overall_recency,overall_ats,overall_frequency,smart_recency,smart_ats,smart_frequency,tagger
0,55,899.0,5397.5,2.0,899.0,4995.0,1.0,0
1,42,187.0,8303.5714,7.0,187.0,3495.0,1.0,0
2,23,1343.0,4625.8333,6.0,1347.0,2195.0,1.0,0
3,51,1327.0,3018.6363,11.0,2567.0,1995.0,1.0,0
4,33,576.0,3291.1111,9.0,1873.0,1595.0,1.0,0


In [11]:
data[data['tagger']==1].isna().sum()

Unnamed: 0,0
age,0
overall_recency,2
overall_ats,2
overall_frequency,2
smart_recency,14
smart_ats,14
smart_frequency,14
tagger,0


In [10]:
data.tagger.value_counts()

Unnamed: 0_level_0,count
tagger,Unnamed: 1_level_1
0,1044915
1,3660


In [14]:
data.dropna(inplace=True)

In [15]:
data.isna().sum()

Unnamed: 0,0
age,0
overall_recency,0
overall_ats,0
overall_frequency,0
smart_recency,0
smart_ats,0
smart_frequency,0
tagger,0


In [16]:
data.shape

(1032190, 8)

In [17]:
data.head()

Unnamed: 0,age,overall_recency,overall_ats,overall_frequency,smart_recency,smart_ats,smart_frequency,tagger
0,55,899.0,5397.5,2.0,899.0,4995.0,1.0,0
1,42,187.0,8303.5714,7.0,187.0,3495.0,1.0,0
2,23,1343.0,4625.8333,6.0,1347.0,2195.0,1.0,0
3,51,1327.0,3018.6363,11.0,2567.0,1995.0,1.0,0
4,33,576.0,3291.1111,9.0,1873.0,1595.0,1.0,0


In [18]:
X_train, X_test, y_train, y_test = train_test_split(data.drop('tagger', axis=1), data['tagger'], test_size=0.2, random_state=42)

In [19]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [20]:
import pickle

with open('scaler.pkl','wb') as file:
    pickle.dump(scaler, file)

In [21]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [24]:
from tensorflow.keras.models import Sequential

In [25]:
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_resampled.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1,activation = 'sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
model.summary()

In [28]:
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

In [29]:
import datetime
log_dir = "logs/fit" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [30]:
early_stopping_callback = EarlyStopping(monitor = 'val_loss',patience = 10,restore_best_weights=True)

In [31]:
history = model.fit(
    X_train_resampled, y_train_resampled, validation_data = (X_test, y_test), epochs = 100, callbacks = [tensorflow_callback,early_stopping_callback]
)

Epoch 1/100
[1m51427/51427[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 2ms/step - accuracy: 0.7014 - loss: 0.5607 - val_accuracy: 0.7122 - val_loss: 0.5153
Epoch 2/100
[1m51427/51427[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 2ms/step - accuracy: 0.7329 - loss: 0.5212 - val_accuracy: 0.6564 - val_loss: 0.5888
Epoch 3/100
[1m51427/51427[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 2ms/step - accuracy: 0.7478 - loss: 0.5021 - val_accuracy: 0.7126 - val_loss: 0.5175
Epoch 4/100
[1m51427/51427[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 2ms/step - accuracy: 0.7556 - loss: 0.4910 - val_accuracy: 0.6842 - val_loss: 0.5393
Epoch 5/100
[1m51427/51427[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 2ms/step - accuracy: 0.7607 - loss: 0.4829 - val_accuracy: 0.7005 - val_loss: 0.5290
Epoch 6/100
[1m51427/51427[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 2ms/step - accuracy: 0.7657 - loss: 0.4761 - val_accuracy: 0.7596 - val

In [32]:
model.save('smart.h5')



In [33]:
from tensorflow.keras.models import load_model


In [None]:
model = load_model('smart.h5')

with open('scaler.pkl','rb') as file:
    scaler = pickle.load(file)

input_data = {
    'age' : 34,
    'overall_recency' : 567,
    'overall_ats' : 7500,
    'overall_frequency' : 5,
    'smart_recency' : 1200,
    'smart_ats' : 7000,
    'smart_frequency' : 1
}


scaled_input = scaler.transform(pd.DataFrame([input_data]))
prediction = model.predict(scaled_input)
print(prediction)