In [154]:
# Recurrent Neural Networks (RNN):
# If you have sequence-related features, you can use RNNs.
# LSTM or GRU layers can be suitable for capturing sequential patterns.
# Define the input shape considering the sequence length.
# Design the RNN architecture and connect it to fully connected layers for classification.

# 2. Model Training:
# Compile the models with appropriate loss functions (e.g., binary cross-entropy) and optimizers (e.g., Adam).
# Train the models on the training data.
# Monitor performance on the validation set to avoid overfitting.
# Tweak hyperparameters if necessary.

# 4. Model Evaluation:
# Evaluate the models on the test set using metrics like accuracy, precision, recall, F1-score, ROC AUC, etc.
# Compare the performance of different models to choose the best one.

In [155]:
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from scipy.sparse import issparse
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dense, LSTM, Embedding, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [157]:
# all the work doen lets see if how data set hasnt traned into a super saiyen 2 yet :)
df.head(6)

Unnamed: 0,account length,location code,user id,credit card info save,push status,add to wishlist,desktop sessions,app sessions,desktop transactions,total product detail views,session duration,promotion clicks,avg order value,sale product views,discount rate per visited products,product detail view per app session,app transactions,add to cart per session,customer service calls,churn
0,128,415,3824657,no,yes,25,265,45,17,110,197,87,2447,91,1101,10,3,27,1,0
1,107,415,3717191,no,yes,26,162,27,17,123,196,103,2544,103,1145,137,3,37,1,0
2,137,415,3581921,no,no,0,243,41,10,114,121,110,1626,104,732,122,5,329,0,0
3,84,408,3759999,yes,no,0,299,51,5,71,62,88,1969,89,886,66,7,178,2,0
4,75,415,3306626,yes,no,0,167,28,13,113,148,122,1869,121,841,101,3,273,3,0
5,118,510,3918027,yes,no,0,223,38,19,98,221,101,2039,118,918,63,6,17,0,0


In [158]:
# Preprocessing
# Convert categorical variables into one-hot encoded vectors
df = pd.get_dummies(df, columns=['location code', 'credit card info save', 'push status'])

# Convert discount rate per visited products and product detail view per app session to float
df['discount rate per visited products'] = df['discount rate per visited products'].str.replace(',', '.').astype(float)
df['product detail view per app session'] = df['product detail view per app session'].str.replace(',', '.').astype(float)


In [159]:
# Split the data into features and target
X = df.drop(columns=['churn'])
y = df['churn']

In [179]:
# Train-test split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [180]:
# Replace commas with periods and convert to float
X_val['add to cart per session'] = X_val['add to cart per session'].str.replace(',', '.').astype(float)

# Replace commas with decimal points in 'avg order value' column
X_val['avg order value'] = X_val['avg order value'].str.replace(',', '.')

# Convert 'avg order value' column to float
X_val['avg order value'] = X_val['avg order value'].astype(float)

In [181]:
# Now, try scaling the features again
X_val_scaled = scaler.transform(X_val)

In [182]:
# Convert sequential data into sequences suitable for RNN input
# Define the sequence length (you can adjust this based on the context)
sequence_length = 10

In [183]:
def create_sequences(data, sequence_length):
    sequences = []
    for i in range(len(data) - sequence_length + 1):
        sequences.append(data[i:i+sequence_length])
    return np.array(sequences)


In [184]:
# Create sequences for validation data
X_val_sequences = create_sequences(X_val_scaled, sequence_length)

# Trim the target labels to match the sequence length
y_val_trimmed = y_val[sequence_length - 1:]

In [185]:
# Define the RNN model
rnn_model = Sequential([
    LSTM(64, input_shape=(X_val_sequences.shape[1], X_val_sequences.shape[2])),
    Dense(1, activation='sigmoid')
])


In [186]:
# Compile the model
rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [187]:
# Train the model
rnn_model.fit(X_val_sequences, y_val_trimmed, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7ab532235cf0>

In [189]:
# Predict the probabilities of churn for the validation data
y_val_pred_proba = rnn_model.predict(X_val_sequences)

# Convert probabilities to binary predictions (0 or 1) based on a threshold (e.g., 0.5)
y_val_pred = (y_val_pred_proba > 0.5).astype(int)

# Calculate evaluation metrics
accuracy = accuracy_score(y_val_trimmed, y_val_pred)
precision = precision_score(y_val_trimmed, y_val_pred)
recall = recall_score(y_val_trimmed, y_val_pred)
f1 = f1_score(y_val_trimmed, y_val_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.8753799392097265
Precision: 0.8
Recall: 0.24
F1-score: 0.36923076923076925


In [190]:
# On FNN we used the pickle import to save now here we are using the joblib import . some variety hey , some spice in your life : )
# import joblib
# from keras.models import load_model

# Save the RNN model to a file
# def save_model(model, filepath):
#    model.save(filepath)

In [194]:
# Function to load the saved RNN model and make predictions
def predict_churn(model_filepath, new_data):
    # Load the saved model
    rnn_model = load_model(model_filepath)

    # Preprocess the new data (similar to what was done for training data)
    # Replace commas with periods and convert to float
    new_data['add to cart per session'] = new_data['add to cart per session'].str.replace(',', '.').astype(float)
    new_data['avg order value'] = new_data['avg order value'].str.replace(',', '.').astype(float)

    # Scaling the features
    new_data_scaled = scaler.transform(new_data)

    # Convert sequential data into sequences suitable for RNN input
    X_new_sequences = create_sequences(new_data_scaled, sequence_length)

    # Predict churn rate for new data
    churn_probabilities = rnn_model.predict(X_new_sequences)

    return churn_probabilities

In [195]:
# Example usage:
# Save the model
# save_model(rnn_model, 'rnn_model.h5')
