In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, GRU
from keras.optimizers import Adam
from sklearn.cluster import KMeans


# Load lead data
leads_df = pd.read_csv('lead.csv')

# Preprocess industry and lead status
label_encoder_industry = LabelEncoder()
leads_df['industry_encoded'] = label_encoder_industry.fit_transform(leads_df['industry'])

label_encoder_status = LabelEncoder()
leads_df['lead_status_encoded'] = label_encoder_status.fit_transform(leads_df['lead_status'])

# Features
X = leads_df[['industry_encoded', 'lead_status_encoded']].values



In [2]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [3]:
# Perform KMeans clustering
k = 5  # Assuming 5 clusters
kmeans = KMeans(n_clusters=k, random_state=42)
leads_df['lead_cluster'] = kmeans.fit_predict(X_scaled)

# Scale the cluster assignments to the range [0, 10]
scaler_cluster = MinMaxScaler(feature_range=(0, 10))
leads_df['lead_score'] = scaler_cluster.fit_transform(leads_df['lead_cluster'].values.reshape(-1, 1))


In [4]:
# Features and target variable for the RNN model
X_rnn = leads_df[['industry_encoded', 'lead_status_encoded']].values
y_rnn = leads_df['lead_score'].values.reshape(-1, 1)

# Normalize features for the RNN model
scaler_rnn = StandardScaler()
X_scaled_rnn = scaler_rnn.fit_transform(X_rnn)

In [5]:
# Reshape input data for the RNN model
X_train_rnn, X_test_rnn, y_train_rnn, y_test_rnn = train_test_split(X_scaled_rnn, y_rnn, test_size=0.2, random_state=42)
X_train_rnn = X_train_rnn.reshape(X_train_rnn.shape[0], 1, X_train_rnn.shape[1])
X_test_rnn = X_test_rnn.reshape(X_test_rnn.shape[0], 1, X_test_rnn.shape[1])

In [6]:
# Define RNN model
model_rnn = Sequential()
model_rnn.add(GRU(units=64, dropout=0.2, recurrent_dropout=0.2, input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
model_rnn.add(Dense(units=1, activation='linear'))

In [7]:
# Compile model
model_rnn.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train model
model_rnn.fit(X_train_rnn, y_train_rnn, epochs=20, batch_size=32, validation_data=(X_test_rnn, y_test_rnn), verbose=2)


Epoch 1/20
3/3 - 9s - loss: 28.3180 - mae: 4.0902 - val_loss: 36.0473 - val_mae: 4.8827 - 9s/epoch - 3s/step
Epoch 2/20
3/3 - 0s - loss: 28.0549 - mae: 4.0750 - val_loss: 35.6989 - val_mae: 4.8607 - 172ms/epoch - 57ms/step
Epoch 3/20
3/3 - 0s - loss: 27.7806 - mae: 4.0595 - val_loss: 35.3477 - val_mae: 4.8384 - 140ms/epoch - 47ms/step
Epoch 4/20
3/3 - 0s - loss: 27.5025 - mae: 4.0429 - val_loss: 34.9961 - val_mae: 4.8159 - 125ms/epoch - 42ms/step
Epoch 5/20
3/3 - 0s - loss: 27.2062 - mae: 4.0262 - val_loss: 34.6663 - val_mae: 4.7941 - 164ms/epoch - 55ms/step
Epoch 6/20
3/3 - 0s - loss: 26.9875 - mae: 4.0094 - val_loss: 34.3403 - val_mae: 4.7722 - 130ms/epoch - 43ms/step
Epoch 7/20
3/3 - 0s - loss: 26.6803 - mae: 3.9935 - val_loss: 34.0129 - val_mae: 4.7500 - 125ms/epoch - 42ms/step
Epoch 8/20
3/3 - 0s - loss: 26.4854 - mae: 3.9833 - val_loss: 33.6727 - val_mae: 4.7268 - 170ms/epoch - 57ms/step
Epoch 9/20
3/3 - 0s - loss: 26.1920 - mae: 3.9684 - val_loss: 33.3340 - val_mae: 4.7034 - 120

<keras.src.callbacks.History at 0x1e3e7e2f9d0>

In [8]:
# Predict lead scores
y_pred_rnn = model_rnn.predict(X_test_rnn)

# Scale predicted lead scores to the range [0, 10]
scaler_lead_score = MinMaxScaler(feature_range=(0, 10))
y_pred_scaled = scaler_lead_score.fit_transform(y_pred_rnn)



In [9]:
# Evaluate model performance
mse, mae = model_rnn.evaluate(X_test_rnn, y_test_rnn, verbose=0)
print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")

# Example prediction for a new lead
new_lead_features = [[label_encoder_industry.transform(['Technology'])[0], 
                      label_encoder_status.transform(['open'])[0]]]
new_lead_features_scaled = scaler_rnn.transform(new_lead_features)
new_lead_features_scaled_reshaped = new_lead_features_scaled.reshape(1, 1, 2)
predicted_lead_score = model_rnn.predict(new_lead_features_scaled_reshaped)
predicted_lead_score_scaled = scaler_lead_score.transform(predicted_lead_score)[0][0]

print(f"Predicted lead score for the new lead: {predicted_lead_score_scaled}")

Mean Squared Error: 28.912290573120117
Mean Absolute Error: 4.380837440490723
Predicted lead score for the new lead: 8.024524688720703
