In [1]:
import pickle

import numpy as np
import pandas as pd
import plotly.express as ex
import plotly.figure_factory as ff
import plotly.graph_objs as go
import plotly.offline as pyo
from imblearn.over_sampling import SMOTE
from joblib import dump, load
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.metrics import f1_score as f1
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [2]:
def prep(data):
    """Clean the raw weekly rows and add the derived ``week`` / ``is_churn`` columns.

    Steps, in order:
      1. drop rows whose ``region`` is missing;
      2. sort the remaining rows by ``first_day_week``;
      3. add ``week``: the dense rank of ``first_day_week`` (1 for the
         smallest distinct value, 2 for the next, ...), cast to ``int``;
      4. recode ``is_churn`` from ``'Not Churn'`` / ``'Churn'`` to 0 / 1.
         Any other label becomes NaN, because ``Series.map`` with a dict
         yields NaN for keys that are not in the mapping.

    The frame passed in is never modified; every step produces a new frame,
    which also avoids in-place edits on the intermediate ``dropna`` result.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``region``, ``first_day_week`` and
        ``is_churn``.

    Returns
    -------
    pandas.DataFrame
        The cleaned, sorted rows (original index labels kept) with ``week``
        appended and ``is_churn`` recoded.
    """
    churn_codes = {'Not Churn': 0, 'Churn': 1}
    return (
        data
        .dropna(subset=["region"])
        .sort_values(by="first_day_week")
        .assign(
            week=lambda frame: frame['first_day_week'].rank(method='dense').astype(int),
            is_churn=lambda frame: frame["is_churn"].map(churn_codes),
        )
    )
    

    
    

In [3]:
# Multipliers applied to the three per-metric components when getdata()
# combines them into a single "score" column. All three are currently 1.0,
# i.e. the score is the plain sum of the components.
weights = dict(
    Consistency=1.0,
    Productivity=1.0,
    Improvement=1.0,
)


In [4]:
def getdata(data,value,ex="",score_weights=None):
    """Summarise one weekly metric into four per-wallet feature columns.

    ``data`` is pivoted to one row per ``distributor_wallet`` and one column
    per ``week`` (taking the first ``value`` seen for each wallet/week pair).
    From that wallet-by-week table four columns are derived:

    * ``<ex><value> consistency``            - row-wise standard deviation
      across weeks (pandas' default sample std, so a wallet with a single
      observed week gets NaN here and in the score);
    * ``<ex>total productivity of <value>``  - row-wise sum across weeks;
    * ``<ex>improvent tally of <value>``     - sum of week-over-week
      differences (the column name's spelling is kept as-is because
      downstream artefacts may depend on the exact name);
    * ``<ex><value> score``                  - weighted sum of the three
      components above.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows with at least ``distributor_wallet``, ``week`` and ``value``.
    value : str
        Name of the metric column to summarise.
    ex : str, default ""
        Prefix prepended to every generated column name. Note that inside
        this function the name shadows the notebook's ``plotly.express``
        alias ``ex``; plotly is not used here.
    score_weights : mapping, optional
        Mapping with the keys ``"Consistency"``, ``"Productivity"`` and
        ``"Improvement"``. When omitted, the module-level ``weights`` dict
        is used, which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``distributor_wallet`` with exactly the four columns
        listed above, in that order.
    """
    if score_weights is None:
        # Fall back to the notebook-level defaults.
        score_weights = weights

    df=pd.pivot_table(data,index="distributor_wallet",columns="week", values=value ,aggfunc='first')

    # All three components are computed from the pure week columns, before
    # any summary column is appended to the pivot.
    consis = df.std(axis=1)
    produ = df.sum(axis=1)
    improv = df.diff(axis=1).fillna(0).sum(axis=1)

    ovrl=(score_weights["Consistency"]* consis+
           score_weights["Productivity"]*produ +
           score_weights['Improvement']*improv)

    df[ex+value+" consistency"]=consis
    df[ex+"total productivity of "+value]=produ
    df[ex+"improvent tally of "+value] = improv
    df[ex+value+" score"] = ovrl

    # The four summary columns were appended last, so they are the tail.
    return df.iloc[:,-4:]
    


In [5]:
def make_data(df):
    """Assemble the per-wallet feature table from the prepared weekly rows.

    The rows are split by ``txn_type`` (``b2b_transfer``, ``cash_in``,
    ``cash_out``). For each (transaction type, metric) pair listed below,
    ``getdata`` contributes four summary columns prefixed with ``b2b``,
    ``cashin`` or ``cashout``. ``active_dso`` is summarised from the
    ``b2b_transfer`` rows without a prefix. Finally the first observed
    ``lifetime_days``, ``region`` and ``is_churn`` per wallet are attached.

    All pieces are inner-joined on ``distributor_wallet`` and rows containing
    any NaN are dropped, so only wallets present in every piece with complete
    values are returned.

    The order of ``layout`` fixes the column order of the result; keep it
    unchanged if anything downstream relies on column positions.
    """
    rows_by_prefix = {
        "b2b": df[df["txn_type"]=="b2b_transfer"],
        "cashin": df[df["txn_type"]=="cash_in"],
        "cashout": df[df["txn_type"]=="cash_out"],
    }

    # (column prefix / transaction subset, metric) in final column order.
    layout = [
        ("b2b", "txn_amount"),
        ("b2b", "no_of_txn"),
        ("cashin", "txn_amount"),
        ("cashout", "txn_amount"),
        ("cashin", "no_of_txn"),
        ("cashout", "no_of_txn"),
        ("b2b", "active_days"),
        ("cashin", "active_days"),
        ("cashout", "active_days"),
        ("b2b", "active_agent"),
        ("cashout", "active_agent"),
        ("cashin", "active_agent"),
    ]
    pieces = [getdata(rows_by_prefix[prefix], metric, prefix) for prefix, metric in layout]

    # active_dso is taken from the b2b rows and carries no prefix.
    pieces.append(getdata(rows_by_prefix["b2b"], "active_dso"))

    # Static per-wallet attributes, including the target column.
    pieces.append(
        pd.pivot_table(df,index="distributor_wallet",values=["lifetime_days","region","is_churn"] ,aggfunc='first')
    )

    base, *others = pieces
    final = base.join(others, how='inner')
    final.dropna(inplace=True)
    return final



In [6]:
# Build the scoring feature matrix:
# raw weekly rows -> cleaned rows (prep) -> one feature row per wallet (make_data).
test=prep(pd.read_csv('distributor_churn_date_last_2_weeks_202402121120.csv'))

final_test=make_data(test)


X_test=final_test.drop("is_churn",axis=1)
y_test= final_test["is_churn"]

# NOTE: pickle.load can execute arbitrary code contained in the file.
# Only load a scaler file that was produced by a trusted training run.
with open('scaler.pkl1', 'rb') as f:
    loaded_scaler = pickle.load(f)

# One-hot encode `region` and align the result to the columns the scaler was
# fitted on. Encoding this file on its own with drop_first=True derives the
# dummy columns from whichever regions happen to be present here, so a missing
# (or different first) region would silently shift or mismatch the columns.
trained_columns = getattr(loaded_scaler, "feature_names_in_", None)
if trained_columns is None:
    # The scaler carries no column names, so there is nothing to align
    # against; keep the original encoding.
    X_test_en = pd.get_dummies(X_test, columns=['region'],drop_first=True)
else:
    X_test_en = pd.get_dummies(X_test, columns=['region'])
    absent = [
        col for col in trained_columns
        if col not in X_test_en.columns and not str(col).startswith('region_')
    ]
    if absent:
        # Only region dummies may legitimately be absent; anything else is a
        # schema mismatch that must not be masked by zero-filling.
        raise ValueError(f"Scoring data is missing fitted feature columns: {absent}")
    # Regions absent from this file become all-zero columns. Region columns
    # the scaler never saw (including the baseline dropped at fit time) are
    # discarded.
    X_test_en = X_test_en.reindex(columns=trained_columns, fill_value=0)
X_test_scaled = loaded_scaler.transform(X_test_en)


In [7]:
# Load the persisted classifier used for scoring (presumably the AdaBoost
# model, judging by the file name). joblib's `dump` / `load` are imported with
# the rest of the dependencies in the import cell at the top of the notebook.
#
# Earlier revisions also loaded rf_model / svm_model / nn_model / xb_model from
# '<name>.joblib' in the same way; those loads are disabled and only the model
# below is used by the following cells.
ada_model = load('ada_model.joblib')



In [8]:
# Predict a class label for every row of the scaled feature matrix.
# Only the AdaBoost model is active; the other models' predictions are
# disabled together with their loading code.
#rf_predictions = rf_model.predict(X_test_scaled)
ada_predictions = ada_model.predict(X_test_scaled)
#svm_predictions = svm_model.predict(X_test_scaled)
#nn_predictions = nn_model.predict(X_test_scaled)
#xb_predictions = xb_model.predict(X_test_scaled)

In [10]:
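# NOTE(review): this whole cell is a disabled experiment. As written it would
# not produce useful results: each perturbed_row holds a single row, so
# np.random.permutation on its one-element column returns the same value and
# every change_in_prediction is 0. It also calls ada_model.predict on the
# unscaled X_test_en, while the active cells feed the model X_test_scaled.
# # Create a list to store the most influential feature for each prediction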
# most_influential_features = []

# # Iterate over each row in the test data
# for i in range(X_test_en.shape[0]):
#     # Store the original prediction
#     original_prediction = ada_model.predict(X_test_en[i:i+1])[0]

#     # Initialize variables to store the most influential feature and the maximum change in prediction
#     most_influential_feature = None
#     max_change_in_prediction = -float('inf')

#     # Perturb each feature individually and observe the change in prediction
#     for feature in X_test_en.columns:
#         # Create a copy of the row to perturb
#         perturbed_row = X_test_en.iloc[i:i+1].copy()
        
#         # Perturb the feature
#         perturbed_row[feature] = np.random.permutation(perturbed_row[feature])

#         # Compute the new prediction
#         new_prediction = ada_model.predict(perturbed_row)[0]

#         # Compute the change in prediction
#         change_in_prediction = abs(original_prediction - new_prediction)

#         # Update the most influential feature if necessary
#         if change_in_prediction > max_change_in_prediction:
#             most_influential_feature = feature
#             max_change_in_prediction = change_in_prediction

#     # Append the most influential feature to the list
#     most_influential_features.append(most_influential_feature)

# # Print the list of most influential features for each prediction
# most_influential_features


In [11]:
# Class probabilities from the model; column 1 is taken as the churn
# probability (is_churn was encoded as 1 for 'Churn').
proba = ada_model.predict_proba(X_test_scaled)
churn_prob = proba[:, 1]

# Attach the predicted label and the churn probability to the unscaled
# feature table so that each wallet's inputs and outputs are shown together.
X_test['output'] = ada_predictions
# (A per-row 'Reason' column from most_influential_features is disabled.)
X_test['Probability'] = churn_prob

X_test

Unnamed: 0_level_0,b2btxn_amount consistency,b2btotal productivity of txn_amount,b2bimprovent tally of txn_amount,b2btxn_amount score,b2bno_of_txn consistency,b2btotal productivity of no_of_txn,b2bimprovent tally of no_of_txn,b2bno_of_txn score,cashintxn_amount consistency,cashintotal productivity of txn_amount,...,cashinimprovent tally of active_agent,cashinactive_agent score,active_dso consistency,total productivity of active_dso,improvent tally of active_dso,active_dso score,lifetime_days,region,output,Probability
distributor_wallet,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1300382838,1.152089e+04,37727161.0,-16293.0,3.772239e+07,96.873629,4285.0,137.0,4518.873629,3.808576e+05,37488674.0,...,-27.0,547.091883,0.000000,20.0,0.0,20.000000,32,Gazipur,0,0.278448
1300660880,7.492433e+04,6383497.0,105959.0,6.564380e+06,14.849242,129.0,21.0,164.849242,5.510837e+04,1764477.0,...,8.0,73.656854,0.000000,4.0,0.0,4.000000,1081,Khulna,1,0.527268
1302920295,6.608542e+05,5357345.0,-934589.0,5.083610e+06,26.870058,1326.0,38.0,1390.870058,7.866676e+05,3110792.0,...,-2.0,105.414214,0.000000,10.0,0.0,10.000000,68,Gazipur,0,0.283898
1310264434,4.537386e+06,14914703.0,-6416833.0,1.303526e+07,132.228968,1663.0,-187.0,1608.228968,2.319391e+06,7127512.0,...,-15.0,164.606602,0.000000,12.0,0.0,12.000000,466,Bogra,0,0.365094
1311992979,8.905374e+05,12038482.0,-1259410.0,1.166961e+07,139.300036,2941.0,-197.0,2883.300036,1.305538e+05,3965377.0,...,-3.0,232.121320,0.000000,10.0,0.0,10.000000,252,Rangpur,0,0.479143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1973969344,1.626841e+06,2442900.0,2300700.0,6.370441e+06,239.002092,388.0,338.0,965.002092,1.115475e+05,307432.0,...,3.0,92.121320,1.414214,6.0,2.0,9.414214,14,Bogra,0,0.313324
1977738010,3.745021e+05,15828946.0,529626.0,1.673307e+07,224.152850,1925.0,317.0,2466.152850,1.823467e+06,8222856.0,...,66.0,470.669048,0.000000,16.0,0.0,16.000000,378,Dhaka South,1,0.518553
1977836262,4.192730e+06,36077506.0,-5929416.0,3.434082e+07,212.132034,2830.0,-300.0,2742.132034,3.409432e+06,21554905.0,...,14.0,613.899495,0.000000,14.0,0.0,14.000000,1081,Khulna,0,0.384689
1984749996,6.701735e+06,37876758.0,-9477684.0,3.510081e+07,193.040151,2501.0,-273.0,2421.040151,3.490828e+06,25383418.0,...,-32.0,336.627417,0.000000,12.0,0.0,12.000000,1074,Rangpur,0,0.378162
