In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv("/Users/lambardaar/Desktop/STUDY/python/CA/Bank_Churn.csv")
df.describe()

Unnamed: 0,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


Fixing BALANCE column


In [5]:
df["Balance"]=df["Balance"].replace(0,df["Balance"].mean())
df.head(10)


Unnamed: 0,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,15634602,Hargrave,619,France,Female,42,2,76485.889288,1,1,1,101348.88,1
1,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,15701354,Boni,699,France,Female,39,1,76485.889288,2,0,0,93826.63,0
4,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0
5,15574012,Chu,645,Spain,Male,44,8,113755.78,2,1,0,149756.71,1
6,15592531,Bartlett,822,France,Male,50,7,76485.889288,2,1,1,10062.8,0
7,15656148,Obinna,376,Germany,Female,29,4,115046.74,4,1,0,119346.88,1
8,15792365,He,501,France,Male,44,4,142051.07,2,0,1,74940.5,0
9,15592389,H?,684,France,Male,27,2,134603.88,1,1,1,71725.73,0


ENCODING

In [6]:
le_geo=LabelEncoder()
le_gen=LabelEncoder()
df['Geography']=le_geo.fit_transform(df["Geography"])
df['Gender']=le_gen.fit_transform(df["Gender"])


Feature Engineering

In [7]:
df['experience'] = df['EstimatedSalary'] * df['Tenure']
df['age_balance_ratio'] = df['Age'] / (df['Balance'] + 1)

Feature selection

In [8]:
features = ["CreditScore", "Age", "Tenure", "NumOfProducts",
            "EstimatedSalary", "Gender", "Geography", "IsActiveMember",
            "experience", "age_balance_ratio"]
X = df[features]
y = df["Balance"]

FEATURE SCALING AND SPLITING DATA

In [9]:
sc = StandardScaler()
X_sc = sc.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_sc, y, test_size=0.3, random_state=42
)

TRAINING  AND PREDECTION MODEL

In [10]:
gbr = GradientBoostingRegressor(n_estimators=300, learning_rate=0.05,max_depth=5, random_state=42)
gbr.fit(X_train, y_train)
pred = gbr.predict(X_test)


EVALUATION OF MODEL 

In [11]:
r2 = r2_score(y_test, pred)
mse = mean_squared_error(y_test, pred)
rmse = mse ** 0.5

print("R2 score:", r2)
print("MSE:", mse)
print("RMSE:", rmse)


R2 score: 0.9985861202095796
MSE: 1402709.1447418248
RMSE: 1184.3602259202328


In [12]:
def user_pred_args(credit_score, age, tenure, num_products,
                   salary, gender, geography, is_active):
    
    gen_enc = le_gen.transform([gender])[0]
    geo_enc = le_geo.transform([geography])[0]

    experience = salary * tenure
    avg_balance = df["Balance"].mean()
    age_balance_ratio = age / (avg_balance + 1)

    user_df = pd.DataFrame([[
        credit_score, age, tenure, num_products,
        salary, gen_enc, geo_enc, is_active,
        experience, age_balance_ratio
    ]], columns=features)

    user_scaled = sc.transform(user_df)

    predicted_balance = gbr.predict(user_scaled)[0]

    print(f"predection of balance: ₹{predicted_balance:,.2f}")


In [13]:
user_pred_args(800, 42, 1, 1, 101348.88, "Male", "France", 1)


predection of balance: ₹105,937.74


In [14]:
import joblib

joblib.dump(gbr, "gbr_balance_model.pkl")


['gbr_balance_model.pkl']

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

df = pd.read_csv("/Users/lambardaar/Desktop/STUDY/python/CA/Bank_Churn.csv")
df["Balance"] = df["Balance"].replace(0, df["Balance"].mean())
le_geo = LabelEncoder()
le_gen = LabelEncoder()
df['Geography'] = le_geo.fit_transform(df["Geography"])
df['Gender'] = le_gen.fit_transform(df["Gender"])
df['experience'] = df['EstimatedSalary'] * df['Tenure']
df['age_balance_ratio'] = df['Age'] / (df['Balance'] + 1)
features = ["CreditScore", "Age", "Tenure", "NumOfProducts", "EstimatedSalary", "Gender", "Geography", "IsActiveMember", "experience", "age_balance_ratio"]
X = df[features]
y = df["Balance"]
sc = StandardScaler()
X_sc = sc.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_sc, y, test_size=0.3, random_state=42)
gbr = GradientBoostingRegressor(n_estimators=300, learning_rate=0.05, max_depth=5, random_state=42)
gbr.fit(X_train, y_train)
pred = gbr.predict(X_test)
r2 = r2_score(y_test, pred)
mse = mean_squared_error(y_test, pred)
rmse = mse ** 0.5
print("R2 score:", r2)
print("MSE:", mse)
print("RMSE:", rmse)
def user_pred_args(credit_score, age, tenure, num_products, salary, gender, geography, is_active):
    gen_enc = le_gen.transform([gender])[0]
    geo_enc = le_geo.transform([geography])[0]
    experience = salary * tenure
    avg_balance = df["Balance"].mean()
    age_balance_ratio = age / (avg_balance + 1)
    user_df = pd.DataFrame([[credit_score, age, tenure, num_products, salary, gen_enc, geo_enc, is_active, experience, age_balance_ratio]], columns=features)
    user_scaled = sc.transform(user_df)
    predicted_balance = gbr.predict(user_scaled)[0]
    print(f"predection of balance: ₹{predicted_balance:,.2f}")
user_pred_args(800, 42, 1, 1, 101348.88, "Male", "France", 1)
joblib.dump(gbr, "gbr_balance_model.pkl")