# Importing Datasets and Loading

In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [37]:
market_df=pd.read_csv("market_train.csv",sep=";")

In [38]:
market_df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [39]:
market_df.columns

Index(['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
       'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
       'previous', 'poutcome', 'y'],
      dtype='object')

In [40]:
market_df["marital"].value_counts()

marital
married     27214
single      12790
divorced     5207
Name: count, dtype: int64

In [41]:
market_df["marital"]=market_df["marital"].map({"married":0,"single":1,"divorced":2})

In [42]:
market_df["education"].value_counts()

education
secondary    23202
tertiary     13301
primary       6851
unknown       1857
Name: count, dtype: int64

In [43]:
market_df["contact"].value_counts()

contact
cellular     29285
unknown      13020
telephone     2906
Name: count, dtype: int64

In [44]:
market_df["contact"]=market_df["contact"].map({"cellular":0,"unknown":1,"telephone":2})

In [45]:
market_df=market_df.drop("poutcome",axis=1)

In [46]:
market_df["default"]=market_df["default"].map({"no":0,"yes":1})
market_df["loan"]=market_df["loan"].map({"no":0,"yes":1})
market_df["y"]=market_df["y"].map({"no":0,"yes":1})

In [47]:
market_df["education"]=market_df["education"].map({"unknown":0,"primary":1,"secondary":2,"tertiary":3})

In [48]:
market_df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,y
0,58,management,0,3,0,2143,yes,0,1,5,may,261,1,-1,0,0
1,44,technician,1,2,0,29,yes,0,1,5,may,151,1,-1,0,0
2,33,entrepreneur,0,2,0,2,yes,1,1,5,may,76,1,-1,0,0
3,47,blue-collar,0,0,0,1506,yes,0,1,5,may,92,1,-1,0,0
4,33,unknown,1,0,0,1,no,0,1,5,may,198,1,-1,0,0


In [49]:
market_df=market_df.drop(["balance"],axis=1)

In [50]:
market_df.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,y
0,58,management,0,3,0,yes,0,1,5,may,261,1,-1,0,0
1,44,technician,1,2,0,yes,0,1,5,may,151,1,-1,0,0
2,33,entrepreneur,0,2,0,yes,1,1,5,may,76,1,-1,0,0
3,47,blue-collar,0,0,0,yes,0,1,5,may,92,1,-1,0,0
4,33,unknown,1,0,0,no,0,1,5,may,198,1,-1,0,0


In [51]:
market_df["housing"]=market_df["housing"].map({"yes":1,"no":0})

In [52]:
market_df["job"].value_counts()

job
blue-collar      9732
management       9458
technician       7597
admin.           5171
services         4154
retired          2264
self-employed    1579
entrepreneur     1487
unemployed       1303
housemaid        1240
student           938
unknown           288
Name: count, dtype: int64

In [53]:
market_df["month"].value_counts()

month
may    13766
jul     6895
aug     6247
jun     5341
nov     3970
apr     2932
feb     2649
jan     1403
oct      738
sep      579
mar      477
dec      214
Name: count, dtype: int64

In [54]:
market_df["month"]=market_df["month"].map({"jan":0,"feb":1,"mar":2,"apr":3,"may":4,"jun":5,"july":6,"august":7,"sep":8,"oct":9,"nov":10,"dec":11})

In [55]:
market_df.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,y
0,58,management,0,3,0,1,0,1,5,4.0,261,1,-1,0,0
1,44,technician,1,2,0,1,0,1,5,4.0,151,1,-1,0,0
2,33,entrepreneur,0,2,0,1,1,1,5,4.0,76,1,-1,0,0
3,47,blue-collar,0,0,0,1,0,1,5,4.0,92,1,-1,0,0
4,33,unknown,1,0,0,0,0,1,5,4.0,198,1,-1,0,0


In [56]:
market_df[market_df["y"]==0]

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,y
0,58,management,0,3,0,1,0,1,5,4.0,261,1,-1,0,0
1,44,technician,1,2,0,1,0,1,5,4.0,151,1,-1,0,0
2,33,entrepreneur,0,2,0,1,1,1,5,4.0,76,1,-1,0,0
3,47,blue-collar,0,0,0,1,0,1,5,4.0,92,1,-1,0,0
4,33,unknown,1,0,0,0,0,1,5,4.0,198,1,-1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45194,59,management,0,3,0,1,1,0,16,10.0,162,2,187,5,0
45198,37,management,0,3,0,0,0,0,16,10.0,333,2,-1,0,0
45199,34,blue-collar,1,2,0,1,0,0,16,10.0,1166,3,530,12,0
45209,57,blue-collar,0,2,0,0,0,2,17,10.0,508,4,-1,0,0


In [57]:
market_df.dropna()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,y
0,58,management,0,3,0,1,0,1,5,4.0,261,1,-1,0,0
1,44,technician,1,2,0,1,0,1,5,4.0,151,1,-1,0,0
2,33,entrepreneur,0,2,0,1,1,1,5,4.0,76,1,-1,0,0
3,47,blue-collar,0,0,0,1,0,1,5,4.0,92,1,-1,0,0
4,33,unknown,1,0,0,0,0,1,5,4.0,198,1,-1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,technician,0,3,0,0,0,0,17,10.0,977,3,-1,0,1
45207,71,retired,2,1,0,0,0,0,17,10.0,456,2,-1,0,1
45208,72,retired,0,2,0,0,0,0,17,10.0,1127,5,184,3,1
45209,57,blue-collar,0,2,0,0,0,2,17,10.0,508,4,-1,0,0


In [58]:
market_df.isnull().sum()

age              0
job              0
marital          0
education        0
default          0
housing          0
loan             0
contact          0
day              0
month        13142
duration         0
campaign         0
pdays            0
previous         0
y                0
dtype: int64

In [59]:
market_df["job"]=market_df["job"].map({"unknown":0,"unemployed":1,"student":2,"housemaid":3,"blue-collar":4,"technician":5,"services":6,"retired":7,"self-employed":8,"entrepreneur":9,"admin":10,"management":11})

In [60]:
market_df.head(50)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,y
0,58,11.0,0,3,0,1,0,1,5,4.0,261,1,-1,0,0
1,44,5.0,1,2,0,1,0,1,5,4.0,151,1,-1,0,0
2,33,9.0,0,2,0,1,1,1,5,4.0,76,1,-1,0,0
3,47,4.0,0,0,0,1,0,1,5,4.0,92,1,-1,0,0
4,33,0.0,1,0,0,0,0,1,5,4.0,198,1,-1,0,0
5,35,11.0,0,3,0,1,0,1,5,4.0,139,1,-1,0,0
6,28,11.0,1,3,0,1,1,1,5,4.0,217,1,-1,0,0
7,42,9.0,2,3,1,1,0,1,5,4.0,380,1,-1,0,0
8,58,7.0,0,1,0,1,0,1,5,4.0,50,1,-1,0,0
9,43,5.0,1,2,0,1,0,1,5,4.0,55,1,-1,0,0


In [61]:
market_df.isnull().sum()

age              0
job           5171
marital          0
education        0
default          0
housing          0
loan             0
contact          0
day              0
month        13142
duration         0
campaign         0
pdays            0
previous         0
y                0
dtype: int64

market_df.dopna()

In [62]:
market_df=market_df.dropna(axis=1)

In [63]:
market_df.isnull().sum()

age          0
marital      0
education    0
default      0
housing      0
loan         0
contact      0
day          0
duration     0
campaign     0
pdays        0
previous     0
y            0
dtype: int64

In [64]:
feature=market_df.drop(['y'],axis=1)

In [65]:
target=market_df.iloc[:,-1:]

In [66]:
X_train,X_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=42)

In [67]:
X_train.head()

Unnamed: 0,age,marital,education,default,housing,loan,contact,day,duration,campaign,pdays,previous
10747,36,1,3,0,0,0,1,17,153,4,-1,0
26054,56,0,2,0,0,0,0,19,312,3,-1,0
9125,46,0,2,0,1,0,1,5,83,2,-1,0
41659,41,2,3,0,0,0,0,1,302,1,119,5
4443,38,0,2,0,1,0,1,20,90,1,-1,0


In [68]:
y_train.head()

Unnamed: 0,y
10747,0
26054,0
9125,0
41659,0
4443,0


# Logistic Regression

In [69]:
lr_model=LogisticRegression()

In [70]:
lr_model.fit(X_train,y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [71]:
lr_model.score(X_train,y_train)

0.8897525831832401

## Random_Forest_Classifier

In [72]:
rfc_model=RandomForestClassifier()

In [73]:
rfc_model.fit(X_train,y_train)

  return fit_method(estimator, *args, **kwargs)


In [74]:
rfc_model.score(X_train,y_train)*100

99.99684014282555

In [75]:
round(rfc_model.score(X_test,y_test)*100)

89

In [76]:
from joblib import dump

In [79]:
dump(rfc_model,"./../savemodels/marketcampaign.joblib")

['./../savemodels/marketcampaign.joblib']