# Mail Data

In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB,GaussianNB,BernoulliNB
from sklearn.metrics import confusion_matrix,classification_report,f1_score
import seaborn as sns

In [None]:
# Read the mail dataset from the Colab working directory and preview its first rows.
df = pd.read_csv("/content/mail_data.csv")
df.head(n=3)

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# The columns this notebook uses ('Category' as the label, 'Message' as the
# text fed to the vectorizer) are not numeric, so a box plot of the raw frame
# has nothing meaningful to draw. Plot a numeric summary of the text instead:
# the length of each message, grouped by category.
ax = sns.boxplot(x=df['Category'], y=df['Message'].str.len())
ax.set(
    xlabel='Category',
    ylabel='Message length (characters)',
    title='Message length by category',
);

In [None]:
# Count how many rows fall into each label, to see how balanced the classes are.
df.loc[:, 'Category'].value_counts()

In [None]:
# Separate the feature table (every column except the label) from the label column.
x = df.drop(columns=['Category'])
y = df['Category']

In [None]:
# Hold out 20% of the rows for testing with a fixed seed for reproducibility.
# stratify=y keeps the proportion of each 'Category' class the same in both
# partitions, so the test metrics are not skewed by a split that happens to
# under- or over-sample a class (matters most when the classes are imbalanced;
# check df['Category'].value_counts()).
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=42,
    stratify=y,
)

In [None]:
# Turn the 'Message' text column into TF-IDF features.
# The column is selected with a plain string (not a list) so the vectorizer
# receives a 1-D sequence of documents, which is what text vectorizers expect.
preprocessing = ColumnTransformer(
    transformers=[('vectorizer', TfidfVectorizer(), 'Message')],
)

In [None]:
# Chain the text preprocessing with a multinomial naive Bayes classifier so
# that fitting and predicting always apply the same transformation.
pipeline = Pipeline(
    steps=[('preprocessing', preprocessing), ('model', MultinomialNB())],
)

In [None]:
# Fit the vectorizer and the classifier on the training partition only.
pipeline.fit(X=xtrain, y=ytrain)

In [None]:
# Accuracy on the data the pipeline was trained on.
pipeline.score(X=xtrain, y=ytrain)

In [None]:
# Accuracy on the held-out test partition.
pipeline.score(X=xtest, y=ytest)

In [None]:
# Predicted labels for the training rows (note: y_pred = train, ypred = test).
y_pred = pipeline.predict(X=xtrain)

In [None]:
# Predicted labels for the held-out test rows (note: ypred = test, y_pred = train).
ypred = pipeline.predict(X=xtest)

In [None]:
# Confusion matrix on the training partition (rows: true class, columns: predicted class).
confusion_matrix(y_true=ytrain, y_pred=y_pred)

In [None]:
# Confusion matrix on the test partition (rows: true class, columns: predicted class).
confusion_matrix(y_true=ytest, y_pred=ypred)

In [None]:
# Macro F1 on the training partition: the unweighted mean of the per-class F1 scores.
print(f1_score(y_true=ytrain, y_pred=y_pred, average='macro'))

In [None]:
# Macro F1 on the test partition: the unweighted mean of the per-class F1 scores.
print(f1_score(y_true=ytest, y_pred=ypred, average='macro'))

In [None]:
# Per-class precision, recall and F1 on the training partition.
print(classification_report(y_true=ytrain, y_pred=y_pred))

In [None]:
# Per-class precision, recall and F1 on the held-out test partition.
print(classification_report(y_true=ytest, y_pred=ypred))

# Bank Personal Loan Modelling Data

In [None]:
# Read the bank dataset from the Colab working directory and preview its first rows.
# This rebinds `df`, so the mail frame loaded earlier is no longer reachable under that name.
df = pd.read_csv("/content/Bank_Personal_Loan_Modelling.csv")
df.head(n=3)

In [None]:
# Print the column names, dtypes and non-null counts of the bank frame.
df.info()

In [None]:
# Draw one box per column of the frame (wide-form input).
# NOTE(review): all columns share a single value axis; if their scales differ
# widely the narrow-range columns may be unreadable — confirm this is useful.
sns.boxplot(data=df)

In [None]:
# Count the rows in each 'CreditCard' class to see how balanced the target is.
df.loc[:, 'CreditCard'].value_counts()

In [None]:
# Separate the feature table (every column except the label) from the label column.
# NOTE(review): the file name suggests a personal-loan target, but this notebook
# predicts 'CreditCard' — confirm that is the intended target.
x = df.drop(columns=['CreditCard'])
y = df['CreditCard']

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=42,
)

In [None]:
# Standardise every feature column (zero mean, unit variance) before modelling.
preprocessing = ColumnTransformer(
    transformers=[('scaler', StandardScaler(), x.columns)],
)

In [None]:
# Chain the scaling step with a gradient-boosted tree classifier (default settings).
pipeline = Pipeline(
    steps=[('preprocessing', preprocessing), ('model', XGBClassifier())],
)

In [None]:
# Fit the scaler and the classifier on the training partition only.
pipeline.fit(X=xtrain, y=ytrain)

In [None]:
# Accuracy of the XGBoost pipeline on its own training data.
pipeline.score(X=xtrain, y=ytrain)

In [None]:
# Accuracy of the XGBoost pipeline on the held-out test partition.
pipeline.score(X=xtest, y=ytest)

In [None]:
# XGBoost pipeline predictions for the training rows (y_pred = train, ypred = test).
y_pred = pipeline.predict(X=xtrain)

In [None]:
# XGBoost pipeline predictions for the held-out test rows (ypred = test, y_pred = train).
ypred = pipeline.predict(X=xtest)

In [None]:
# Confusion matrix on the training partition (rows: true class, columns: predicted class).
confusion_matrix(y_true=ytrain, y_pred=y_pred)

In [None]:
# Confusion matrix on the test partition (rows: true class, columns: predicted class).
confusion_matrix(y_true=ytest, y_pred=ypred)

In [None]:
# BernoulliNB binarises every feature at its default threshold of 0.0, so on
# raw data any column whose values are all positive collapses to a constant 1
# and contributes nothing to the model. Standardising first centres each column
# on 0, which turns the threshold into an above/below-the-mean split per
# feature. `model` still exposes fit/score/predict, so the scoring cells that
# follow work unchanged.
model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('model', BernoulliNB()),
    ]
)
model.fit(xtrain, ytrain)

In [None]:
# Training accuracy of the Bernoulli naive Bayes `model` fitted in the previous cell.
model.score(X=xtrain, y=ytrain)

In [None]:
# Test accuracy of the Bernoulli naive Bayes `model`.
model.score(X=xtest, y=ytest)

In [None]:
# Rebind `model` to a Gaussian naive Bayes classifier and fit it on the
# unscaled training features; the Bernoulli model is discarded here.
model = GaussianNB()
model.fit(X=xtrain, y=ytrain)

In [None]:
# Training accuracy of the Gaussian naive Bayes `model`.
model.score(X=xtrain, y=ytrain)

0.73925

In [None]:
# Test accuracy of the Gaussian naive Bayes `model`.
model.score(X=xtest, y=ytest)

0.752

In [None]:
# Gaussian naive Bayes predictions for the training rows; overwrites the
# XGBoost training predictions previously stored in y_pred.
y_pred = model.predict(X=xtrain)

In [None]:
# Gaussian naive Bayes predictions for the test rows; overwrites the
# XGBoost test predictions previously stored in ypred.
ypred = model.predict(X=xtest)

In [None]:
# Confusion matrix on the training partition (rows: true class, columns: predicted class).
confusion_matrix(y_true=ytrain, y_pred=y_pred)

array([[2769,   53],
       [ 990,  188]])

In [None]:
# Confusion matrix on the test partition (rows: true class, columns: predicted class).
confusion_matrix(y_true=ytest, y_pred=ypred)

array([[699,   9],
       [239,  53]])

In [None]:
# Macro F1 on the training partition: the unweighted mean of the per-class F1 scores.
print(f1_score(y_true=ytrain, y_pred=y_pred, average='macro'))

0.5532443912735309


In [None]:
# Macro F1 on the test partition: the unweighted mean of the per-class F1 scores.
print(f1_score(y_true=ytest, y_pred=ypred, average='macro'))

0.574383370746408


In [None]:
# Per-class precision, recall and F1 on the training partition.
print(classification_report(y_true=ytrain, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.74      0.98      0.84      2822
           1       0.78      0.16      0.26      1178

    accuracy                           0.74      4000
   macro avg       0.76      0.57      0.55      4000
weighted avg       0.75      0.74      0.67      4000



In [None]:
# Per-class precision, recall and F1 on the held-out test partition.
print(classification_report(y_true=ytest, y_pred=ypred))

              precision    recall  f1-score   support

           0       0.75      0.99      0.85       708
           1       0.85      0.18      0.30       292

    accuracy                           0.75      1000
   macro avg       0.80      0.58      0.57      1000
weighted avg       0.78      0.75      0.69      1000

