### 1. Import Necessary Libraries

In [131]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

### 2. Import Data

In [106]:
# Read the claimants dataset; the relative path is resolved against the
# notebook's working directory.
claimants_data = pd.read_csv('claimants.csv')
# Bare last expression so the frame is rendered as the cell output.
claimants_data

Unnamed: 0,CASENUM,ATTORNEY,CLMSEX,CLMINSUR,SEATBELT,CLMAGE,LOSS
0,5,0,0.0,1.0,0.0,50.0,34.940
1,3,1,1.0,0.0,0.0,18.0,0.891
2,66,1,0.0,1.0,0.0,5.0,0.330
3,70,0,0.0,1.0,1.0,31.0,0.037
4,96,1,0.0,1.0,0.0,30.0,0.038
...,...,...,...,...,...,...,...
1335,34100,1,0.0,1.0,0.0,,0.576
1336,34110,0,1.0,1.0,0.0,46.0,3.705
1337,34113,1,1.0,1.0,0.0,39.0,0.099
1338,34145,0,1.0,0.0,0.0,8.0,3.177


### 3. Data Understanding

#### 3.1 Initial Analysis

In [107]:
# (rows, columns) of the frame as loaded, before any cleaning.
claimants_data.shape

(1340, 7)

In [108]:
# Number of missing values in each column.
claimants_data.isna().sum()

CASENUM       0
ATTORNEY      0
CLMSEX       12
CLMINSUR     41
SEATBELT     48
CLMAGE      189
LOSS          0
dtype: int64

### 4. Data Preparation

In [109]:
# Remove CASENUM so it is not used as a model feature
# (presumably just a case identifier -- confirm against the data dictionary).
# drop(..., errors='ignore') keeps this cell safe to re-run: a plain
# `del claimants_data['CASENUM']` raises KeyError on the second execution.
claimants_data = claimants_data.drop(columns=['CASENUM'], errors='ignore')

In [110]:
# Discard every row that has a missing value in any column.
# The original row labels are kept (the index is not reset).
claimants_data = claimants_data.dropna(axis='index')

In [111]:
# Re-count missing values per column to confirm none remain after dropna.
claimants_data.isna().sum()

ATTORNEY    0
CLMSEX      0
CLMINSUR    0
SEATBELT    0
CLMAGE      0
LOSS        0
dtype: int64

In [112]:
# Data type of each remaining column.
claimants_data.dtypes

ATTORNEY      int64
CLMSEX      float64
CLMINSUR    float64
SEATBELT    float64
CLMAGE      float64
LOSS        float64
dtype: object

In [113]:
# Render the cleaned frame (CASENUM removed, rows with missing values dropped).
claimants_data

Unnamed: 0,ATTORNEY,CLMSEX,CLMINSUR,SEATBELT,CLMAGE,LOSS
0,0,0.0,1.0,0.0,50.0,34.940
1,1,1.0,0.0,0.0,18.0,0.891
2,1,0.0,1.0,0.0,5.0,0.330
3,0,0.0,1.0,1.0,31.0,0.037
4,1,0.0,1.0,0.0,30.0,0.038
...,...,...,...,...,...,...
1334,1,1.0,1.0,0.0,16.0,0.060
1336,0,1.0,1.0,0.0,46.0,3.705
1337,1,1.0,1.0,0.0,39.0,0.099
1338,0,1.0,0.0,0.0,8.0,3.177


### 5. Model Building

In [114]:
# Predictors: every column of the cleaned frame except the ATTORNEY target.
X = claimants_data.drop(columns=['ATTORNEY'])
# Target: ATTORNEY selected with a list, so y is a single-column DataFrame
# (shape (n, 1)) rather than a Series.
y = claimants_data.loc[:, ['ATTORNEY']]

In [115]:
# Shapes of the feature frame and the target frame; row counts must match.
X.shape,y.shape

((1096, 5), (1096, 1))

In [116]:
# Hold out 20% of the rows for testing. The fixed random_state makes the
# shuffled split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=123,
    shuffle=True,
)

In [117]:
# Shapes of the training features and training target.
X_train.shape,y_train.shape #Training Data

((876, 5), (876, 1))

In [118]:
# Shapes of the held-out test features and test target.
X_test.shape,y_test.shape

((220, 5), (220, 1))

### 6. Model Training

In [120]:
# Create the classifier with scikit-learn's default hyper-parameters.
logistic_model = LogisticRegression()  # Initialization
# y_train is a single-column DataFrame of shape (n, 1). scikit-learn expects
# a 1-D target and emits a DataConversionWarning for a column vector, so
# flatten it here; the fitted model is the same.
logistic_model.fit(X_train, y_train.to_numpy().ravel())

LogisticRegression()

In [121]:
# Fitted coefficients: one value per feature column of X_train, in column order.
logistic_model.coef_

array([[ 0.40131585,  0.54554594, -0.78114554,  0.01003038, -0.39685852]])

In [122]:
# Fitted intercept (bias) term of the model.
logistic_model.intercept_

array([-0.20480482])

### 7. Model Testing

In [123]:
# Predict class labels for the training and the test features with the
# fitted model, in that order.
y_pred_train, y_pred_test = (
    logistic_model.predict(features) for features in (X_train, X_test)
)

### 8. Model Evaluation

#### Training Data Evaluation

In [124]:
# Evaluate the predictions on the training partition.
# accuracy_score, confusion_matrix and classification_report live in
# sklearn.metrics and must be imported in the notebook's import cell;
# without that import a fresh kernel raises NameError here.
print('Train Accuracy : ', accuracy_score(y_train, y_pred_train))
# Confusion matrix layout (scikit-learn convention): rows are true classes,
# columns are predicted classes.
print('Confusion Matrics :\n',confusion_matrix(y_train,y_pred_train))
# Per-class precision, recall, F1 and support.
print('Classification Report : \n',classification_report(y_train,y_pred_train))

Train Accuracy :  0.7134703196347032
Confusion Matrics :
 [[315 151]
 [100 310]]
Classification Report : 
               precision    recall  f1-score   support

           0       0.76      0.68      0.72       466
           1       0.67      0.76      0.71       410

    accuracy                           0.71       876
   macro avg       0.72      0.72      0.71       876
weighted avg       0.72      0.71      0.71       876



In [125]:
# Evaluate the predictions on the held-out test partition.
# accuracy_score, confusion_matrix and classification_report live in
# sklearn.metrics and must be imported in the notebook's import cell;
# without that import a fresh kernel raises NameError here.
print('Test Accuracy : ',accuracy_score(y_test,y_pred_test))
# Confusion matrix layout (scikit-learn convention): rows are true classes,
# columns are predicted classes.
print('Confusion Matrics :\n',confusion_matrix(y_test,y_pred_test))
# Per-class precision, recall, F1 and support.
print('Classification Report : \n',classification_report(y_test,y_pred_test))

Test Accuracy :  0.6863636363636364
Confusion Matrics :
 [[66 46]
 [23 85]]
Classification Report : 
               precision    recall  f1-score   support

           0       0.74      0.59      0.66       112
           1       0.65      0.79      0.71       108

    accuracy                           0.69       220
   macro avg       0.70      0.69      0.68       220
weighted avg       0.70      0.69      0.68       220



### 9. Model Deployment

In [126]:
from pickle import dump

In [127]:
# Serialize the fitted model to disk. The context manager guarantees the
# file handle is flushed and closed, even if pickling raises; the bare
# open(...) call previously left the handle open.
with open('claimants_classification.pkl', 'wb') as model_file:
    dump(logistic_model, model_file)

In [128]:
from pickle import load

In [129]:
# Reload the pickled model. The context manager closes the file handle
# once the object has been read.
# NOTE: unpickling can execute arbitrary code -- only load pickle files
# from a trusted source, such as the file written by this notebook.
with open('claimants_classification.pkl', 'rb') as model_file:
    log_model_pkl = load(model_file)

In [130]:
# Sanity check: the reloaded model can predict on the held-out test features.
log_model_pkl.predict(X_test)

array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
       0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
       1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1],
      dtype=int64)