<a href="https://colab.research.google.com/github/Guigo1008/ml_drafts/blob/main/predicting_insurance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries and the CSV

In [1]:
import pandas as pd
import graphviz
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier, export_graphviz

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [5]:
# Load the insurance dataset and discard the "Unnamed: 0" column in one step.
# NOTE(review): the preview below shows blank cells in ThisCarDam/Accident —
# confirm whether these are genuinely missing or a category parsed as NaN.
data = pd.read_csv("insurance.csv").drop(columns="Unnamed: 0")
data.head()

Unnamed: 0,GoodStudent,Age,SocioEcon,RiskAversion,VehicleYear,ThisCarDam,RuggedAuto,Accident,MakeModel,DrivQuality,...,HomeBase,AntiTheft,PropCost,OtherCarCost,OtherCar,MedCost,Cushioning,Airbag,ILiCost,DrivHist
0,False,Adult,Prole,Adventurous,Older,Moderate,EggShell,Mild,Economy,Poor,...,City,False,TenThou,Thousand,True,Thousand,Poor,False,Thousand,Many
1,False,Senior,Prole,Cautious,Current,,Football,,Economy,Normal,...,City,True,Thousand,Thousand,True,Thousand,Good,True,Thousand,Zero
2,False,Senior,UpperMiddle,Psychopath,Current,,Football,,FamilySedan,Excellent,...,City,False,Thousand,Thousand,False,Thousand,Good,True,Thousand,One
3,False,Adolescent,Middle,Normal,Older,,EggShell,,Economy,Normal,...,Suburb,False,Thousand,Thousand,True,Thousand,Fair,False,Thousand,Zero
4,False,Adolescent,Prole,Normal,Older,Moderate,Football,Moderate,Economy,Poor,...,City,False,TenThou,Thousand,False,Thousand,Fair,False,Thousand,Many


In [6]:
# Target column: the "Accident" label, kept in its original (un-encoded) form.
y = data["Accident"]

# Feature matrix: every other column, integer-encoded one column at a time.
# The single encoder is refit for each column, so once this cell finishes it
# only holds the mapping of the last column it processed.
le = LabelEncoder()
feature_columns = data.drop(columns="Accident")
x = feature_columns.apply(le.fit_transform)
x.head()

Unnamed: 0,GoodStudent,Age,SocioEcon,RiskAversion,VehicleYear,ThisCarDam,RuggedAuto,MakeModel,DrivQuality,Mileage,...,HomeBase,AntiTheft,PropCost,OtherCarCost,OtherCar,MedCost,Cushioning,Airbag,ILiCost,DrivHist
0,0,1,1,0,1,1,0,0,2,3,...,0,0,2,2,1,3,3,0,3,0
1,0,2,1,1,0,2,1,0,1,3,...,0,1,3,2,1,3,2,1,3,2
2,0,2,2,3,0,2,1,1,0,0,...,0,0,3,2,0,3,2,1,3,1
3,0,0,0,2,1,2,0,0,1,1,...,3,0,3,2,1,3,1,0,3,2
4,0,0,1,2,1,1,1,0,2,1,...,0,0,2,2,0,3,1,0,3,0


In [7]:
# Hold out 20% of the rows for testing; stratifying on y keeps the class
# proportions of the target the same in both splits, and the fixed seed makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

# Predicting

## Predicting Using Gaussian Naive Bayes

In [8]:
# Fit a Gaussian Naive Bayes classifier and predict the held-out rows.
nb = GaussianNB().fit(X_train, y_train)
predictions_nb = nb.predict(X_test)

# Accuracy plus class-frequency-weighted precision / recall / F1.
acc = accuracy_score(y_test, predictions_nb)
prec, rec, f1 = (
    scorer(y_test, predictions_nb, average="weighted")
    for scorer in (precision_score, recall_score, f1_score)
)
print(
    f'Accuracy: {acc*100:.2f}%\n'
    f'F1 Score: {f1*100:.2f}%\n'
    f'Recall Score: {rec*100:.2f}%\n'
    f'Precision: {prec*100:.2f}%'
)

Accuracy: 85.47%
F1 Score: 83.28%
Recall Score: 85.47%
Precision: 92.07%


## Predicting Using Decision Trees

In [9]:
# Small, seeded decision tree: at most 8 leaves and depth 8.
dt = DecisionTreeClassifier(
    max_depth=8,
    max_leaf_nodes=8,
    random_state=42,
).fit(X_train, y_train)
predictions_dt = dt.predict(X_test)

# Same report as for the Naive Bayes model: accuracy and weighted scores.
weighted = {"average": "weighted"}
acc = accuracy_score(y_test, predictions_dt)
f1 = f1_score(y_test, predictions_dt, **weighted)
rec = recall_score(y_test, predictions_dt, **weighted)
prec = precision_score(y_test, predictions_dt, **weighted)
print(
    f'Accuracy: {acc*100:.2f}%\n'
    f'F1 Score: {f1*100:.2f}%\n'
    f'Recall Score: {rec*100:.2f}%\n'
    f'Precision: {prec*100:.2f}%'
)

Accuracy: 94.08%
F1 Score: 93.87%
Recall Score: 94.08%
Precision: 94.03%


In [13]:
# Export the fitted decision tree to DOT source.
# export_graphviz looks class names up by class position, so they must be the
# distinct classes in the order the tree learned them (dt.classes_). Passing
# y.values (one label per sample) would label nodes with whatever labels the
# first few rows happen to carry.
# Plain lists are passed because some scikit-learn releases validate these
# arguments strictly as lists.
dot_data = export_graphviz(dt, out_file=None,
                           feature_names=list(x.columns),
                           class_names=[str(label) for label in dt.classes_],
                           filled=True, rounded=True,
                           special_characters=True)

graph = graphviz.Source(dot_data)
# Write the rendered tree to disk once (insurance_tree.pdf by default).
graph.render("insurance_tree")
# Show the tree inline as the cell output; this works in hosted notebooks,
# where opening an external viewer via view() is not available.
graph

'iris_tree.pdf'

## Predicting Using an MLP

In [None]:
# Integer-encode the target for the neural network.
# The encoder is fitted ONCE on the full target so that train and test share a
# single label -> integer mapping; fitting separately on each split can assign
# different integers to the same class if a class is missing from one split.
# The encoded splits are derived from the original `y` through the row index
# of the feature splits, so re-running this cell always gives the same result.
y_encoded = pd.Series(le.fit(y).transform(y), index=y.index, name=y.name)
y_train = y_encoded.loc[X_train.index].to_frame()
y_test = y_encoded.loc[X_test.index].to_frame()

In [None]:
# Number of target classes, read from the integer-encoded labels (0 .. K-1).
n_classes = int(y_train.to_numpy().max()) + 1

# Feed-forward classifier: two hidden ReLU layers and one softmax unit per
# class. The target has more than two categories, so a single sigmoid unit
# with binary cross-entropy cannot represent it; softmax with sparse
# categorical cross-entropy takes the integer labels directly and also covers
# the two-class case.
mlp = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(n_classes, activation='softmax')
])

mlp.compile(optimizer=Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])

# 20% of the training rows are held back by Keras for per-epoch validation.
# The returned history is kept in a variable so its repr is not echoed.
history = mlp.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=.2)

In [11]:
# mlp.predict returns floating-point scores, which the scikit-learn
# classification metrics reject; convert them to integer class ids first.
probabilities = mlp.predict(X_test)
if probabilities.shape[1] == 1:
    # Single sigmoid output: threshold at 0.5.
    predictions_mlp = (probabilities.ravel() >= 0.5).astype(int)
else:
    # One score per class: pick the most probable class.
    predictions_mlp = probabilities.argmax(axis=1)

# Flatten the (n, 1) label frame to the 1-D vector the metrics expect.
y_true = y_test.to_numpy().ravel()

acc = accuracy_score(y_true, predictions_mlp)
f1 = f1_score(y_true, predictions_mlp, average="weighted")
rec = recall_score(y_true, predictions_mlp, average="weighted")
prec = precision_score(y_true, predictions_mlp, average="weighted")
# Reported as percentages, matching the Naive Bayes and decision-tree cells.
print(f'Accuracy: {acc*100:.2f}%\nF1 Score: {f1*100:.2f}%\nRecall Score: {rec*100:.2f}%\nPrecision: {prec*100:.2f}%')

NameError: ignored