Importing Libraries

In [11]:
import random
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

Generating Fake Random Data

In [14]:
# generating ages between 18 and 75
age = [random.randint(18, 75) for _ in range(1000)]

# generating gender (male and female)
gender = [random.randint(0, 1) for _ in range(1000)]

# generating 10 different occupations
occupation = [random.randint(0, 9) for _ in range(1000)]

# generating 50 states of USA
state = [random.randint(0, 49) for _ in range(1000)]

# generating coverage types (liability, collision, comprehensive)
coverage_types = [random.randint(0, 2) for _ in range(1000)]

# generating ammount of money the poilicy holder pays for insurance between 
# 50$ to 1000$
premium = [random.randint(50, 1000) for _ in range(1000)]

# generating 10 different types of claims filed
claim_history = [random.randint(0, 9) for _ in range(1000)]

# generating 15 different cars with different makes and model
vehicle_make_model = [random.randint(0, 14) for _ in range(1000)]

# generating history that a user has made a claim or not
claim_probability = [random.randint(0, 1) for _ in range(1000)]

data = []
for i in range(1000):
    instance = {
        "age": age[i],
        "gender": gender[i],
        "occupation": occupation[i],
        "state": state[i],
        "coverage_types": coverage_types[i],
        "premium": premium[i],
        "claim_history": claim_history[i],
        "vehicle_make_model": vehicle_make_model[i],
        "claim_probability": claim_probability[i]
    }
    data.append(instance)

data = pd.DataFrame(data)

In [15]:
data.head()

Unnamed: 0,age,gender,occupation,state,coverage_types,premium,claim_history,vehicle_make_model,claim_probability
0,38,1,7,29,2,726,8,5,0
1,28,1,9,12,0,200,7,12,0
2,49,0,7,7,2,604,7,10,1
3,56,1,2,6,0,515,4,5,1
4,42,1,6,4,2,849,5,12,1


Splitting The Data Into Training and Test Sets

In [18]:
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state=111)

Training Logistic Regression

In [19]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

Training Decision Tree

In [20]:
tree = DecisionTreeClassifier()
tree.fit(X_train, y_train)

Comparing Metrics of Two Models

In [23]:
logreg_pred = logreg.predict(X_test)
tree_pred = tree.predict(X_test)

logreg_accuracy = accuracy_score(y_test, logreg_pred)
tree_accuracy = accuracy_score(y_test, tree_pred)

logreg_precision = precision_score(y_test, logreg_pred)
tree_precision = precision_score(y_test, tree_pred)

logreg_recall = recall_score(y_test, logreg_pred)
tree_recall = recall_score(y_test, tree_pred)

logreg_f1 = f1_score(y_test, logreg_pred)
tree_f1 = f1_score(y_test, tree_pred)

print("Logistic Regression Results:")
print("Accuracy:", logreg_accuracy)
print("Precision:", logreg_precision)
print("Recall:", logreg_recall)
print("F1 Score:", logreg_f1)

print("\n\nDecision Tree Results:")
print("Accuracy:", tree_accuracy)
print("Precision:", tree_precision)
print("Recall:", tree_recall)
print("F1 Score:", tree_f1)

Logistic Regression Results:
Accuracy: 0.52
Precision: 0.5661764705882353
Recall: 0.6754385964912281
F1 Score: 0.6160000000000001


Decision Tree Results:
Accuracy: 0.51
Precision: 0.5754716981132075
Recall: 0.5350877192982456
F1 Score: 0.5545454545454545
