In [7]:
# 1. Import libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [8]:
# 2. Load dataset
df = pd.read_csv("Dataset.csv")

# View first rows
print(df.head())


   id  Gender  Age  Driving_License  Region_Code  Previously_Insured  \
0   1    Male   44                1         28.0                   0   
1   2    Male   76                1          3.0                   0   
2   3    Male   47                1         28.0                   0   
3   4    Male   21                1         11.0                   1   
4   5  Female   29                1         41.0                   1   

  Vehicle_Age Vehicle_Damage  Annual_Premium  Policy_Sales_Channel  Vintage  \
0   > 2 Years            Yes         40454.0                  26.0      217   
1    1-2 Year             No         33536.0                  26.0      183   
2   > 2 Years            Yes         38294.0                  26.0       27   
3    < 1 Year             No         28619.0                 152.0      203   
4    < 1 Year             No         27496.0                 152.0       39   

   Response  
0         1  
1         0  
2         1  
3         0  
4         0  


In [9]:
# 3. Drop ID column (not useful for prediction)
df.drop(columns=['id'], inplace=True)


In [10]:
# 4. Encode categorical columns
categorical_cols = [
    'Gender',
    'Vehicle_Age',
    'Vehicle_Damage'
]

le = LabelEncoder()
for col in categorical_cols:
    df[col] = le.fit_transform(df[col])


In [11]:
# 5. Split features and target
X = df.drop('Response', axis=1)
y = df['Response']


In [12]:
# 6. Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)


In [13]:
# 7. Feature Scaling (important for Logistic Regression)
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [14]:
# 8. Train Logistic Regression model
model = LogisticRegression(max_iter=1000)

model.fit(X_train, y_train)


In [15]:
# 9. Make predictions
y_pred = model.predict(X_test)


In [16]:
# 10. Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.8774369604576107

Confusion Matrix:
 [[66880     0]
 [ 9342     0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.88      1.00      0.93     66880
           1       0.00      0.00      0.00      9342

    accuracy                           0.88     76222
   macro avg       0.44      0.50      0.47     76222
weighted avg       0.77      0.88      0.82     76222



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
# Example: new customer data
new_person = pd.DataFrame({
    'Gender': ['Male'],
    'Age': [35],
    'Driving_License': [1],
    'Region_Code': [28],
    'Previously_Insured': [0],
    'Vehicle_Age': ['1-2 Year'],
    'Vehicle_Damage': ['Yes'],
    'Annual_Premium': [35000],
    'Policy_Sales_Channel': [152],
    'Vintage': [120]
})

# Encode categorical columns
for col in categorical_cols:
    new_person[col] = le.fit_transform(new_person[col])

# Scale
new_person_scaled = scaler.transform(new_person)

# Prediction
prediction = model.predict(new_person_scaled)

print("Insurance Offer:", "YES" if prediction[0] == 1 else "NO")


Insurance Offer: NO
