In [7]:
# 📌 Import Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
import os
import joblib

# 📌 Load Dataset
df = pd.read_csv('../Data/raw_data.csv')
print("✅ Data Loaded:", df.shape)

# 📌 Handle Missing Values
# Any row containing at least one missing value is discarded.
df = df.dropna()

# 📌 Encode Categorical Variables
# One LabelEncoder per column, kept in `label_encoders` so the fitted
# category -> integer mapping can be reused after this cell runs.
label_encoders = {}
categorical_cols = ['Gender', 'Smoker', 'Type_of_Claim']

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# 📌 Encode Target
target_mapping = {'Genuine Claim': 0, 'Fraudulent Claim': 1}
encoded_target = df['Claim_Status'].map(target_mapping)

# Series.map yields NaN for any label missing from the mapping; fail loudly
# instead of silently writing NaN targets into the preprocessed file.
unmapped_rows = encoded_target.isna()
if unmapped_rows.any():
    unexpected = sorted(df.loc[unmapped_rows, 'Claim_Status'].astype(str).unique())
    raise ValueError(
        f"Unexpected Claim_Status values (not in target_mapping): {unexpected}"
    )
df['Claim_Status'] = encoded_target

# 📌 Features and Target
X = df.drop('Claim_Status', axis=1)
y = df['Claim_Status']

# 📌 Apply StandardScaler
# The scaler is fit on every feature column, including the label-encoded
# categorical columns above.
# NOTE(review): it is fit on the full dataset; if a train/test split happens
# downstream, confirm that fitting before the split is intended.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# fit_transform returns a plain array, so the frame is rebuilt with the
# original column names; `y.values` is used because dropna() left gaps in
# the index of `y` while the rebuilt frame has a fresh 0..n-1 index.
df_scaled = pd.DataFrame(X_scaled, columns=X.columns)
df_scaled['Claim_Status'] = y.values

# 📌 Save Preprocessed Data & Scaler
os.makedirs('../Data', exist_ok=True)
os.makedirs('../App/model', exist_ok=True)
df_scaled.to_csv('../Data/preprocessed_data.csv', index=False)
joblib.dump(scaler, '../App/model/scaler.pkl')
# Persist the fitted encoders next to the scaler: without them the
# categorical encoding applied here cannot be reproduced when the scaler
# is loaded elsewhere.
joblib.dump(label_encoders, '../App/model/label_encoders.pkl')

print("✅ Preprocessed data and scaler saved.")


✅ Data Loaded: (574, 11)
✅ Preprocessed data and scaler saved.
