In [1]:
# Loan_Eligibility_Model.ipynb

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
import pickle

# Load the raw dataset. `data` itself is left untouched so it can still be
# inspected as-read further down the notebook.
data = pd.read_csv("Loan_Data1.csv")

# Train only on rows that actually carry a label: scikit-learn estimators reject
# NaN in the target, so a single unlabelled row would make `model.fit` raise.
# When no label is missing this selects every row.
labelled = data.dropna(subset=['Loan_Status'])
X = labelled.drop('Loan_Status', axis=1)
y = labelled['Loan_Status']

# NOTE(review): every non-numeric column left in X is one-hot encoded below. If the
# CSV contains a per-row identifier column, it should be dropped here, otherwise it
# turns into one indicator feature per training row -- confirm against the schema.

# Feature Engineering
# Combined income of applicant and co-applicant. A NaN in either component
# propagates into the sum; the imputers below fill whatever is missing.
X['TotalIncome'] = X['ApplicantIncome'] + X['CoapplicantIncome']

# Identify numeric and categorical columns (by dtype, after feature engineering,
# so the engineered column is routed through the preprocessor as well)
numeric_columns = X.select_dtypes(include='number').columns
categorical_columns = X.select_dtypes(exclude='number').columns

# Numeric columns: fill gaps with the column mean, then standardise.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical columns: fill gaps with the most frequent value, then one-hot encode.
# handle_unknown='ignore' makes categories unseen during fit encode as all zeros
# instead of raising at transform time.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine transformers using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_columns),
        ('cat', categorical_transformer, categorical_columns)
    ])

# Split the data into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the preprocessor on the training split only, then apply the fitted
# transformation to the test split, so no test-set statistics leak into training.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Build and train the logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_processed, y_train)

# Evaluate on the held-out split before persisting anything, so the quality of
# the saved artefacts is visible in the notebook output.
test_accuracy = model.score(X_test_processed, y_test)
print(f"Held-out accuracy: {test_accuracy:.3f} ({len(y_test)} test rows)")

# Save the model and preprocessor
# The preprocessor was fitted on frames that include the engineered 'TotalIncome'
# column, so code that loads these files must add that column before calling
# `preprocessor.transform`.
with open('loan_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('preprocessor.pkl', 'wb') as preprocessor_file:
    pickle.dump(preprocessor, preprocessor_file)