In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd

# Train and validate a random-forest classifier on the loan data.
#
# Flow: load CSVs -> separate features/target -> hold out 20% for validation
# -> fit imputers and encoders on the training split only -> train -> score.
# Preprocessing is fit after the split so that no statistic (mean, most
# frequent value, category vocabulary) is learned from validation rows.
# NOTE: metrics can therefore differ slightly from a run in which the
# imputers/encoders were fit on the full table before splitting.

train_df = pd.read_csv("loan-train.csv")
test_df = pd.read_csv("loan-test.csv")

# Loan_ID is removed so it is not used as a feature.
train_df = train_df.drop(columns=['Loan_ID'])

# Separate target
X = train_df.drop('Loan_Status', axis=1)
y = train_df['Loan_Status']

# Encode target variable ('Y' -> 1, 'N' -> 0). Series.map yields NaN for any
# value missing from the mapping, so fail here with a clear message rather
# than later inside model.fit.
y = y.map({'Y': 1, 'N': 0})
if y.isna().any():
    raise ValueError("Loan_Status must be 'Y' or 'N' for every training row")

# Column groups by dtype: object columns are treated as categorical,
# everything else as numerical.
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(exclude=['object']).columns

# Split for validation BEFORE fitting any preprocessing. test_size and
# random_state are unchanged, so the same rows land in each split as before.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on independent copies so the column assignments below never write
# into (or warn about) a slice of X.
X_train = X_train.copy()
X_val = X_val.copy()

# Fill missing values: statistics come from the training split only and are
# then applied unchanged to the validation split.
cat_imputer = SimpleImputer(strategy='most_frequent')
X_train[categorical_cols] = cat_imputer.fit_transform(X_train[categorical_cols])
X_val[categorical_cols] = cat_imputer.transform(X_val[categorical_cols])

num_imputer = SimpleImputer(strategy='mean')
X_train[numerical_cols] = num_imputer.fit_transform(X_train[numerical_cols])
X_val[numerical_cols] = num_imputer.transform(X_val[numerical_cols])

# Encode categorical variables as integer codes. Each encoder is fit on the
# training split and kept in label_encoders so the same mapping can be
# reused on other data. LabelEncoder.transform raises ValueError if the
# validation split contains a category that was absent from training.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col])
    X_val[col] = le.transform(X_val[col])
    label_encoders[col] = le

# Rebuild X from the processed splits, in the original row order, so any
# later code that references X still sees imputed, encoded features.
X = pd.concat([X_train, X_val]).loc[X.index]

# Train a model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on validation set
y_pred = model.predict(X_val)

# Evaluate
accuracy = accuracy_score(y_val, y_pred)
report = classification_report(y_val, y_pred)

# Last expression of the cell: displayed as the cell's output in a notebook.
accuracy, report

(0.7723577235772358,
 '              precision    recall  f1-score   support\n\n           0       0.86      0.42      0.56        43\n           1       0.75      0.96      0.85        80\n\n    accuracy                           0.77       123\n   macro avg       0.81      0.69      0.70       123\nweighted avg       0.79      0.77      0.75       123\n')