In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ---------------------------------------------------------------------------
# Loan-status decision tree: load -> clean -> encode -> split -> fit ->
# evaluate -> export the held-out rows together with their predictions.
# ---------------------------------------------------------------------------

# Read the raw CSV and discard every row that has at least one missing value.
data = pd.read_csv('/content/loan_data.csv')
data.dropna(inplace=True)

# The target is the 'Loan_Status' column; all remaining columns are features.
y = data['Loan_Status']

# One-hot encode whatever non-numeric feature columns are present.
# NOTE(review): if the CSV carries a per-row identifier column, it would be
# expanded into one dummy column per row here -- confirm against the data.
X = pd.get_dummies(data.drop(columns='Loan_Status'))

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the decision tree (seeded) and fit it on the training portion.
# fit() returns the estimator itself, so construction and training chain.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = clf.predict(X_test)

# Report accuracy first, then the per-class report and the confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

for heading, metric in (
    ('\nClassification Report:', classification_report),
    ('\nConfusion Matrix:', confusion_matrix),
):
    print(heading)
    print(metric(y_test, y_pred))

# Attach the model's prediction for every cleaned row (training rows
# included) as a new column on the DataFrame.
data['Predicted_Output'] = clf.predict(X)

# Keep only the rows that landed in the test split, selected by index label.
tested_data = data.loc[X_test.index]

# Write the test rows (all original columns plus the prediction) to CSV,
# without the index column.
output_csv = 'tested_loan_prediction_actual_vs_predicted_with_all_columns.csv'
tested_data.to_csv(output_csv, index=False)


Accuracy: 0.7580645161290323

Classification Report:
              precision    recall  f1-score   support

           N       0.67      0.50      0.57        20
           Y       0.79      0.88      0.83        42

    accuracy                           0.76        62
   macro avg       0.73      0.69      0.70        62
weighted avg       0.75      0.76      0.75        62


Confusion Matrix:
[[10 10]
 [ 5 37]]
