# Loan Approval Prediction
## Importing Necessary Libraries

In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import warnings
# Silence every warning from this point on.
# NOTE(review): with no category or module given, this also hides
# FutureWarning/DeprecationWarning messages raised by pandas and scikit-learn;
# consider narrowing the filter so API-change warnings stay visible.
warnings.filterwarnings('ignore')

## Load the datasets

In [40]:
# Read the training, test and sample-submission CSVs from the current
# working directory, in that order.
train_df, test_df, sample_submission_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        './Training Dataset.csv',
        './Test Dataset.csv',
        './Sample_Submission.csv',
    )
)

## Data preprocessing

In [41]:
def preprocess_data(df):
    """Impute missing values and integer-encode the categorical columns.

    The work is done on a copy, so the frame passed in is left untouched and
    the calling cell can be re-run safely.

    Steps
    -----
    1. ``Gender``, ``Married``, ``Dependents``, ``Self_Employed`` and
       ``Credit_History``: missing values are replaced by the column mode
       (a column with no non-null values is left as is).
    2. ``LoanAmount``, ``Loan_Amount_Term``, ``ApplicantIncome`` and
       ``CoapplicantIncome``: missing values are replaced by the column mean.
    3. ``Gender``, ``Married``, ``Dependents``, ``Education``,
       ``Self_Employed`` and ``Property_Area``: values are replaced by integer
       codes numbered in sorted order of the values present in ``df`` (the
       same numbering ``LabelEncoder.fit_transform`` produces for a column
       without missing values). ``Education`` and ``Property_Area`` are not
       imputed, so a missing value there receives the code ``-1``.

    Note: imputation statistics and codes are derived from ``df`` alone. Two
    frames processed separately only receive matching codes when they contain
    the same set of values in each encoded column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named above.

    Returns
    -------
    pandas.DataFrame
        A new frame with the imputed and encoded columns; all other columns
        are carried over unchanged.

    Raises
    ------
    KeyError
        If one of the expected columns is absent.
    """
    df = df.copy()

    # Assign the filled column back instead of calling
    # ``df[col].fillna(..., inplace=True)``: the in-place call operates on a
    # temporary selection and does not update ``df`` under pandas Copy-on-Write.
    mode_filled_cols = ['Gender', 'Married', 'Dependents', 'Self_Employed', 'Credit_History']
    for col in mode_filled_cols:
        modes = df[col].mode()
        if not modes.empty:  # an all-missing column has no mode to fill with
            df[col] = df[col].fillna(modes.iloc[0])

    mean_filled_cols = ['LoanAmount', 'Loan_Amount_Term', 'ApplicantIncome', 'CoapplicantIncome']
    for col in mean_filled_cols:
        df[col] = df[col].fillna(df[col].mean())

    # sort=True numbers the categories in sorted order rather than in order of
    # first appearance, so the codes do not depend on row order.
    encoded_cols = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area']
    for col in encoded_cols:
        codes, _ = pd.factorize(df[col], sort=True)
        df[col] = codes

    return df

In [42]:
# Run the same preprocessing over each frame; the two calls are independent,
# so each frame is imputed and encoded using only its own values.
train_df, test_df = map(preprocess_data, (train_df, test_df))

## Prepare training and validation datasets

In [43]:
# Features: every training column except Loan_ID and Loan_Status.
X = train_df.drop(['Loan_ID', 'Loan_Status'], axis=1)
# Target: 1 where Loan_Status is 'Y', 0 for any other value.
y = train_df['Loan_Status'].eq('Y').astype('int64')

In [44]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Standardize the data

In [45]:
# Fit the scaler on the training split only, then apply that one fitted
# scaler to the training, validation and test features.
scaler = StandardScaler().fit(X_train)
X_train, X_val = scaler.transform(X_train), scaler.transform(X_val)
test_features = test_df.drop(['Loan_ID'], axis=1)
X_test = scaler.transform(test_features)

## Train a RandomForestClassifier model

In [46]:
# Random forest with 100 trees and a fixed seed, fitted on the scaled
# training split.
forest_settings = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_settings)
model.fit(X_train, y_train)


## Evaluate on the validation set

In [47]:
# Score the fitted model on the held-out validation split.
y_pred = model.predict(X_val)
accuracy = accuracy_score(y_true=y_val, y_pred=y_pred)
print(f"Validation Accuracy: {accuracy:.2f}")

Validation Accuracy: 0.77


## Predict on the test data

In [48]:
# Predict on the scaled test features, then map class 1 to 'Y' and any other
# class to 'N' to undo the numeric target encoding.
predicted_classes = model.predict(X_test)
test_predictions = list(map(lambda cls: 'Y' if cls == 1 else 'N', predicted_classes))

## Prepare the submission file

In [49]:
# Pair each test Loan_ID with its predicted label and write the result
# without the index column.
submission_df = test_df[['Loan_ID']].copy()
submission_df['Loan_Status'] = test_predictions
submission_df.to_csv('submission.csv', index=False)

In [50]:
# Confirmation message only; it does not itself check that the file exists.
print("Submission file has been created.")

Submission file has been created.


In [51]:
# Read the written file back so its contents can be inspected.
submission_data = pd.read_csv('submission.csv')

In [52]:
# Bare last expression: the notebook renders the reloaded frame as the cell output.
submission_data

Unnamed: 0,Loan_ID,Loan_Status
0,LP001015,Y
1,LP001022,Y
2,LP001031,Y
3,LP001035,Y
4,LP001051,Y
...,...,...
362,LP002971,Y
363,LP002975,Y
364,LP002980,Y
365,LP002986,Y
