In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # Corrected the import statement
from sklearn.metrics import accuracy_score
import pickle

# Load the dataset
loan_dataset = pd.read_csv('./demo_data.csv')

# Drop rows with missing values

loan_dataset = loan_dataset.dropna()

loan_dataset = loan_dataset.replace(to_replace="N", value="0")
loan_dataset = loan_dataset.replace(to_replace="Y", value="1")
loan_dataset = loan_dataset.replace(to_replace="Yes", value="1")
loan_dataset = loan_dataset.replace(to_replace="No", value="0")
loan_dataset = loan_dataset.replace(to_replace="Male", value="1")
loan_dataset = loan_dataset.replace(to_replace="Female", value="0")
loan_dataset = loan_dataset.replace(to_replace="Graduate", value="1")
loan_dataset = loan_dataset.replace(to_replace="Not Graduate", value="0")
loan_dataset = loan_dataset.replace(to_replace="Rural", value="0")
loan_dataset = loan_dataset.replace(to_replace="Urban", value="2")
loan_dataset = loan_dataset.replace(to_replace="Semiurban", value="1")
loan_dataset = loan_dataset.replace(to_replace="3+", value="4")


# Separate features (X) and target (Y)
X = loan_dataset.drop(columns=['Loan_ID', 'Loan_Status'], axis=1)
Y = loan_dataset['Loan_Status']

# Split the dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, stratify=Y, random_state=2)

# Create and train the SVM classifier with a linear kernel
classifier = SVC(kernel='linear')
classifier.fit(X_train, Y_train)

# Make predictions on the training and testing data
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)

# Calculate the percentage of loan approvals in the test data
percentage_approval = (X_test_prediction == 1).mean() * 100  # Changed 'Y' to 1

print('Percentage of loan approvals in the test data:', percentage_approval)

# Save the trained model to a file
filename = 'trained_model.sav'
pickle.dump(classifier, open(filename, 'wb'))


Percentage of loan approvals in the test data: 77.08333333333334


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle

# Load the dataset
loan_dataset = pd.read_csv('./demo1_data.csv')

# Feature Engineering and Data Preprocessing



loan_dataset = loan_dataset.dropna()

loan_dataset = loan_dataset.replace(to_replace="N", value="0")
loan_dataset = loan_dataset.replace(to_replace="Y", value="1")
loan_dataset = loan_dataset.replace(to_replace="Yes", value="1")
loan_dataset = loan_dataset.replace(to_replace="No", value="0")
loan_dataset = loan_dataset.replace(to_replace="Male", value="1")
loan_dataset = loan_dataset.replace(to_replace="Female", value="0")
loan_dataset = loan_dataset.replace(to_replace="Graduate", value="1")
loan_dataset = loan_dataset.replace(to_replace="Not Graduate", value="0")
loan_dataset = loan_dataset.replace(to_replace="Rural", value="0")
loan_dataset = loan_dataset.replace(to_replace="Urban", value="2")
loan_dataset = loan_dataset.replace(to_replace="Semiurban", value="1")
loan_dataset = loan_dataset.replace(to_replace="3+", value="4")


# Implement feature engineering and data preprocessing steps as needed

# Separate features and target variable
X = loan_dataset.drop(columns=['Loan_ID', 'Loan_Status'], axis=1)
Y = loan_dataset['Loan_Status']

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, stratify=Y, random_state=2)

# Model Selection and Hyperparameter Tuning (Random Forest)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10]
}

rf_classifier = RandomForestClassifier(random_state=2)
grid_search = GridSearchCV(rf_classifier, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, Y_train)

best_rf_classifier = grid_search.best_estimator_

# Evaluate the model
X_train_prediction = best_rf_classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

X_test_prediction = best_rf_classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
percentage_approval = (X_test_prediction == 'Y').mean() * 100

print('Percentage of loan approvals in the test data:', percentage_approval)

# Save the trained model
filename = 'trained_model1_rf.sav'
pickle.dump(best_rf_classifier, open(filename, 'wb'))

loaded_model = pickle.load(open('trained_model1_rf.sav', 'rb'))




loaded_model = pickle.load(open('trained_model.sav', 'rb'))

input_data = (1, 0, 1, 0, 1, 4053, 2026, 158, 560, 0, 2)
input_data_numpy = np.asarray(input_data)
input_data_reshape = input_data_numpy.reshape(1, -1)
prediction = classifier.predict(input_data_reshape)

print('Accuracy on training data : ', training_data_accuracy)
print('Accuracy on test data : ', test_data_accuracy)


print(prediction)
if (prediction[0] == '0'):
    print('loan is not approved')
else:
    print('loan is approved')


Percentage of loan approvals in the test data: 0.0


NameError: name 'classifier' is not defined

In [20]:
import numpy as np
import pandas as pd
import pickle

# Load the trained model
loaded_model = pickle.load(open('trained_model1_rf.sav', 'rb'))

# Sample data in the same format as your training data
sample_data = {
    'Gender': 1,  # 1 for Male
    'Married': 0,  # 0 for No
    'Education': 1,  # 1 for Graduate
    'Self_Employed': 0,  # 0 for No
    'Property_Area': 1,  # 1 for Semiurban
    'ApplicantIncome': 40053,
    'CoapplicantIncome': 20026,
    'LoanAmount': 15008,
    'Loan_Amount_Term': 760,
    'Credit_History': 621,  # 0 for No, 1 for Yes, or 2 for Urban (whichever corresponds to your data)
    'Dependents': 2  # 4 for '3+' (use the value you assigned in preprocessing)
}

# Create a DataFrame from the sample data with matching feature names
sample_data_df = pd.DataFrame([sample_data], columns=X.columns)

# Use the trained model to make predictions
sample_prediction = loaded_model.predict(sample_data_df)

# Check the prediction
if sample_prediction[0] == '1':
    print('Loan is approved')
else:
    print('Loan is not approved')


Loan is not approved


In [34]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle

# Load the dataset
loan_dataset = pd.read_csv('./demo1_data.csv')

# Feature Engineering and Data Preprocessing


loan_dataset = loan_dataset.dropna()

loan_dataset = loan_dataset.replace(to_replace="N", value="0")
loan_dataset = loan_dataset.replace(to_replace="Y", value="1")
loan_dataset = loan_dataset.replace(to_replace="Yes", value="1")
loan_dataset = loan_dataset.replace(to_replace="No", value="0")
loan_dataset = loan_dataset.replace(to_replace="Male", value="1")
loan_dataset = loan_dataset.replace(to_replace="Female", value="0")
loan_dataset = loan_dataset.replace(to_replace="Graduate", value="1")
loan_dataset = loan_dataset.replace(to_replace="Not Graduate", value="0")
loan_dataset = loan_dataset.replace(to_replace="Rural", value="0")
loan_dataset = loan_dataset.replace(to_replace="Urban", value="2")
loan_dataset = loan_dataset.replace(to_replace="Semiurban", value="1")
loan_dataset = loan_dataset.replace(to_replace="3+", value="4")


# ... (Your preprocessing code here)

# Separate features and target variable
X = loan_dataset.drop(columns=['Loan_ID', 'Loan_Status'], axis=1)
Y = loan_dataset['Loan_Status']

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, stratify=Y, random_state=2)

# Model Selection and Hyperparameter Tuning (Random Forest)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10]
}

rf_classifier = RandomForestClassifier(random_state=2)
grid_search = GridSearchCV(rf_classifier, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, Y_train)

best_rf_classifier = grid_search.best_estimator_

# Evaluate the model
X_train_prediction = best_rf_classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

X_test_prediction = best_rf_classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
percentage_approval = (X_test_prediction == 'Y').mean() * 100

# print('Percentage of loan approvals in the test data:', percentage_approval)

# Save the trained model
filename = 'trained_model1_rf.sav'
pickle.dump(best_rf_classifier, open(filename, 'wb'))
# Sample data for prediction
input_data = pd.DataFrame({
    'Gender': ['Male'],  # Replace with the appropriate values
    'Married': ['Yes'],  # Replace with the appropriate values
    'Dependents': ['1'],  # Replace with the appropriate values
    'Education': ['Graduate'],  # Replace with the appropriate values
    'Self_Employed': ['No'],  # Replace with the appropriate values
    'ApplicantIncome': [4053],  # Replace with the appropriate values
    'CoapplicantIncome': [2026],  # Replace with the appropriate values
    'LoanAmount': [1551238],  # Replace with the appropriate values
    'Loan_Amount_Term': [12],  # Replace with the appropriate values
    'Credit_History': [750],  # Replace with the appropriate values
    'Property_Area': ['Urban']  # Replace with the appropriate values
})

# Perform the same preprocessing on the input data
input_data = input_data.replace(to_replace="No", value="0")
input_data = input_data.replace(to_replace="Yes", value="1")
input_data = input_data.replace(to_replace="Male", value="1")
input_data = input_data.replace(to_replace="Graduate", value="1")
input_data = input_data.replace(to_replace="Urban", value="2")

loaded_model = pickle.load(open('trained_model1_rf.sav', 'rb'))

# Make predictions on the preprocessed input data
prediction = loaded_model.predict(input_data)

print(prediction)
if (prediction[0] == '0'):
    print('Loan is not approved')
else:
    print('Loan is approved')

# rate_of_interest = 6 / 12 / 100
# months = 120 * 12
# rate1 = ((1 + rate_of_interest) ** months) / ((1 + rate_of_interest) ** months - 1)
# emi = 1551238 * rate_of_interest * rate1
# print(emi)

print('Accuracy on training data : ', training_data_accuracy)
print('Accuracy on test data : ', test_data_accuracy)


['0']
Loan is not approved
Accuracy on training data :  0.9994444444444445
Accuracy on test data :  0.99


In [32]:
# Sample data for prediction
input_data = pd.DataFrame({
    'Gender': ['Male'],  # Replace with the appropriate values
    'Married': ['Yes'],  # Replace with the appropriate values
    'Dependents': ['1'],  # Replace with the appropriate values
    'Education': ['Graduate'],  # Replace with the appropriate values
    'Self_Employed': ['No'],  # Replace with the appropriate values
    'ApplicantIncome': [4053],  # Replace with the appropriate values
    'CoapplicantIncome': [2026],  # Replace with the appropriate values
    'LoanAmount': [1551238],  # Replace with the appropriate values
    'Loan_Amount_Term': [120],  # Replace with the appropriate values
    'Credit_History': [750],  # Replace with the appropriate values
    'Property_Area': ['Urban']  # Replace with the appropriate values
})

# Perform the same preprocessing on the input data
input_data = input_data.replace(to_replace="No", value="0")
input_data = input_data.replace(to_replace="Yes", value="1")
input_data = input_data.replace(to_replace="Male", value="1")
input_data = input_data.replace(to_replace="Graduate", value="1")
input_data = input_data.replace(to_replace="Urban", value="2")


loaded_model = pickle.load(open('trained_model1_rf.sav', 'rb'))

# Make predictions on the preprocessed input data
prediction = loaded_model.predict(input_data)

print(prediction)
if (prediction[0] == '0'):
    print('Loan is not approved')
else:
    print('Loan is approved')


['1']
Loan is approved
