In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

file_path = 'Credit Banking_Project - 1 (1).csv'
data = pd.read_csv(file_path)

data['Limit'] = data['Limit'].str.replace('INR ', '').str.replace(',', '').astype(float)

data_encoded = pd.get_dummies(data, columns=['City', 'Credit Card Product', 'Company', 'Segment'])

data_encoded['Credit_Approved'] = (data_encoded['Limit'] > 100000).astype(int)

data_encoded.drop(columns=['Sl No:', 'Customer'], inplace=True)

X = data_encoded.drop(columns=['Credit_Approved'])
y = data_encoded['Credit_Approved']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

rf_y_pred = rf_model.predict(X_test)

rf_accuracy = accuracy_score(y_test, rf_y_pred)
rf_precision = precision_score(y_test, rf_y_pred)
rf_recall = recall_score(y_test, rf_y_pred)
rf_f1 = f1_score(y_test, rf_y_pred)

print("Random Forest Model")
print(f"Accuracy: {rf_accuracy}")
print(f"Precision: {rf_precision}")
print(f"Recall: {rf_recall}")
print(f"F1 Score: {rf_f1}")

lr_model = LogisticRegression(random_state=42, max_iter=1000)
lr_model.fit(X_train, y_train)

lr_y_pred = lr_model.predict(X_test)

lr_accuracy = accuracy_score(y_test, lr_y_pred)
lr_precision = precision_score(y_test, lr_y_pred)
lr_recall = recall_score(y_test, lr_y_pred)
lr_f1 = f1_score(y_test, lr_y_pred)

print("\nLogistic Regression Model")
print(f"Accuracy: {lr_accuracy}")
print(f"Precision: {lr_precision}")
print(f"Recall: {lr_recall}")
print(f"F1 Score: {lr_f1}")


Random Forest Model
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Logistic Regression Model
Accuracy: 0.8
Precision: 0.8125
Recall: 0.9285714285714286
F1 Score: 0.8666666666666666


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

file_path = 'Credit Banking_Project - 1 (1).csv'
data = pd.read_csv(file_path)

data['Limit'] = data['Limit'].str.replace('INR ', '').str.replace(',', '').astype(float)

data_encoded = pd.get_dummies(data, columns=['City', 'Credit Card Product', 'Company', 'Segment'])

data_encoded['Credit_Approved'] = (data_encoded['Limit'] > 100000).astype(int)

data_encoded.drop(columns=['Sl No:', 'Customer'], inplace=True)

X = data_encoded.drop(columns=['Credit_Approved'])
y = data_encoded['Credit_Approved']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def evaluate_model(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, precision, recall, f1

rf_model = RandomForestClassifier(random_state=42)
rf_accuracy, rf_precision, rf_recall, rf_f1 = evaluate_model(rf_model, X_train, y_train, X_test, y_test)

print("Random Forest Model")
print(f"Accuracy: {rf_accuracy}")
print(f"Precision: {rf_precision}")
print(f"Recall: {rf_recall}")
print(f"F1 Score: {rf_f1}")

lr_model = LogisticRegression(random_state=42, max_iter=1000)
lr_accuracy, lr_precision, lr_recall, lr_f1 = evaluate_model(lr_model, X_train, y_train, X_test, y_test)

print("\nLogistic Regression Model")
print(f"Accuracy: {lr_accuracy}")
print(f"Precision: {lr_precision}")
print(f"Recall: {lr_recall}")
print(f"F1 Score: {lr_f1}")

knn_model = KNeighborsClassifier()
knn_accuracy, knn_precision, knn_recall, knn_f1 = evaluate_model(knn_model, X_train, y_train, X_test, y_test)

print("\nK-Nearest Neighbors Model")
print(f"Accuracy: {knn_accuracy}")
print(f"Precision: {knn_precision}")
print(f"Recall: {knn_recall}")
print(f"F1 Score: {knn_f1}")


Random Forest Model
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Logistic Regression Model
Accuracy: 0.8
Precision: 0.8125
Recall: 0.9285714285714286
F1 Score: 0.8666666666666666

K-Nearest Neighbors Model
Accuracy: 0.9
Precision: 0.875
Recall: 1.0
F1 Score: 0.9333333333333333


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

data = pd.read_csv('Credit Banking_Project - 1 (1).csv')


data['Limit'] = data['Limit'].str.replace('INR ', '').str.replace(',', '').astype(float)

data_encoded = pd.get_dummies(data, columns=['City', 'Credit Card Product', 'Company', 'Segment'])

data_encoded['Credit_Approved'] = (data_encoded['Limit'] > 100000).astype(int)

data_encoded.drop(columns=['Sl No:', 'Customer'], inplace=True)

X = data_encoded.drop(columns=['Credit_Approved'])
y = data_encoded['Credit_Approved']
print(X)
print(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def evaluate_model(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return model, accuracy, precision, recall, f1

rf_model = RandomForestClassifier(random_state=42)
rf_model, rf_accuracy, rf_precision, rf_recall, rf_f1 = evaluate_model(rf_model, X_train, y_train, X_test, y_test)

print("Random Forest Model")
print(f"Accuracy: {rf_accuracy}")
print(f"Precision: {rf_precision}")
print(f"Recall: {rf_recall}")
print(f"F1 Score: {rf_f1}")

lr_model = LogisticRegression(random_state=42, max_iter=1000)
lr_model, lr_accuracy, lr_precision, lr_recall, lr_f1 = evaluate_model(lr_model, X_train, y_train, X_test, y_test)

print("\nLogistic Regression Model")
print(f"Accuracy: {lr_accuracy}")
print(f"Precision: {lr_precision}")
print(f"Recall: {lr_recall}")
print(f"F1 Score: {lr_f1}")

knn_model = KNeighborsClassifier()
knn_model, knn_accuracy, knn_precision, knn_recall, knn_f1 = evaluate_model(knn_model, X_train, y_train, X_test, y_test)

print("\nK-Nearest Neighbors Model")
print(f"Accuracy: {knn_accuracy}")
print(f"Precision: {knn_precision}")
print(f"Recall: {knn_recall}")
print(f"F1 Score: {knn_f1}")

def make_prediction(model, example):
    example_encoded = pd.get_dummies(example, columns=['City', 'Credit Card Product', 'Company', 'Segment'])
    example_encoded = example_encoded.reindex(columns=X.columns, fill_value=0)
    prediction = model.predict(example_encoded)
    return prediction

new_data = pd.DataFrame({
    'Age': [30, 55],
    'Limit': [150000.0, 50000.0],
    'City': ['BANGALORE', 'BOMBAY'],
    'Credit Card Product': ['Gold', 'Silver'],
    'Company': ['C1', 'C2'],
    'Segment': ['Self Employed', 'Salaried_MNC']
})

rf_predictions = make_prediction(rf_model, new_data)
print("\nRandom Forest Predictions")
print(rf_predictions)

lr_predictions = make_prediction(lr_model, new_data)
print("\nLogistic Regression Predictions")
print(lr_predictions)

knn_predictions = make_prediction(knn_model, new_data)
print("\nK-Nearest Neighbors Predictions")
print(knn_predictions)


Random Forest Model
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Logistic Regression Model
Accuracy: 0.8
Precision: 0.8125
Recall: 0.9285714285714286
F1 Score: 0.8666666666666666

K-Nearest Neighbors Model
Accuracy: 0.9
Precision: 0.875
Recall: 1.0
F1 Score: 0.9333333333333333

Random Forest Predictions
[1 0]

Logistic Regression Predictions
[1 0]

K-Nearest Neighbors Predictions
[1 0]
