In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [2]:
# Load training data
train_data = pd.read_csv("D:/Loan_Risk/Dataset/Training Data.csv")

# Define features and target
X = train_data.drop(columns=['Id', 'Risk_Flag'])
y = train_data['Risk_Flag']

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Identify numerical and categorical columns
numeric_features = ['Income', 'Age', 'Experience', 'CURRENT_JOB_YRS', 'CURRENT_HOUSE_YRS']
categorical_features = ['Married/Single', 'House_Ownership', 'Car_Ownership', 'Profession', 'CITY', 'STATE']
print("Data Loaded and Prepared Successfully!")


Data Loaded and Prepared Successfully!


In [3]:
# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])
print("Preprocessing Pipeline Defined Successfully!")


Preprocessing Pipeline Defined Successfully!


In [4]:
# Define models within pipelines
random_forest_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

logistic_regression_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=42))
])
print("Models Defined Successfully!")


Models Defined Successfully!


In [8]:
# Train and evaluate Random Forest model
random_forest_model.fit(X_train, y_train)
rf_val_preds = random_forest_model.predict(X_val)
rf_accuracy = accuracy_score(y_val, rf_val_preds)
rf_classification_report = classification_report(y_val, rf_val_preds)

print("Random Forest Model:")
print("Validation Accuracy:", rf_accuracy)
print("Classification Report:\n", rf_classification_report)


Random Forest Model:
Validation Accuracy: 0.8985714285714286
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.95      0.94     44147
           1       0.60      0.54      0.57      6253

    accuracy                           0.90     50400
   macro avg       0.77      0.74      0.75     50400
weighted avg       0.89      0.90      0.90     50400



In [6]:
import joblib

# Save the Random Forest model
joblib.dump(random_forest_model, 'random_forest_model.pkl')


['random_forest_model.pkl']

In [7]:
# Load the Random Forest model
random_forest_model = joblib.load('random_forest_model.pkl')

# Fit the model with a small subset of training data
small_X_train = X_train[:100]  # Use a small subset of training data
small_y_train = y_train[:100]
random_forest_model.fit(small_X_train, small_y_train)


Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [None]:
# Run the Flask application
%run app.py

 * Serving Flask app 'app'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


In [None]:

# Expose the Flask application with ngrok
from flask_ngrok import run_with_ngrok

# Create an instance of Flask
run_with_ngrok(app)

# Run the Flask application
app.run()
