<a href="https://colab.research.google.com/github/MayanjaJanet/MayanjaJanet.github.io/blob/master/DQL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import random
from collections import deque
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from google.colab import files
import io
from keras.layers import Input, Dense




In [4]:
uploaded= files.upload()

Saving cleaned_data.csv to cleaned_data.csv


In [5]:
df = pd.read_csv(io.BytesIO(uploaded['cleaned_data.csv']))

In [5]:
df.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,1,0,0.0,0,0,5849,0.0,142.51153,360.0,1.0,2,1
1,1,1,1.0,0,0,4583,1508.0,128.0,360.0,1.0,0,0
2,1,1,0.0,0,1,3000,0.0,66.0,360.0,1.0,2,1
3,1,1,0.0,1,0,2583,2358.0,120.0,360.0,1.0,2,1
4,1,0,0.0,0,0,6000,0.0,141.0,360.0,1.0,2,1


In [6]:
# Encode categorical variables
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])


In [7]:
# Features and target
X = df.drop('Loan_Status', axis=1)  # Assuming 'target' is the label column
y = df['Loan_Status']


In [8]:
# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Scale numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_resampled)


# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_resampled, test_size=0.2, random_state=42)
# Reset the index to avoid KeyError
#y_train = y_train.reset_index(drop=True)



In [9]:
# Deep Q-Learning Agent
class DQLAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(Input(shape=(11,)))  # Define input shape here
        model.add(Dense(64, activation='relu'))

        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model


    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [10]:
# Initialize agent
state_size = X_train.shape[1]
action_size = 2  # Approve or Reject
agent = DQLAgent(state_size, action_size)


In [11]:
# Training parameters
episodes = 1
batch_size = 1


In [12]:
import time
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Start training time measurement
start_time = time.time()

# Training loop (as before)
for e in range(episodes):
    # Initialize state variables for the episode
    state_batch = []
    action_batch = []
    reward_batch = []
    next_state_batch = []
    done_batch = []

    for i in range(len(X_train)):
        state = np.reshape(X_train[i], [1, state_size])
        action = agent.act(state)
        reward = 1 if action == y_train.iloc[i] else -1
        done = i == (len(X_train) - 1)
        next_state = np.reshape(X_train[min(i + 1, len(X_train) - 1)], [1, state_size])

        # Append to batches
        state_batch.append(state)
        action_batch.append(action)
        reward_batch.append(reward)
        next_state_batch.append(next_state)
        done_batch.append(done)

        # Store in memory (with a delay to avoid high memory consumption per episode)
        agent.remember(state, action, reward, next_state, done)

    # Sample a minibatch from memory and train the model in batches
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)

    # Print episode information
    print(f"Episode {e+1}/{episodes} - Epsilon: {agent.epsilon:.2f}")

# Calculate training time
training_time = time.time() - start_time
print(f"Training Time: {training_time:.2f} seconds")

# Evaluate the model on the test set
y_pred = []
for i in range(len(X_test)):
    state = np.reshape(X_test[i], [1, state_size])
    action = agent.act(state)
    y_pred.append(action)

# Convert y_pred and y_test to numpy arrays for evaluation
y_pred = np.array(y_pred)

# Evaluate the agent on the test set (this can be done manually or automatically)
total_reward = 0
for i in range(len(X_test)):
    state = np.reshape(X_test[i], [1, state_size])
    action = agent.act(state)
    reward = 1 if action == y_train.values[i] else -1



    total_reward += reward

average_reward = total_reward / len(X_test)
print(f"Average Test Reward: {average_reward}")


# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Precision
precision = precision_score(y_test, y_pred)
print(f"Precision: {precision:.4f}")

# Recall
recall = recall_score(y_test, y_pred)
print(f"Recall: {recall:.4f}")

# F1 Score
f1 = f1_score(y_test, y_pred)
print(f"F1 Score: {f1:.4f}")

# Confusion Matrix to calculate specificity
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

# Specificity (True Negative Rate)
specificity = tn / (tn + fp)
print(f"Specificity: {specificity:.4f}")


Episode 1/1 - Epsilon: 0.99
Training Time: 1.30 seconds
Average Test Reward: -0.03859649122807018
Accuracy: 0.5333
Precision: 0.5328
Recall: 0.5141
F1 Score: 0.5233
Specificity: 0.5524


In [13]:
# Save the model using the new .keras format
agent.model.save('credit_risk_dql_model.keras')


In [18]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
# Initialize the scaler
scaler = StandardScaler()

# Scale the training data
X_train_scaled = scaler.fit_transform(X_train)

# Example new applicant data (replace this with actual data)
new_applicant_data = pd.DataFrame({
    "Gender": [1],
    "Married": [1],
    "Dependents": [1.0],
    "Education": [0],
    "Self_Employed": [0],
    "ApplicantIncome": [4583],
    "CoapplicantIncome": [1508.0],
    "LoanAmount": [128.0],
    "Loan_Amount_Term": [360.0],
    "Credit_History": [1.0],
    "Property_Area": [0]
})

# Assuming 'scaler' and 'state_size' are defined in the context of training the agent
# Scale the features (same as during training)

new_applicant_scaled = scaler.transform(new_applicant_data.values)


# Reshape the data to match the input shape (1, state_size)
new_applicant_state = np.reshape(new_applicant_scaled, [1, state_size])

# Get Q-values for both actions from the trained Deep Q-Learning agent
q_values = agent.model.predict(new_applicant_state)  # agent.model is the trained model
print(f"Raw Q-values: {q_values}")


# Get the probabilities (for class 0 and class 1)
q_values = np.clip(q_values, -100, 100)  # Clip Q-values to avoid overflow
prob_default = 1 / (1 + np.exp(-q_values[0][0]))  # Apply sigmoid
prob_no_default = 1 / (1 + np.exp(-q_values[0][1]))  # Apply sigmoid

# Compare Q-values (predicted for actions 0 and 1) and choose the larger one
if prob_default > prob_no_default:
    action = 0  # Reject (class 0, Default)
    result = "The credit risk for the new applicant is high (Default)."
else:
    action = 1  # Approve (class 1, No Default)
    result = "The credit risk for the new applicant is low (No default)."

# Output the result and predicted probabilities
print(result)
print(f"Probability of Default: {prob_default:.4f}")
print(f"Probability of No Default: {prob_no_default:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Raw Q-values: [[-388.45557  425.32025]]
The credit risk for the new applicant is low (No default).
Probability of Default: 0.0000
Probability of No Default: 1.0000


  prob_default = 1 / (1 + np.exp(-q_values[0][0]))  # Apply sigmoid
