In [2]:
# Fraud Detection in Banking: In banking, fraud detection is a major use case for AI. Here, we can use a simple algorithm 
# to analyze transaction data and detect potential fraudulent activities based on historical patterns.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the labelled transaction history (one row per transaction).
data = pd.read_csv('transaction_data.csv')  # Load dataset

# Preprocessing: selecting relevant features
features = ['amount', 'transaction_type', 'location', 'time_of_day']
# LogisticRegression only accepts numeric input. The column names suggest that
# some of these are categorical (TODO confirm against the CSV), so one-hot
# encode every non-numeric column. Numeric columns pass through unchanged, so
# this is a no-op when the file is already fully encoded.
X = pd.get_dummies(data[features], dtype=float)
y = data['fraudulent']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model.
# NOTE(review): fraud labels are usually heavily imbalanced, in which case the
# confusion matrix is the number to read, not the accuracy.
print("Accuracy Score:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

FileNotFoundError: [Errno 2] No such file or directory: 'transaction_data.csv'

In [3]:
# Loan Default Prediction: In the banking sector, predicting whether a loan applicant will default on a loan can be done 
# using machine learning algorithms, which analyze historical data to make predictions.

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Example loan data (loan_amount, income, credit_score, etc.)
data = pd.read_csv('loan_data.csv')

# Features and target variable (default: 0 = no, 1 = yes)
X = data[['loan_amount', 'income', 'credit_score', 'loan_term']]
y = data['default']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a decision tree classifier.
# The tree shuffles candidate features at every split, so it is seeded like the
# split above; otherwise the report changes from run to run.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict loan defaults on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print("Classification Report:\n", classification_report(y_test, y_pred))

FileNotFoundError: [Errno 2] No such file or directory: 'loan_data.csv'

In [None]:
# Customer Segmentation: In banking, deep learning can segment customers based on their spending patterns, 
# helping banks target customers with personalized offers.

import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import StandardScaler

# Imported here so the cell does not depend on an earlier cell having run.
from sklearn.model_selection import train_test_split

# Load customer data
data = pd.read_csv('customer_data.csv')

# Features: spending habits, age, income, etc.
X = data[['age', 'income', 'monthly_spend']]
y = data['customer_segment']  # Target: customer segment (e.g., High Value, Low Value)
# NOTE(review): the sigmoid / binary_crossentropy head below needs a 0/1 target.
# Confirm how customer_segment is encoded in the CSV.

# Hold out 20% of the customers so the reported accuracy is not measured on the
# rows the network was trained on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data. The scaler is fitted on the training rows only, so no
# test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_scaled = scaler.transform(X)  # every customer, used for scoring below

# Define a neural network model
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=3))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))  # Binary classification: High Value or Low Value

# Compile and train the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32)

# Score every customer (probability of the positive segment)
predictions = model.predict(X_scaled)

# Evaluate on the held-out rows; evaluate() returns [loss, accuracy]
accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Model Accuracy: {accuracy[1] * 100:.2f}%")

In [None]:
# Loan Approval Prediction Using Random Forest (Supervised):

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Loan approval data
data = pd.read_csv('loan_approval.csv')

# Features and target variable
X = data[['age', 'income', 'loan_amount', 'credit_score']]
y = data['loan_approved']  # 0 = No, 1 = Yes

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a random forest model.
# Bootstrap sampling and feature sub-sampling are random, so the forest is
# seeded to make the report reproducible.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Classification Report:\n", classification_report(y_test, y_pred))

In [None]:
# Anomaly Detection Using K-Means Clustering (Unsupervised):

from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Transaction data (e.g., amount, frequency, location)
data = pd.read_csv('transaction_data.csv')

# Features: transaction amount, transaction frequency, etc.
# NOTE(review): K-Means needs numeric input; confirm 'location' is already encoded.
X = data[['amount', 'frequency', 'location']]

# Apply K-Means clustering to split the transactions into two groups.
# random_state makes the centroid initialisation repeatable. n_init is set
# explicitly because its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
# Cluster ids (0/1) are arbitrary: inspect the clusters before calling either
# one "fraudulent".
data['cluster'] = kmeans.fit_predict(X)

# Plot clusters
plt.scatter(data['amount'], data['frequency'], c=data['cluster'])
plt.xlabel('Amount')
plt.ylabel('Frequency')
plt.title('Transaction Clusters: Normal vs. Fraudulent')
plt.show()

In [None]:
# Detecting Unusual Login Behavior (Anomaly Detection):

from sklearn.ensemble import IsolationForest
import pandas as pd

# Customer login data (time of login, IP address, device used)
data = pd.read_csv('login_data.csv')

# Features: time of login, IP address (encoded), device type
X = data[['login_time', 'ip_address', 'device_type']]

# Use Isolation Forest for anomaly detection (outlier detection).
# contamination=0.05 flags roughly 5% of the rows. The forest is built from
# random sub-samples, so it is seeded to keep the flagged set stable between runs.
model = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
data['anomaly'] = model.fit_predict(X)

# Print results (1 = normal, -1 = anomaly)
print(data[data['anomaly'] == -1])

In [None]:
# Bagging (Random Forest): Scenario: Predicting loan approval based on historical customer data (e.g., credit score, income, age, etc.).

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# Synthetic stand-in for a banking dataset (seeded, so it is identical every run)
X, y = make_classification(random_state=42, n_features=10, n_samples=1000)

# Keep 30% of the rows aside for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Random Forest (bagging); fit() hands back the fitted estimator
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the held-out rows
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Accuracy: {accuracy:.4f}")

In [None]:
# Boosting (Gradient Boosting): Scenario: Predicting whether a customer will default on a loan based on various features 
# such as payment history, loan type, etc.

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# Synthetic stand-in for a banking dataset (seeded, so it is identical every run)
X, y = make_classification(random_state=42, n_features=10, n_samples=1000)

# Keep 30% of the rows aside for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Gradient Boosting: 100 sequentially fitted trees; fit() hands back the estimator
gb_model = GradientBoostingClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the held-out rows
y_pred = gb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Gradient Boosting Accuracy: {accuracy:.4f}")

In [None]:
# Stacking (Stacked Generalization): Scenario: Predicting customer churn in a bank based on customer interaction, account activity, 
# and demographic data.

from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# Synthetic stand-in for a banking dataset (seeded, so it is identical every run)
X, y = make_classification(random_state=42, n_features=10, n_samples=1000)

# Keep 30% of the rows aside for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Level-0 learners whose predictions feed the meta-learner
base_models = [
    ('lr', LogisticRegression(random_state=42)),
    ('svm', SVC(random_state=42)),
    ('dt', DecisionTreeClassifier(random_state=42)),
]

# Level-1 (meta) learner is a plain logistic regression
stacking_model = StackingClassifier(
    estimators=base_models,
    final_estimator=LogisticRegression(),
).fit(X_train, y_train)

# Score the held-out rows
y_pred = stacking_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Stacking Accuracy: {accuracy:.4f}")

In [None]:
# Voting (VotingClassifier): Scenario: Classifying customers into segments based on financial behavior (e.g., high-value, medium-value, low-value).

from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# Synthetic stand-in for a banking dataset (seeded, so it is identical every run)
X, y = make_classification(random_state=42, n_features=10, n_samples=1000)

# Keep 30% of the rows aside for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# The three voters
model1 = LogisticRegression(random_state=42)
model2 = SVC(random_state=42)
model3 = DecisionTreeClassifier(random_state=42)

# Hard voting: each model casts one vote per sample and the majority label wins
voting_model = VotingClassifier(
    voting='hard',
    estimators=[('lr', model1), ('svm', model2), ('dt', model3)],
)
voting_model.fit(X_train, y_train)

# Score the held-out rows
y_pred = voting_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Voting Classifier Accuracy: {accuracy:.4f}")

In [None]:
# Gaussian Mixture Model (GMM): Scenario: Predicting customer behavior for segmentation, such as differentiating between 
# high-value and low-value customers based on their spending patterns and income.

from sklearn.mixture import GaussianMixture
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt

# Generating a synthetic dataset for customer spending patterns
X, _ = make_blobs(n_samples=500, centers=2, cluster_std=0.60, random_state=0)

# Fit a Gaussian Mixture Model with 2 components (clusters).
# random_state fixes both the EM initialisation and the draws made by sample()
# below, so the figure is the same on every run.
gmm = GaussianMixture(n_components=2, random_state=0)
gmm.fit(X)

# Generate new synthetic data points based on the learned distribution.
# sample() returns (points, component_labels); the labels are not needed here.
new_samples, _ = gmm.sample(5)

# Visualize the clusters and generated data
plt.scatter(X[:, 0], X[:, 1], c='blue', label='Existing Customers')
plt.scatter(new_samples[:, 0], new_samples[:, 1], c='red', label='Generated Customers')
plt.legend()
plt.title("Gaussian Mixture Model - Customer Segmentation")
plt.show()

In [None]:
# Hidden Markov Model (HMM): Scenario: Predicting whether a customer will continue using a bank's services (churn prediction) 
# based on historical transactional data over time.

from hmmlearn.hmm import GaussianHMM
import numpy as np

# Synthetic data representing customer transaction behavior over time:
# a single sequence of 8 observations with one feature each.
X = np.array([[0.1], [0.2], [0.3], [0.4], [1.5], [1.6], [1.7], [0.5]])

# Fit a Hidden Markov Model with 2 hidden states (intended as "inactive" and
# "active"). The model assigns state ids 0/1 arbitrarily, so check the fitted
# means (model.means_) to see which id is the high-activity state.
# random_state seeds the EM initialisation so the labelling is repeatable.
model = GaussianHMM(n_components=2, covariance_type="diag", n_iter=1000, random_state=42)
model.fit(X)

# Predict the hidden states for the data
hidden_states = model.predict(X)

print("Predicted States:", hidden_states)

In [None]:
# Variational Autoencoder (VAE): Scenario: Generating new synthetic data for customer profiles (e.g., income, age,
# spending habits) for simulation or testing purposes in a bank.

from tensorflow.keras import layers, models
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

# Generating synthetic customer data (income, age, etc.)
X = np.random.rand(1000, 2)  # Synthetic data for two features

# Scale the data to [0, 1] so it matches the decoder's sigmoid output range
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X).astype("float32")

# VAE model
latent_dim = 2  # Latent space dimension
input_dim = X_scaled.shape[1]


class Sampling(layers.Layer):
    """Reparameterisation trick: draw z ~ N(z_mean, exp(z_log_var)).

    The KL divergence between that Gaussian and the unit Gaussian is registered
    with add_loss(), so it is computed on each batch's tensors and added to the
    compiled reconstruction loss automatically.
    """

    def call(self, inputs):
        z_mean, z_log_var = inputs
        kl_per_sample = -0.5 * tf.reduce_sum(
            1.0 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1
        )
        self.add_loss(tf.reduce_mean(kl_per_sample))
        epsilon = tf.random.normal(tf.shape(z_mean))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon


# Encoder
inputs = layers.Input(shape=(input_dim,))
h = layers.Dense(64, activation='relu')(inputs)
z_mean = layers.Dense(latent_dim)(h)
z_log_var = layers.Dense(latent_dim)(h)
z = Sampling()([z_mean, z_log_var])

encoder = models.Model(inputs, [z_mean, z_log_var, z])

# Decoder
latent_inputs = layers.Input(shape=(latent_dim,))
x = layers.Dense(64, activation='relu')(latent_inputs)
decoded = layers.Dense(input_dim, activation='sigmoid')(x)

decoder = models.Model(latent_inputs, decoded)

# VAE model (Encoder + Decoder)
vae = models.Model(inputs, decoder(z))


def reconstruction_loss(x_true, x_pred):
    """Per-sample binary cross-entropy summed over the input features."""
    # binary_crossentropy averages over the feature axis; multiplying by
    # input_dim turns that into a sum so it is on the same scale as the KL term.
    return input_dim * tf.keras.losses.binary_crossentropy(x_true, x_pred)


# The KL term comes from the Sampling layer; only the reconstruction term is
# passed to compile().
vae.compile(optimizer='adam', loss=reconstruction_loss)

# Train the model as an autoencoder: the input is also the target
vae.fit(X_scaled, X_scaled, epochs=10, batch_size=32)

# Generate new synthetic data: sample the latent prior, decode, then undo the
# min-max scaling to get back to the original feature units.
new_data = np.random.normal(size=(5, latent_dim))
generated_data = scaler.inverse_transform(decoder.predict(new_data))

print("Generated Synthetic Data:")
print(generated_data)

In [None]:
# Generative Adversarial Network (GAN): Scenario: Generating synthetic customer transaction data to simulate realistic patterns 
# for training predictive models (e.g., fraud detection).

from tensorflow.keras import layers, models
import numpy as np
import tensorflow as tf

# GAN Generator Model
def build_generator(latent_dim):
    """Return an uncompiled MLP mapping a latent vector to 2 features in (0, 1).

    latent_dim: length of the noise vector fed to the first layer.
    """
    return models.Sequential([
        layers.Dense(128, activation='relu', input_dim=latent_dim),
        layers.Dense(256, activation='relu'),
        layers.Dense(512, activation='relu'),
        layers.Dense(2, activation='sigmoid'),  # one output per generated feature
    ])

# GAN Discriminator Model
def build_discriminator():
    """Return an uncompiled MLP that scores a 2-feature sample as real or fake.

    The single sigmoid output is the probability that the sample is real.
    """
    return models.Sequential([
        layers.Dense(512, activation='relu', input_dim=2),
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(1, activation='sigmoid'),  # real/fake probability
    ])

# GAN Model (Combining Generator and Discriminator)
# latent_dim is the length of the noise vector; train_gan() reads it as a global.
latent_dim = 10
generator = build_generator(latent_dim)
discriminator = build_discriminator()

# Discriminator model: compiled on its own so it can be trained directly with
# discriminator.train_on_batch().
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# GAN model: generator followed by the discriminator.
# NOTE(review): the freeze is placed after the discriminator's own compile() and
# before the combined model's compile(). This looks like the usual Keras GAN
# ordering (only the generator is updated through `gan`) — do not reorder
# without checking against the Keras version in use.
discriminator.trainable = False
gan_input = layers.Input(shape=(latent_dim,))
x = generator(gan_input)
gan_output = discriminator(x)

gan = models.Model(gan_input, gan_output)
gan.compile(loss='binary_crossentropy', optimizer='adam')

# Training GAN
def train_gan(epochs=1000, batch_size=128, log_every=100):
    """Alternate discriminator and generator updates for `epochs` steps.

    Reads the module-level `generator`, `discriminator`, `gan` and `latent_dim`.

    epochs:     number of training steps (one batch each).
    batch_size: number of real samples and of generated samples per step.
    log_every:  print the losses every this many steps (0 disables logging).
    """
    # Labels are shaped (batch, 1) to match the discriminator's output exactly.
    real_labels = np.ones((batch_size, 1))
    fake_labels = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        # Train Discriminator on half real, half generated samples.
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        # verbose=0: predict() would otherwise print a progress bar on every
        # step of the loop.
        generated_data = generator.predict(noise, verbose=0)

        real_data = np.random.rand(batch_size, 2)  # Synthetic real data for illustration

        X = np.concatenate([real_data, generated_data])
        y = np.concatenate([real_labels, fake_labels])

        d_loss, d_acc = discriminator.train_on_batch(X, y)

        # Train Generator through the combined model: it is rewarded when the
        # discriminator labels its samples as real.
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        y_gen = real_labels

        g_loss = gan.train_on_batch(noise, y_gen)

        if log_every and epoch % log_every == 0:
            print(f"Epoch {epoch}: Discriminator Loss = {d_loss}, Generator Loss = {g_loss}")

train_gan()

In [None]:
# Generative Adversarial Networks (GANs): Scenario: Generating synthetic fraudulent transaction data to train fraud detection models.

from tensorflow.keras import layers, models
import numpy as np
import tensorflow as tf

# GAN Generator Model
def build_generator(latent_dim):
    """Return an uncompiled MLP mapping a latent vector to 2 features in (0, 1).

    latent_dim: length of the noise vector fed to the first layer.
    """
    hidden_stack = [
        layers.Dense(128, activation='relu', input_dim=latent_dim),
        layers.Dense(256, activation='relu'),
        layers.Dense(512, activation='relu'),
    ]
    # Output for 2 features (e.g., amount, transaction type)
    output_layer = layers.Dense(2, activation='sigmoid')
    return models.Sequential(hidden_stack + [output_layer])

# GAN Discriminator Model
def build_discriminator():
    """Return an uncompiled MLP that scores a 2-feature sample as real or fake.

    The single sigmoid output is the probability that the sample is real.
    """
    hidden_stack = [
        layers.Dense(512, activation='relu', input_dim=2),
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
    ]
    # Binary classification: real/fake
    output_layer = layers.Dense(1, activation='sigmoid')
    return models.Sequential(hidden_stack + [output_layer])

# GAN Model (Combining Generator and Discriminator)
# latent_dim is the length of the noise vector; train_gan() reads it as a global.
latent_dim = 10
generator = build_generator(latent_dim)
discriminator = build_discriminator()

# Discriminator model: compiled on its own so it can be trained directly with
# discriminator.train_on_batch().
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# GAN model: generator followed by the discriminator.
# NOTE(review): the freeze is placed after the discriminator's own compile() and
# before the combined model's compile(). This looks like the usual Keras GAN
# ordering (only the generator is updated through `gan`) — do not reorder
# without checking against the Keras version in use.
discriminator.trainable = False
gan_input = layers.Input(shape=(latent_dim,))
x = generator(gan_input)
gan_output = discriminator(x)

gan = models.Model(gan_input, gan_output)
gan.compile(loss='binary_crossentropy', optimizer='adam')

# Training GAN
def train_gan(epochs=1000, batch_size=128, log_every=100):
    """Alternate discriminator and generator updates for `epochs` steps.

    Reads the module-level `generator`, `discriminator`, `gan` and `latent_dim`.

    epochs:     number of training steps (one batch each).
    batch_size: number of real samples and of generated samples per step.
    log_every:  print the losses every this many steps (0 disables logging).
    """
    # Labels are shaped (batch, 1) to match the discriminator's output exactly.
    real_labels = np.ones((batch_size, 1))
    fake_labels = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        # Train Discriminator on half real, half generated samples.
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        # verbose=0: predict() would otherwise print a progress bar on every
        # step of the loop.
        generated_data = generator.predict(noise, verbose=0)

        real_data = np.random.rand(batch_size, 2)  # Synthetic real data for illustration

        X = np.concatenate([real_data, generated_data])
        y = np.concatenate([real_labels, fake_labels])

        d_loss, d_acc = discriminator.train_on_batch(X, y)

        # Train Generator through the combined model: it is rewarded when the
        # discriminator labels its samples as real.
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        y_gen = real_labels

        g_loss = gan.train_on_batch(noise, y_gen)

        if log_every and epoch % log_every == 0:
            print(f"Epoch {epoch}: Discriminator Loss = {d_loss}, Generator Loss = {g_loss}")

train_gan()

In [None]:
# Adversarial Attacks and Robustness: Scenario: Generating adversarial examples to test the robustness of a fraud detection model.

import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Seed NumPy so the dataset (and therefore both accuracies) is reproducible
np.random.seed(42)

# Generating synthetic data (transaction amount and type for simplicity)
X = np.random.rand(1000, 2)  # Features: [transaction amount, transaction type]
y = (X[:, 0] > 0.5).astype(int)  # Fraud = 1 if amount > 0.5, else not fraud = 0

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a simple SVM model
model = SVC(kernel='linear', probability=True)
model.fit(X_train, y_train)

# Generate adversarial examples.
# Random noise is not adversarial: it pushes samples across the boundary only by
# chance. For a linear model the decision function is w.x + b, so the worst-case
# bounded perturbation moves each sample by epsilon per feature along sign(w),
# in the direction that lowers the score of its true class.
epsilon = 0.1
weight_sign = np.sign(model.coef_[0])
true_side = np.where(y_test == 1, 1.0, -1.0)  # +1 for fraud, -1 for non-fraud
X_adv = X_test - epsilon * true_side[:, None] * weight_sign

# Predict using the original and adversarial test data
y_pred_original = model.predict(X_test)
y_pred_adv = model.predict(X_adv)

# Calculate accuracy
acc_original = accuracy_score(y_test, y_pred_original)
acc_adv = accuracy_score(y_test, y_pred_adv)

print(f"Accuracy on Original Test Set: {acc_original}")
print(f"Accuracy on Adversarial Test Set: {acc_adv}")

In [None]:
# Adversarial Training: Scenario: Incorporating adversarial examples into the training process to improve the robustness of a fraud detection model.

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Seed NumPy so the synthetic data and the later noise draws are reproducible
np.random.seed(42)

# Generating synthetic data (transaction amount and type)
X = np.random.rand(1000, 2)
y = (X[:, 0] > 0.5).astype(int)  # label is 1 when the first feature exceeds 0.5

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Adversarial training function
def adversarial_training(model, X_train, y_train, X_test, y_test, epochs=5, noise_std=0.1):
    """Fit `model` on clean plus perturbed data and report accuracy each round.

    model:     estimator exposing fit(X, y) and predict(X).
    X_train, y_train: training features (2-D array) and labels.
    X_test, y_test:   held-out features and labels used for reporting.
    epochs:    number of rounds; each round draws fresh perturbations.
    noise_std: standard deviation of the Gaussian perturbation.

    Prints one line per round; returns None.
    """
    for epoch in range(epochs):
        # Perturbed copies of the training rows, keeping their original labels
        X_adv = X_train + np.random.normal(0, noise_std, X_train.shape)

        # fit() starts from scratch on every call, so fitting on the clean data
        # and then on the perturbed data would keep only the second fit. Train
        # once on the union so the model sees both.
        X_augmented = np.vstack([X_train, X_adv])
        y_augmented = np.concatenate([y_train, y_train])
        model.fit(X_augmented, y_augmented)

        # Evaluate the model on both the original and a perturbed test set
        y_pred_original = model.predict(X_test)
        y_pred_adv = model.predict(X_test + np.random.normal(0, noise_std, X_test.shape))

        acc_original = accuracy_score(y_test, y_pred_original)
        acc_adv = accuracy_score(y_test, y_pred_adv)

        print(f"Epoch {epoch}: Accuracy on Original Test Set = {acc_original}, Accuracy on Adversarial Test Set = {acc_adv}")

# Linear-kernel SVM that will be (re)fitted inside the training routine
model = SVC(kernel='linear')

# Run the adversarial training loop with its default number of rounds
adversarial_training(
    model=model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

In [None]:
# Adversarial Networks for Reinforcement Learning: Scenario: Training a reinforcement learning agent to handle adversarial economic 
# conditions that could simulate fraudulent activities.

import gym
import numpy as np
from stable_baselines3 import PPO

# Create a custom environment for financial decision-making
class BankEnv(gym.Env):
    """Toy loan-decision environment using the classic gym API.

    Observations are 5 random values in [0, 1) standing in for economic
    conditions. Actions: 0 = approve loan, 1 = deny loan, 2 = flag for review.

    max_steps: episode length; `done` becomes True after this many steps so
               that episodes end and per-episode statistics can be reported.
    """

    def __init__(self, max_steps=100):
        super(BankEnv, self).__init__()
        self.action_space = gym.spaces.Discrete(3)  # 3 actions: approve loan, deny loan, flag for review
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)  # Random economic conditions
        self.max_steps = max_steps
        self._steps = 0

    def _observe(self):
        # np.random.rand yields float64; cast so observations match the
        # float32 dtype declared in observation_space.
        return np.random.rand(5).astype(np.float32)

    def reset(self):
        """Start a new episode and return its first observation."""
        self._steps = 0
        return self._observe()

    def step(self, action):
        """Advance one step; returns (observation, reward, done, info)."""
        self._steps += 1
        state = self._observe()
        reward = -1 if action == 2 else 1  # Flagging for review is penalised; approve/deny are rewarded
        done = self._steps >= self.max_steps
        return state, reward, done, {}

# Environment instance the agent will interact with
env = BankEnv()

# PPO agent with the default MLP policy; verbose=1 prints training progress
model = PPO(policy="MlpPolicy", env=env, verbose=1)

# Train for 10,000 environment steps
model.learn(total_timesteps=10_000)

# "Adversarial" conditions: a random observation squeezed into the lower half
# of the observation range
adversarial_conditions = 0.5 * np.random.rand(5)

# Ask the trained policy what it would do under those conditions;
# predict() returns (action, state) and only the action is kept
state = adversarial_conditions
action = model.predict(state)[0]

print(f"Adversarial Action: {action}")

In [None]:
# Collaborative Filtering (User-Item Based): Scenario: Recommending financial products (like loans, credit cards) based on
# the preferences of similar users.

import pandas as pd
from sklearn.neighbors import NearestNeighbors

# Example interaction data: one row per user, 1 = the user holds the product
data = {
    'user_id': [1, 2, 3, 4, 5, 6],
    'loan': [1, 0, 1, 0, 1, 0],
    'credit_card': [0, 1, 1, 0, 1, 1],
    'mortgage': [1, 1, 0, 0, 1, 0]
}

df = pd.DataFrame(data)

# Model: Nearest Neighbors (Collaborative Filtering)
X = df.drop(columns=['user_id'])  # Features: product holdings

# Fit Nearest Neighbors model
model = NearestNeighbors(n_neighbors=2, metric='cosine')
model.fit(X)

# Find similar users to user 1 (row 0).
# The query row is part of the fitted data, so it comes back as its own nearest
# neighbour; ask for one extra neighbour and drop the user's own row.
# X.iloc[[0]] keeps the query a DataFrame so feature names match the fit.
target_row = 0
distances, indices = model.kneighbors(X.iloc[[target_row]], n_neighbors=model.n_neighbors + 1)
neighbor_rows = [row for row in indices[0] if row != target_row][:model.n_neighbors]

# Recommend products from similar users: count how many neighbours hold each
# product, then keep only products the user does not already hold.
product_votes = X.iloc[neighbor_rows].sum(axis=0)
not_owned = X.iloc[target_row] == 0
recommended_products = (
    product_votes[not_owned & (product_votes > 0)]
    .sort_values(ascending=False)
    .index[0:2]  # Top 2 recommended products
)
print("Recommended products for User 1:", recommended_products)

In [None]:
# Content-Based Filtering: Scenario: Recommending a financial product (like a credit card) based on the features of the 
# product and user’s past behavior (e.g., transaction data).

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Example data (product_id, product description)
product_data = {
    'product_id': [1, 2, 3, 4],
    'product_description': ['Gold credit card', 'Platinum credit card', 'Student credit card', 'Cashback credit card'],
}

df = pd.DataFrame(product_data)

# User's past interaction (the user prefers Gold and Platinum credit cards)
user_preferences = ['Gold credit card', 'Platinum credit card']

# Convert text to vector form
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['product_description'])

# One TF-IDF row per preferred product
user_vector = vectorizer.transform(user_preferences)

# Cosine similarity, shape (n_preferences, n_products)
similarity_scores = cosine_similarity(user_vector, tfidf_matrix)

# Collapse to one score per product by averaging over the preferences. A plain
# argmax() on the 2-D matrix returns a flattened position, which is not a valid
# product row.
product_scores = pd.Series(similarity_scores.mean(axis=0), index=df.index)

# Do not recommend what the user already prefers.
# (If every product is already preferred there is nothing left to recommend and
# idxmax() raises.)
already_preferred = df['product_description'].isin(user_preferences)

# Get the top recommended product
top_product_index = product_scores[~already_preferred].idxmax()
recommended_product = df.loc[top_product_index, 'product_description']
print("Recommended product:", recommended_product)

In [None]:
# Hybrid Models: Scenario: Recommending financial products by combining customer preferences (content-based) with their 
# interactions (collaborative filtering).

from sklearn.metrics import pairwise_distances
import numpy as np

# Example user-item interaction matrix (user, loan, credit card, mortgage)
data = {
    'user_id': [1, 2, 3, 4, 5],
    'loan': [1, 0, 1, 0, 1],
    'credit_card': [0, 1, 1, 0, 1],
    'mortgage': [1, 1, 0, 0, 1]
}
df = pd.DataFrame(data)

# Content-based: Item features (e.g., product categories)
item_features = np.array([[1, 0, 0],  # Loan
                          [0, 1, 0],  # Credit Card
                          [0, 0, 1]]) # Mortgage

# Collaborative-based: User-item interaction matrix, shape (n_users, n_items)
user_item_matrix = df.drop(columns=['user_id']).values

# Pairwise cosine distances
user_distances = pairwise_distances(user_item_matrix, metric='cosine')  # (n_users, n_users)
item_distances = pairwise_distances(item_features, metric='cosine')     # (n_items, n_items)

# The two distance matrices have different shapes and cannot be added directly.
# Turn each into a (n_users, n_items) score matrix first.
user_similarity = 1.0 - user_distances
np.fill_diagonal(user_similarity, 0.0)  # a user must not vote for themselves
item_similarity = 1.0 - item_distances

# Collaborative score: products held by similar users, weighted by similarity
collaborative_scores = user_similarity @ user_item_matrix
# Content score: products whose features resemble what the user already holds
content_scores = user_item_matrix @ item_similarity


def _scale_rows(scores):
    """Divide each row by its maximum (rows that are all zero stay zero)."""
    peak = scores.max(axis=1, keepdims=True)
    return scores / np.where(peak > 0, peak, 1.0)


# Hybrid model: equal-weight blend of the two row-normalised scores
combined_scores = 0.5 * _scale_rows(collaborative_scores) + 0.5 * _scale_rows(content_scores)

# Recommend products for user 1 (row 0), skipping products they already hold
user_row = 0
candidate_scores = np.where(user_item_matrix[user_row] > 0, -np.inf, combined_scores[user_row])
recommended_product_idx = int(np.argmax(candidate_scores))
recommended_product = df.columns[recommended_product_idx + 1]  # Skip user_id column
print(f"Recommended product for User 1: {recommended_product}")

In [None]:
# Matrix Factorization (e.g., SVD): Scenario: Predicting which financial product a user is likely to apply for based on past interactions.

from sklearn.decomposition import TruncatedSVD
import pandas as pd
import numpy as np

# Example user-item interaction matrix (user, loan, credit card, mortgage)
data = {
    'user_id': [1, 2, 3, 4, 5],
    'loan': [1, 0, 1, 0, 1],
    'credit_card': [0, 1, 1, 0, 1],
    'mortgage': [1, 1, 0, 0, 1]
}
df = pd.DataFrame(data)

# Matrix factorization: Apply Singular Value Decomposition (SVD).
# The default solver is randomized, so it is seeded for repeatable factors.
X = df.drop(columns=['user_id']).values
svd = TruncatedSVD(n_components=2, random_state=42)
X_svd = svd.fit_transform(X)

# Rank-2 reconstruction of the interaction matrix: a smoothed affinity score
# for every (user, product) pair
predicted_ratings = np.dot(X_svd, svd.components_)

# Predict for User 1 (which product they might apply for).
# Products the user already holds are masked out; otherwise the highest score
# is almost always one of the user's existing products.
user_1_predictions = predicted_ratings[0]
candidate_scores = np.where(X[0] > 0, -np.inf, user_1_predictions)
recommended_product_idx = np.argmax(candidate_scores)
recommended_product = df.columns[recommended_product_idx + 1]  # Skip user_id column
print(f"Recommended product for User 1: {recommended_product}")

In [None]:
# Association Rule Learning (Apriori Algorithm): Scenario: Recommending related financial products based on past user 
# transactions (e.g., credit card after loan approval).

from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd

# Example user-product transaction data (1 = the user holds the product)
data = {
    'loan': [1, 0, 1, 1, 0],
    'credit_card': [0, 1, 1, 1, 0],
    'mortgage': [1, 0, 0, 1, 0],
}
# mlxtend expects a boolean one-hot frame; integer 0/1 frames are accepted only
# with a deprecation warning.
df = pd.DataFrame(data).astype(bool)

# Apply Apriori algorithm to find frequent itemsets (held by >= 20% of users)
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)

# Generate association rules; lift >= 1 keeps rules whose items co-occur at
# least as often as independence would predict
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1.0)

# Display recommendations based on rules
print(rules[['antecedents', 'consequents', 'lift']])

In [None]:
# # Python Code Example for User-Based Collaborative Filtering: 

import pandas as pd
from sklearn.neighbors import NearestNeighbors

# Example user-item matrix (user, loan, credit card, mortgage)
data = {
    'user_id': [1, 2, 3, 4, 5],
    'loan': [1, 0, 1, 0, 1],
    'credit_card': [0, 1, 1, 0, 1],
    'mortgage': [1, 1, 0, 0, 1]
}

df = pd.DataFrame(data)

# Extract the features (excluding user_id)
X = df.drop(columns=['user_id'])

# Build the model: Nearest Neighbors (User-based CF)
model = NearestNeighbors(n_neighbors=3, metric='cosine')
model.fit(X)

# Find the 3 most similar users to user 1 (row 0).
# The query row is part of the fitted data and is returned as its own nearest
# neighbour, so request one extra and drop the user's own row.
# X.iloc[[0]] keeps the query a DataFrame so feature names match the fit.
target_row = 0
distances, indices = model.kneighbors(X.iloc[[target_row]], n_neighbors=model.n_neighbors + 1)
neighbor_rows = [row for row in indices[0] if row != target_row][:model.n_neighbors]

# Count how many similar users hold each product, then keep only products the
# target user does not already hold
similar_users = X.iloc[neighbor_rows]
product_votes = similar_users.sum(axis=0)
not_owned = X.iloc[target_row] == 0
recommended_products = (
    product_votes[not_owned & (product_votes > 0)]
    .sort_values(ascending=False)
    .index[:2]  # Top 2 recommended products
)
print("Recommended products for User 1 based on similar users:", recommended_products)

In [None]:
# # Python Code Example for Item-Based Collaborative Filtering

from sklearn.metrics.pairwise import cosine_similarity

# Example user-item matrix (user, loan, credit card, mortgage)
data = {
    'user_id': [1, 2, 3, 4, 5],
    'loan': [1, 0, 1, 0, 1],
    'credit_card': [0, 1, 1, 0, 1],
    'mortgage': [1, 1, 0, 0, 1]
}

df = pd.DataFrame(data)

# Extract the user-item interaction matrix
X = df.drop(columns=['user_id'])

# Calculate cosine similarity between items; rows and columns follow X.columns
item_similarity = cosine_similarity(X.T)  # Transpose so that items are compared

# Recommend similar products to 'loan'.
# The position must come from X.columns: df.columns still contains 'user_id',
# which shifts every product by one and would select the wrong row.
loan_idx = X.columns.get_loc('loan')
similar_items = item_similarity[loan_idx]

# Rank the other items by similarity, most similar first. 'loan' is excluded by
# position rather than by assuming it sorts to the top.
ranked_others = [idx for idx in similar_items.argsort()[::-1] if idx != loan_idx]

# 'loan' itself first, then its top 2 most similar items
recommended_items = X.columns[[loan_idx] + ranked_others[:2]]
print("Items similar to 'loan' that might be recommended:", recommended_items[1:])

In [None]:
# # Python Code Example for Matrix Factorization using SVD

from sklearn.decomposition import TruncatedSVD
import numpy as np

# Example user-item matrix (user, loan, credit card, mortgage)
data = {
    'user_id': [1, 2, 3, 4, 5],
    'loan': [1, 0, 1, 0, 1],
    'credit_card': [0, 1, 1, 0, 1],
    'mortgage': [1, 1, 0, 0, 1]
}

df = pd.DataFrame(data)

# Extract the user-item matrix (exclude user_id column)
X = df.drop(columns=['user_id']).values

# Apply Singular Value Decomposition (SVD).
# The default solver is randomized, so it is seeded for repeatable factors.
svd = TruncatedSVD(n_components=2, random_state=42)  # Reduce the matrix to 2 components
X_svd = svd.fit_transform(X)

# Rank-2 reconstruction of the interaction matrix: a smoothed affinity score
# for every (user, product) pair
predicted_ratings = np.dot(X_svd, svd.components_)

# Predict for User 1 (which product they might apply for).
# Products the user already holds are masked out; otherwise the highest score
# is almost always one of the user's existing products.
user_1_predictions = predicted_ratings[0]
candidate_scores = np.where(X[0] > 0, -np.inf, user_1_predictions)
recommended_product_idx = np.argmax(candidate_scores)
recommended_product = df.columns[recommended_product_idx + 1]  # Skip user_id column
print(f"Recommended product for User 1 based on SVD: {recommended_product}")

In [None]:
# # Python Code (GPT-3 with OpenAI API):

import openai

# Set your OpenAI API key.
# Credentials must not be written into the notebook; export OPENAI_API_KEY in
# the environment that starts the kernel. If it is missing this stays None and
# the SDK reports an authentication error on the first request.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")

def generate_response(prompt, model="gpt-3.5-turbo-instruct", max_tokens=150):
    """Send `prompt` to the OpenAI completions endpoint and return the text.

    prompt:     text to complete.
    model:      completions-capable model id. The default replaces
                text-davinci-003, which OpenAI has retired.
    max_tokens: upper bound on the length of the generated completion.

    Returns the first completion with surrounding whitespace stripped.
    """
    if hasattr(openai, "OpenAI"):
        # openai >= 1.0: requests go through a client object. Reuse a key that
        # was set on the module, if any (None falls back to OPENAI_API_KEY).
        client = openai.OpenAI(api_key=getattr(openai, "api_key", None))
        response = client.completions.create(
            model=model,
            prompt=prompt,
            max_tokens=max_tokens,
        )
    else:
        # openai < 1.0: module-level resource API
        response = openai.Completion.create(
            model=model,
            prompt=prompt,
            max_tokens=max_tokens,
        )
    return response.choices[0].text.strip()

# Sample customer request sent to the model; the reply is printed as plain text
prompt = "I want to check the balance in my savings account."
response = generate_response(prompt=prompt)
print(response)

In [None]:
# # Python Code (BERT for Sentiment Analysis using HuggingFace's Transformers):

from transformers import pipeline

# Load sentiment-analysis pipeline from Hugging Face.
# The checkpoint is named explicitly: without it the library falls back to a
# default that may change between releases and warns on every load. This is the
# DistilBERT SST-2 checkpoint the task has used as its default.
sentiment_analysis = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Example feedback
feedback = "The customer service at your branch was excellent!"

# Perform sentiment analysis; the result is a list with one {label, score} dict
result = sentiment_analysis(feedback)
print(result)

In [None]:
# # Python Code (BERT for Masked Language Modeling):

from transformers import pipeline

# Build a fill-mask pipeline backed by the bert-base-uncased checkpoint
fill_mask = pipeline(task="fill-mask", model="bert-base-uncased")

# Sentence containing the [MASK] placeholder the model is asked to fill in
text = "I want to withdraw money from my [MASK] account."

# Run the pipeline and show whatever candidate completions it returns
result = fill_mask(text)
print(result)


In [None]:
# # Python Code (T5 for Summarization using HuggingFace's Transformers):

from transformers import pipeline

# Build a summarization pipeline on the t5-base checkpoint
summarizer = pipeline(task="summarization", model="t5-base")

# Short transaction history used as the document to condense
text = """
Customer John Doe made a series of transactions in December 2024. On December 1st, he deposited $2000 in his savings account. 
On December 5th, he withdrew $500 from the same account. Then, on December 10th, he transferred $1000 to his checking account.
"""

# The pipeline output is indexed per input; take the first entry and print its text
summary = summarizer(text)
first_summary = summary[0]
print(first_summary['summary_text'])


In [None]:
# # Python Code (Example with Vision and Text):

# This is a placeholder code since actual multimodal models require integration with computer vision libraries.
from transformers import VisionTextDualEncoderProcessor, VisionTextDualEncoderModel

# Example: This code is for illustration. In practice, we'd use image-to-text models to extract data from checks.
# NOTE(review): 'model_name' is a placeholder, not a real checkpoint identifier; both calls
# below need an actual model name or local path before this cell can run.
# NOTE(review): this rebinds the notebook-wide name `model`; later cells that rely on an
# earlier `model` would be affected.
processor = VisionTextDualEncoderProcessor.from_pretrained('model_name')
model = VisionTextDualEncoderModel.from_pretrained('model_name')

# This step requires an image input, which isn't feasible to demonstrate in code here.


In [None]:
# Python Example for Tokenization and Preprocessing:

from transformers import BertTokenizer
import re

# Sample banking-related text that the following steps clean and tokenize
text = "I want to transfer $500 to my savings account. Can you help me?"

# Tokenizer matching the bert-base-uncased checkpoint
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')

# Preprocess the text (e.g., remove special characters)
def preprocess_text(text):
    """Return ``text`` with everything except ASCII letters, digits and whitespace removed.

    Punctuation and symbols such as ``$``, ``.`` and ``?`` are dropped; whitespace is
    left exactly as it was.
    """
    disallowed_chars = re.compile(r'[^a-zA-Z0-9\s]')
    return disallowed_chars.sub('', text)

# Strip special characters first, then hand the cleaned sentence to the tokenizer
processed_text = preprocess_text(text)

# Encoding options: pad, truncate, and return PyTorch tensors
encode_options = dict(padding=True, truncation=True, return_tensors="pt")
tokens = tokenizer(processed_text, **encode_options)
print(tokens)


In [None]:
# Python Example for Pretraining GPT-like Model:

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments

# Load GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# GPT-2 ships without a padding token, so `padding=True` below would raise.
# Reusing the end-of-sequence token as the pad token is the usual workaround.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

# Example training dataset
train_data = ["Bank account balance check", "Transfer funds to checking account", "Apply for a home loan"]

# Tokenize the training data (padded to the longest sentence, returned as tensors)
train_encodings = tokenizer(train_data, truncation=True, padding=True, return_tensors="pt")


class CausalLMDataset(torch.utils.data.Dataset):
    """Expose tokenizer output as an indexable dataset of per-example dicts.

    Trainer indexes its dataset by integer and needs a ``labels`` entry to compute a
    loss; the raw tokenizer output provides neither. For causal language modeling the
    labels are the input ids themselves, with padded positions set to -100 so they
    are ignored by the loss.
    """

    def __init__(self, encodings):
        self.encodings = encodings

    def __len__(self):
        return self.encodings["input_ids"].shape[0]

    def __getitem__(self, idx):
        item = {key: tensor[idx] for key, tensor in self.encodings.items()}
        labels = item["input_ids"].clone()
        labels[item["attention_mask"] == 0] = -100
        item["labels"] = labels
        return item


train_dataset = CausalLMDataset(train_encodings)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    num_train_epochs=1,              # Number of training epochs
    per_device_train_batch_size=4,   # Batch size
    logging_dir='./logs',            # Log directory
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset
)

# Start training
trainer.train()


In [None]:
# Python Example for Fine-tuning BERT for Text Classification:

from transformers import BertForSequenceClassification, BertTokenizer, Trainer, TrainingArguments
from datasets import load_dataset

# Load the banking77 intent-classification dataset (customer banking queries)
dataset = load_dataset("banking77")

# Take the number of classes from the dataset itself rather than hard-coding it;
# a classification head smaller than the label set fails as soon as training starts.
num_labels = dataset['train'].features['label'].num_classes

# Load the tokenizer that matches the model. Relying on a `tokenizer` left over from an
# earlier cell (e.g. the GPT-2 one) would feed BERT ids from the wrong vocabulary.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load a pretrained BERT model with a fresh classification head of the right size
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)


# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the 'text' column of a batch with max-length padding and truncation."""
    return tokenizer(examples['text'], padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    evaluation_strategy="epoch",     # Evaluate after each epoch
    per_device_train_batch_size=16,  # Adjust depending on your GPU memory
    per_device_eval_batch_size=16,
    num_train_epochs=3,              # Number of epochs
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test']
)

# Fine-tune the model
trainer.train()


In [None]:
# Python Example for Model Evaluation:

# Evaluate the trained model on the evaluation split the Trainer was built with.
# NOTE(review): `trainer` is not defined in this cell; it comes from whichever cell
# created it last. In the fine-tuning cell directly above, eval_dataset is the 'test'
# split, not a validation split.
results = trainer.evaluate()

# Print evaluation results
print(results)


In [None]:
# Python Example for Making Predictions:

# Making a prediction with the fine-tuned classification model.
# NOTE(review): `model` and `tokenizer` come from the fine-tuning cell above; this cell
# assumes that cell ran last.
import torch

input_text = "How do I transfer funds to another account?"

# Tokenize the input text and move it to the device the model lives on
# (Trainer may have moved the model to a GPU).
inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}

# A sequence-classification model has no text generation step, so `.generate()` does
# not apply here. Run a forward pass and take the highest-scoring class instead.
model.eval()
with torch.no_grad():
    outputs = model(**inputs)

predicted_class_id = int(outputs.logits.argmax(dim=-1))

# Map the class id to its label name as stored in the model config
response = model.config.id2label[predicted_class_id]

print(response)


In [None]:
# # Python Code: Tokenization with BERT

from transformers import BertTokenizer

# Example banking sentence to encode
sentence = "Can you check my account balance?"

# Tokenizer for the bert-base-uncased checkpoint
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')

# Encode the sentence as PyTorch tensors and show the result
tokens = tokenizer(text=sentence, return_tensors='pt')
print(tokens)


In [None]:
# # Python Code: Attention in GPT (Text Generation)

from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load GPT-2 tokenizer and model
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Encode the prompt
prompt = "I want to transfer $500 to my savings account."
inputs = tokenizer(prompt, return_tensors='pt')

# Generate a continuation token by token, up to 50 tokens in total (prompt included).
# Passing the full encoding supplies the attention mask, and pad_token_id is set
# explicitly because GPT-2 defines no pad token; omitting either makes `generate` warn.
outputs = model.generate(**inputs, max_length=50, pad_token_id=tokenizer.eos_token_id)

# Decode the generated text
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


In [None]:
# # Python Code: Using BERT for Sentence Classification (e.g., Sentiment Analysis)

from transformers import BertForSequenceClassification, pipeline

# Load a BERT checkpoint that has actually been fine-tuned for sentiment.
# 'bert-base-uncased' is only the pre-trained encoder: the pipeline would attach a
# randomly initialized classification head and return meaningless labels and scores.
# This checkpoint labels text from '1 star' to '5 stars'.
sentiment_analysis = pipeline(
    'sentiment-analysis',
    model='nlptown/bert-base-multilingual-uncased-sentiment',
)

# Example customer feedback from a banking service
feedback = "I am very happy with the customer service at your bank!"

# Perform sentiment analysis
result = sentiment_analysis(feedback)
print(result)


In [None]:
# # Python Code: Fine-tuning BERT for Banking Classification Task

from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset

# Load the banking77 intent-classification dataset (customer banking queries)
dataset = load_dataset('banking77')

# Take the number of classes from the dataset rather than hard-coding it; a head with
# fewer outputs than there are labels fails as soon as training starts.
num_labels = dataset['train'].features['label'].num_classes

# Load a pre-trained BERT model with a classification head of the right size
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)

# Tokenize the dataset
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize_function(examples):
    """Tokenize the 'text' column of a batch with max-length padding and truncation."""
    return tokenizer(examples['text'], padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    evaluation_strategy="epoch",     # Evaluate after each epoch
    per_device_train_batch_size=16,  # Batch size
    per_device_eval_batch_size=16,
    num_train_epochs=3,              # Number of epochs
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test']
)

# Fine-tune the model
trainer.train()


In [None]:
# # Python Code: Text Generation Using GPT-2

from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Prompt for banking scenario
prompt = "How can I apply for a home loan?"

# Tokenize input and generate response.
# Passing the full encoding supplies the attention mask, and pad_token_id is set
# explicitly because GPT-2 defines no pad token; omitting either makes `generate` warn.
inputs = tokenizer(prompt, return_tensors='pt')
outputs = model.generate(**inputs, max_length=100, pad_token_id=tokenizer.eos_token_id)

# Decode the generated response
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


In [None]:
# # # Python Code Sample (Text Generation using GPT-2):

from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Sample prompt for the banking scenario
prompt = "What are the requirements to open a savings account at your bank?"

# Tokenize the input prompt
inputs = tokenizer(prompt, return_tensors='pt')

# Generate a response (text completion).
# Passing the full encoding supplies the attention mask, and pad_token_id is set
# explicitly because GPT-2 defines no pad token; omitting either makes `generate` warn.
outputs = model.generate(**inputs, max_length=100, pad_token_id=tokenizer.eos_token_id)

# Decode and print the generated response
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


In [None]:
# # # Python Code Sample (Sentiment Analysis using BERT):

from transformers import pipeline

# Load a BERT checkpoint that has actually been fine-tuned for sentiment.
# 'bert-base-uncased' is only the pre-trained encoder: the pipeline would attach a
# randomly initialized classification head and return meaningless labels and scores.
# This checkpoint labels text from '1 star' to '5 stars'.
sentiment_analysis = pipeline(
    'sentiment-analysis',
    model='nlptown/bert-base-multilingual-uncased-sentiment',
)

# Example customer feedback for sentiment analysis
feedback = "The loan application process was quick and easy. Great service!"

# Perform sentiment analysis
result = sentiment_analysis(feedback)
print(result)


In [None]:
# # # Python Code Sample (NER using SpaCy):

import spacy

# Example banking sentence to analyse
text = "I transferred $500 to my account on 12/24/2024."

# Load the small English spaCy pipeline and run it over the sentence
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# Format each recognized entity as "text (LABEL)", then print one per line
entity_lines = [f"{ent.text} ({ent.label_})" for ent in doc.ents]
for entity_line in entity_lines:
    print(entity_line)


In [None]:
# # Python Code Sample (Summarization using T5):

from transformers import T5ForConditionalGeneration, T5Tokenizer

# Tokenizer and model for the t5-base checkpoint
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Banking-related passage to summarize
text = """
The bank offers a wide range of financial products including savings accounts, checking accounts, loans, mortgages, and investment options. 
Customers can open accounts online or at any of our local branches. 
We also provide digital banking services including mobile banking apps for easy account management. 
Our loan offerings include personal loans, business loans, and home loans.
"""

# Prefix the passage with the "summarize: " task prompt and encode it (at most 512 tokens)
task_input = "summarize: " + text
inputs = tokenizer(task_input, return_tensors='pt', max_length=512, truncation=True)

# Beam-search settings for the summary
generation_options = dict(
    max_length=100,
    min_length=50,
    length_penalty=2.0,
    num_beams=4,
    early_stopping=True,
)
summary_ids = model.generate(inputs['input_ids'], **generation_options)

# Turn the first generated sequence back into text
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(summary)


In [None]:
# # Python Code Sample (QA using BERT):

from transformers import pipeline

# Question to answer from the passage below
question = "What do I need to apply for a loan?"

# Banking-related passage that contains the answer
context = """
The bank offers various types of loans, including personal loans, auto loans, and home loans. 
To apply for a loan, customers must submit proof of income, identification, and credit history.
"""

# Question-answering pipeline on a BERT checkpoint fine-tuned on SQuAD
qa_pipeline = pipeline(
    task='question-answering',
    model='bert-large-uncased-whole-word-masking-finetuned-squad',
)

# Ask the question against the passage and show the pipeline's answer
qa_inputs = {"question": question, "context": context}
result = qa_pipeline(**qa_inputs)
print(result)


In [None]:
# # Python Code Sample (STT using SpeechRecognition):

import speech_recognition as sr

# Initialize recognizer
recognizer = sr.Recognizer()

# Use the microphone as the audio source
with sr.Microphone() as source:
    print("Listening for speech...")
    audio = recognizer.listen(source)

# Convert speech to text.
# Recognition can fail for ordinary reasons (unintelligible audio, no network), so
# report those cases instead of letting the cell stop with a traceback.
try:
    text = recognizer.recognize_google(audio)
except sr.UnknownValueError:
    text = ""
    print("Could not understand the audio.")
except sr.RequestError as err:
    text = ""
    print(f"Speech recognition request failed: {err}")
else:
    print(f"Recognized text: {text}")
# # Python Code Sample (TTS using pyttsx3):

import pyttsx3

# Initialize the text-to-speech engine
engine = pyttsx3.init()

# Example response from a chatbot
response = "Your loan application has been successfully submitted."

# Convert text to speech; runAndWait blocks until the queued speech has been spoken
engine.say(response)
engine.runAndWait()


In [None]:
# # Python Code Sample (Translation using MarianMT):

from transformers import MarianMTModel, MarianTokenizer

# Load pre-trained MarianMT model for translation
model_name = 'Helsinki-NLP/opus-mt-en-fr'  # English to French
model = MarianMTModel.from_pretrained(model_name)
tokenizer = MarianTokenizer.from_pretrained(model_name)

# Example sentence
sentence = "How can I apply for a loan?"

# Tokenize the sentence
inputs = tokenizer(sentence, return_tensors="pt", padding=True)

# Translate. The whole encoding is passed so the attention mask travels with the ids;
# with padding enabled, dropping it would let the model attend to pad tokens as soon
# as more than one sentence is translated at once.
translated = model.generate(**inputs)
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
print(translated_text)


In [None]:
# # Python Code Sample (Document Processing using Tesseract and NLP):

import pytesseract
from PIL import Image
from transformers import pipeline

# Example document image (e.g., bank statement).
# Opening it in a `with` block closes the underlying file handle once OCR has finished.
with Image.open('bank_statement.jpg') as image:
    # Extract text from the image using Tesseract OCR
    text = pytesseract.image_to_string(image)

# Use NLP model to analyze extracted text.
# NOTE(review): no checkpoint is pinned, so the library's default NER model is used.
# NOTE(review): this rebinds `nlp`, which the spaCy cell earlier in the notebook also uses.
nlp = pipeline("ner")
ner_results = nlp(text)
print(ner_results)


In [None]:
# # Python Code Sample (Tokenization using BERT):

from transformers import BertTokenizer

# Sentence to break into word pieces
sentence = "Can you check my account balance?"

# Tokenizer for the bert-base-uncased checkpoint
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')

# First split the sentence into tokens, then look up the vocabulary id of each token
tokens = tokenizer.tokenize(sentence)
input_ids = tokenizer.convert_tokens_to_ids(tokens)

# Show both views side by side
for caption, values in (("Tokens:", tokens), ("Input IDs:", input_ids)):
    print(caption, values)


In [None]:
# # Python Code Sample (Word Embeddings using BERT):

from transformers import BertModel, BertTokenizer
import torch

# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Example sentence
sentence = "Can you check my account balance?"

# Tokenize the sentence
inputs = tokenizer(sentence, return_tensors="pt")

# Get token embeddings from BERT. This is inference only, so gradient tracking is skipped.
with torch.no_grad():
    outputs = model(**inputs)
embeddings = outputs.last_hidden_state

# The tokenizer prepends the special [CLS] token, so position 0 is [CLS] and the
# first real word ('can') sits at position 1.
first_word_index = 1
print(embeddings[0][first_word_index])  # Embedding for the first word 'can'


In [None]:
# # Python Code Sample (Self-Attention in BERT):

from transformers import BertModel, BertTokenizer

# Sentence whose attention pattern we want to inspect
sentence = "I need to transfer $500 to my savings account."

# Model and tokenizer for the bert-base-uncased checkpoint
model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Encode the sentence and ask the model to return its attention weights as well
inputs = tokenizer(sentence, return_tensors="pt")
outputs = model(output_attentions=True, **inputs)

# `outputs.attentions` is indexed per layer; keep the last entry
all_layer_attentions = outputs.attentions
attentions = all_layer_attentions[-1]
print(attentions)


In [None]:
# # Python Code Sample (Using Transformer in Hugging Face):

from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load pre-trained T5 model and tokenizer
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')

# Example input text. T5 selects its task from a text prefix and the tokenizer is
# case-sensitive, so the prefix is written as in the T5 documentation
# ("translate English to French: ", lowercase "t").
input_text = "translate English to French: How are you?"

# Tokenize input text
inputs = tokenizer(input_text, return_tensors="pt")

# Generate output (translation); the full encoding is passed so the attention mask is used
outputs = model.generate(**inputs, max_length=40)

# Decode output
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_output)


In [None]:
# # Python Code Sample (Fine-tuning a Pre-trained Model for Sentiment Analysis):

from transformers import Trainer, TrainingArguments, BertForSequenceClassification, BertTokenizer
from datasets import load_dataset

# IMDb movie-review dataset used for sentiment classification
dataset = load_dataset("imdb")

# BERT classifier and the tokenizer that goes with it
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize_function(examples):
    """Tokenize a batch's 'text' field with max-length padding and truncation."""
    reviews = examples['text']
    return tokenizer(reviews, truncation=True, padding="max_length")


# Apply the tokenizer to every split, one batch at a time
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Training configuration: 3 epochs, batch size 8, evaluation at the end of each epoch
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    evaluation_strategy="epoch",
)

# Train on the 'train' split and evaluate on the 'test' split
train_split = tokenized_datasets['train']
eval_split = tokenized_datasets['test']
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Run the fine-tuning loop
trainer.train()


In [None]:
# # Python Code Sample (Contextualized Representations using BERT):

from transformers import BertModel, BertTokenizer

# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Example sentence with the word "bank"
sentence = "I went to the river bank."

# Tokenize input text
inputs = tokenizer(sentence, return_tensors="pt")

# Get contextualized embeddings
outputs = model(**inputs)
embeddings = outputs.last_hidden_state

# Locate "bank" in the tokenized sequence instead of hard-coding its position: the
# tokenizer prepends [CLS], so with this sentence index 3 is the token "to".
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())
bank_index = tokens.index("bank")

# Display embeddings for the word "bank"
print(embeddings[0][bank_index])


In [None]:
# # Python Code Sample (Autoregressive Generation with GPT-2):

from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load pre-trained GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Example prompt for generating text
prompt = "How do I apply for a loan at your bank?"

# Tokenize input text
inputs = tokenizer(prompt, return_tensors="pt")

# Generate text (autoregressive generation).
# Passing the full encoding supplies the attention mask, and pad_token_id is set
# explicitly because GPT-2 defines no pad token; omitting either makes `generate` warn.
outputs = model.generate(**inputs, max_length=50, pad_token_id=tokenizer.eos_token_id)

# Decode and print the generated text
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


In [None]:
# # Python Code Sample (Using GLUE Dataset for Text Classification with BERT):

from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset

# SST-2 sentiment task from the GLUE benchmark
dataset = load_dataset("glue", "sst2")

# Two-class BERT classifier and the tokenizer that goes with it
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize_function(examples):
    """Tokenize a batch's 'sentence' field with max-length padding and truncation."""
    sentences = examples['sentence']
    return tokenizer(sentences, truncation=True, padding="max_length")


# Apply the tokenizer to every split, one batch at a time
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Training configuration: 3 epochs, batch size 8, evaluation at the end of each epoch
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    evaluation_strategy="epoch",
)

# Train on the 'train' split and evaluate on the 'validation' split
train_split = tokenized_datasets['train']
eval_split = tokenized_datasets['validation']
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Run the fine-tuning loop on the GLUE data
trainer.train()


In [None]:
# # Python Code Sample (Using SuperGLUE Dataset):

from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset

# BoolQ task from the SuperGLUE benchmark
dataset = load_dataset("super_glue", "boolq")

# Two-class RoBERTa classifier and the tokenizer that goes with it
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')


def tokenize_function(examples):
    """Tokenize a batch's question/passage pairs with max-length padding and truncation."""
    questions = examples['question']
    passages = examples['passage']
    return tokenizer(questions, passages, truncation=True, padding="max_length")


# Apply the tokenizer to every split, one batch at a time
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Training configuration: 3 epochs, batch size 8, evaluation at the end of each epoch
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    evaluation_strategy="epoch",
)

# Train on the 'train' split and evaluate on the 'validation' split
train_split = tokenized_datasets['train']
eval_split = tokenized_datasets['validation']
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Run the fine-tuning loop on the SuperGLUE data
trainer.train()


In [None]:
# # Python Code Sample (Using SQuAD for Question Answering):

from transformers import BertTokenizer, BertTokenizerFast, BertForQuestionAnswering, Trainer, TrainingArguments
from datasets import load_dataset

# Load the SQuAD dataset
dataset = load_dataset("squad")

# Load pre-trained BERT model and tokenizer.
# The fast tokenizer is required: it reports character offsets for every token, which
# are needed to turn the character-level answers into token-level training targets.
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')


# Tokenize the dataset for question answering
def tokenize_function(examples):
    """Tokenize question/context pairs and add the answer span as token positions.

    Extractive QA training needs ``start_positions`` and ``end_positions`` (token
    indices of the answer inside the context); without them the model returns no loss.
    Only the context is truncated. If truncation cuts the answer out of the context,
    both positions are set to 0.
    """
    inputs = tokenizer(
        [question.strip() for question in examples['question']],
        examples['context'],
        padding="max_length",
        truncation="only_second",
        return_offsets_mapping=True,
    )

    # Offsets are only needed to compute the labels; they are not a model input
    offset_mapping = inputs.pop("offset_mapping")
    start_positions = []
    end_positions = []

    for i, offsets in enumerate(offset_mapping):
        answer = examples['answers'][i]
        start_char = answer['answer_start'][0]
        end_char = start_char + len(answer['text'][0])
        sequence_ids = inputs.sequence_ids(i)

        # Find the first and last token that belong to the context (sequence id 1)
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        if offsets[context_start][0] > start_char or offsets[context_end][1] < end_char:
            # The answer is not fully inside the (possibly truncated) context
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Walk inwards from both ends of the context to the answer's boundary tokens
            idx = context_start
            while idx <= context_end and offsets[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offsets[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    per_device_train_batch_size=8,
    num_train_epochs=3,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation']
)

# Fine-tune the model on SQuAD dataset
trainer.train()


In [None]:
# # Python Code Sample (Zero-Shot Classification using Hugging Face):

from transformers import pipeline

# Text to classify and the labels it may be assigned to
sequence = "How do I apply for a personal loan?"
candidate_labels = ["finance", "health", "technology", "sports"]

# Zero-shot classification pipeline on the facebook/bart-large-mnli checkpoint
zero_shot_classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Classify the text against the candidate labels and show the pipeline's output
result = zero_shot_classifier(sequence, candidate_labels)
print(result)


In [None]:
# # Python Code Sample (Using ROUGE for Text Generation Evaluation):

from datasets import load_metric

# Generated text and the reference it is compared against
generated_text = "To open a savings account, visit our branch."
reference_text = "Visit our branch to open a savings account."

# Load the ROUGE metric.
# NOTE(review): `load_metric` is deprecated in recent `datasets` releases in favor of the
# separate `evaluate` package; confirm against the installed version.
rouge = load_metric("rouge")

# The metric takes parallel lists of predictions and references
predictions_batch = [generated_text]
references_batch = [reference_text]
results = rouge.compute(predictions=predictions_batch, references=references_batch)

# Show the computed ROUGE scores
print(results)


In [None]:
# # Python Code Sample (Using EvalResults for Multiple Metrics):

import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from transformers import Trainer, TrainingArguments
from datasets import load_dataset
from transformers import BertForSequenceClassification, BertTokenizer

# Load dataset and model
dataset = load_dataset("glue", "sst2")
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')


# Tokenize and prepare dataset
def tokenize_function(examples):
    """Tokenize the 'sentence' column of a batch with max-length padding and truncation."""
    return tokenizer(examples['sentence'], padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)


def compute_metrics(eval_pred):
    """Turn the Trainer's (logits, labels) pair into classification metrics.

    Without a ``compute_metrics`` callback, ``Trainer.evaluate`` reports only the loss
    and timing figures. Returns accuracy, precision, recall and F1 for the binary task.
    """
    logits, labels = eval_pred
    predicted = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, predicted),
        "precision": precision_score(labels, predicted, zero_division=0),
        "recall": recall_score(labels, predicted, zero_division=0),
        "f1": f1_score(labels, predicted, zero_division=0),
    }


# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    per_device_train_batch_size=8,
    num_train_epochs=3
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Evaluate model.
# NOTE: no training happens in this cell, so the classification head is still untrained
# and the scores only show the reporting format, not real model quality.
eval_results = trainer.evaluate()
print(eval_results)


In [None]:
# # Python Code Sample (Accuracy for Text Classification Task):

from sklearn.metrics import accuracy_score
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset

# SST-2 sentiment task from the GLUE benchmark
dataset = load_dataset("glue", "sst2")

# Two-class BERT classifier and the tokenizer that goes with it
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize_function(examples):
    """Tokenize a batch's 'sentence' field with max-length padding and truncation."""
    sentences = examples['sentence']
    return tokenizer(sentences, truncation=True, padding="max_length")


# Apply the tokenizer to every split, one batch at a time
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Training configuration: 3 epochs, batch size 8, evaluation at the end of each epoch
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    evaluation_strategy="epoch",
)

# Train on the 'train' split and evaluate on the 'validation' split
validation_split = tokenized_datasets['validation']
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=validation_split,
)

# Run the fine-tuning loop
trainer.train()

# Predict on the validation split; the result unpacks into predictions, labels and metrics
predictions, labels, _ = trainer.predict(validation_split)

# The predicted class is the index of the largest score along the last axis
predicted_labels = predictions.argmax(axis=-1)
accuracy = accuracy_score(labels, predicted_labels)

print(f'Accuracy: {accuracy:.4f}')


In [None]:
# # Python Code Sample (Precision, Recall, F1-Score for Text Classification):

from sklearn.metrics import precision_score, recall_score, f1_score

# Score the predictions with three metrics.
# NOTE(review): `labels` and `predicted_labels` are not defined in this cell; they are
# expected to exist from the accuracy cell above.
precision, recall, f1 = (
    scorer(labels, predicted_labels)
    for scorer in (precision_score, recall_score, f1_score)
)

# Report each metric to four decimal places
for metric_name, metric_value in (("Precision", precision), ("Recall", recall), ("F1-Score", f1)):
    print(f'{metric_name}: {metric_value:.4f}')


In [None]:
# # Python Code Sample (Using ROUGE for Text Summarization):

from datasets import load_metric

# Generated summary and the reference summary it is compared against
generated_text = "The customer requested a loan to buy a new house."
reference_text = "A customer applied for a home loan."

# Load the ROUGE metric.
# NOTE(review): `load_metric` is deprecated in recent `datasets` releases in favor of the
# separate `evaluate` package; confirm against the installed version.
rouge = load_metric("rouge")

# The metric takes parallel lists of predictions and references
predictions_batch = [generated_text]
references_batch = [reference_text]
results = rouge.compute(predictions=predictions_batch, references=references_batch)

# Show the computed ROUGE scores
print(results)


In [None]:
# # Python Code Sample (Using BLEU for Text Generation Evaluation):

from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

# Example generated and reference sentences for translation or generation
generated_text = "The loan application was processed successfully."
reference_text = ["The loan application has been successfully processed."]

# sentence_bleu expects every reference AND the hypothesis as lists of tokens.
# Passing a reference as a raw string makes it iterate character by character, so
# word n-grams never match and the score collapses to about zero.
reference_tokens = [reference.split() for reference in reference_text]
hypothesis_tokens = generated_text.split()

# Compute BLEU score. Short sentences often have no matching 4-grams; smoothing keeps
# the score from dropping to zero (with a warning) in that case.
bleu_score = sentence_bleu(
    reference_tokens,
    hypothesis_tokens,
    smoothing_function=SmoothingFunction().method1,
)

# Display BLEU score
print(f"BLEU Score: {bleu_score:.4f}")


In [None]:
# # Python Code Sample (Using Perplexity for Language Modeling):

from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Sentence whose perplexity we want to measure
input_text = "The bank offers various services."

# GPT-2 tokenizer and language model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Encode the sentence as PyTorch tensors
inputs = tokenizer(input_text, return_tensors="pt")
target_ids = inputs["input_ids"]

# Supplying the input ids as labels makes the model return a language-modeling loss;
# perplexity is the exponential of that loss. No gradients are needed here.
with torch.no_grad():
    outputs = model(**inputs, labels=target_ids)
    loss = outputs.loss
    perplexity = loss.exp()

print(f"Perplexity: {perplexity.item():.4f}")


In [None]:
# # Python Code Sample (Using MAP for Text Retrieval):

from sklearn.metrics import average_precision_score

# Ground-truth relevance of each retrieved document (1 = relevant, 0 = not relevant)
y_true = [1, 0, 1, 0, 1]
# Score the model assigned to each of those documents
y_scores = [0.9, 0.1, 0.75, 0.2, 0.85]

# Average precision over this single ranked list
map_score = average_precision_score(y_true=y_true, y_score=y_scores)

print(f"Mean Average Precision (MAP): {map_score:.4f}")


In [None]:
# # Python Code Sample (Using ROC AUC for Binary Classification):

from sklearn.metrics import roc_auc_score

# Ground-truth binary labels
y_true = [0, 1, 0, 1, 0]
# Predicted probability of class 1 for each example
y_probs = [0.1, 0.9, 0.3, 0.7, 0.2]

# Area under the ROC curve for these scores
roc_auc = roc_auc_score(y_true=y_true, y_score=y_probs)

print(f"ROC AUC: {roc_auc:.4f}")


In [None]:
# # Python Code: Confidence Scoring with BERT

from transformers import BertTokenizer, BertForSequenceClassification
from torch.nn.functional import softmax
import torch

# Load a BERT checkpoint that has already been fine-tuned for binary sentiment (SST-2).
# Plain 'bert-base-uncased' has no trained classification head: its head would be
# randomly initialized, and the "confidence" below would be noise.
checkpoint = "textattack/bert-base-uncased-SST-2"
model = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
tokenizer = BertTokenizer.from_pretrained(checkpoint)
model.eval()  # inference mode: disables dropout so repeated runs give the same scores

# Input sentence
sentence = "I love the customer service at this bank!"

# Tokenize input text
inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)

# Perform inference
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

# Apply softmax to get probability scores (confidence).
# The class axis is addressed explicitly so the result is right for the single input
# sentence here and does not silently flatten across a batch.
probs = softmax(logits, dim=-1)
confidence_score = probs[0].max().item()
predicted_class = probs[0].argmax().item()

print(f"Predicted Class: {predicted_class}")
print(f"Confidence Score: {confidence_score:.4f}")


In [None]:
# # Python Code: Upper and Lower Bound for Accuracy

from sklearn.metrics import accuracy_score
import numpy as np

# Simulated true labels and predictions (for example, in a fraud detection model)
true_labels = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 0])
predictions = np.array([0, 1, 0, 0, 1, 0, 1, 1, 1, 0])

# Compute accuracy
accuracy = accuracy_score(true_labels, predictions)

# Compute the random-classifier baseline: draw random 0/1 predictions and score them
# against the true labels. Averaging the random draws on their own only gives the
# share of 1s that were drawn, which says nothing about accuracy.
# A fixed seed keeps the printed baseline the same on every run.
rng = np.random.default_rng(42)
random_predictions = rng.integers(0, 2, size=true_labels.shape[0])
random_accuracy = accuracy_score(true_labels, random_predictions)

# Compute upper bound (accuracy of a perfect classifier)
perfect_accuracy = 1.0  # Ideal accuracy for perfect predictions

print(f"Model Accuracy: {accuracy:.4f}")
print(f"Random Classifier Accuracy (Lower Bound): {random_accuracy:.4f}")
print(f"Perfect Classifier Accuracy (Upper Bound): {perfect_accuracy:.4f}")


In [None]:
# # Python Code: Hyperparameter Optimization using GridSearchCV

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Iris sample dataset: feature matrix and class labels
data = load_iris()
X, y = data.data, data.target

# Hold out 20% of the rows; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate values for each hyperparameter; every combination is tried
param_grid = {
    'C': [0.1, 1, 10],              # Regularization strength
    'kernel': ['linear', 'rbf'],    # Type of SVM kernel
    'gamma': [0.001, 0.01, 0.1],    # Kernel coefficient
}

# Exhaustive search with 5-fold cross-validation on the training split
svc = SVC()
grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated score
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print(f"Best Hyperparameters: {best_params}")
print(f"Best Accuracy: {best_score:.4f}")


In [None]:
# # Python Code: Overfitting in Logistic Regression

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic binary-classification data: 1000 samples, 20 features, fixed seed
X, y = make_classification(random_state=42, n_features=20, n_samples=1000)

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# C is the inverse regularization strength, so C=100 penalizes the weights only
# lightly and lets the model fit the training data more closely.
model = LogisticRegression(C=100)  # Higher C value leads to more complex model
model.fit(X_train, y_train)

# Predictions for both splits
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)

# A large gap between the two accuracies would point to overfitting
train_accuracy, test_accuracy = (
    accuracy_score(y_train, y_train_pred),
    accuracy_score(y_test, y_test_pred),
)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")


In [None]:
# Python Code: Regularization in Logistic Regression (L2 Regularization)

# Logistic regression with a smaller C than the previous cell's C=100.
# Reuses X_train / X_test / y_train / y_test produced by the earlier cell.
model_ridge = LogisticRegression(C=1)  # Lower C value means stronger regularization
model_ridge.fit(X_train, y_train)

# Predictions for both splits
y_train_pred_ridge, y_test_pred_ridge = (
    model_ridge.predict(X_train),
    model_ridge.predict(X_test),
)

# Accuracy on each split
train_accuracy_ridge, test_accuracy_ridge = (
    accuracy_score(y_train, y_train_pred_ridge),
    accuracy_score(y_test, y_test_pred_ridge),
)

print(f"Training Accuracy with Regularization: {train_accuracy_ridge:.4f}")
print(f"Testing Accuracy with Regularization: {test_accuracy_ridge:.4f}")


In [None]:
# Python Code: Cross-Validation to Detect Overfitting

from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a logistic regression (C=1) over the full X, y
# defined by the earlier cell; each fold yields one score.
model_cv = LogisticRegression(C=1)  # Regularized model to avoid overfitting
cv_scores = cross_val_score(estimator=model_cv, X=X, y=y, cv=5)

print(f"Cross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {np.mean(cv_scores):.4f}")


In [None]:
# Python Code: Simulating a Broad Dataset

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Fix the global NumPy seed so the simulated table is identical on every run
np.random.seed(42)

# Size of the synthetic loan-approval table
n_samples, n_features = 1000, 15  # n_features = number of feature columns below

# One random column per applicant attribute. The columns are drawn in the order
# written here, which matters because they all share the seeded global stream.
data = dict(
    age=np.random.randint(18, 70, n_samples),
    income=np.random.randint(20000, 120000, n_samples),
    credit_score=np.random.randint(300, 850, n_samples),
    loan_amount=np.random.randint(5000, 50000, n_samples),
    debt_to_income_ratio=np.random.uniform(0, 1, n_samples),
    employment_length=np.random.randint(1, 30, n_samples),
    monthly_expenses=np.random.randint(1000, 5000, n_samples),
    savings_balance=np.random.randint(0, 20000, n_samples),
    is_homeowner=np.random.choice([0, 1], n_samples),
    education_level=np.random.choice(['High School', 'Undergraduate', 'Graduate'], n_samples),
    loan_history=np.random.choice([0, 1], n_samples),  # 0 = No previous loan, 1 = Previous loan taken
    marital_status=np.random.choice(['Single', 'Married', 'Divorced'], n_samples),
    num_dependents=np.random.randint(0, 5, n_samples),
    region=np.random.choice(['Urban', 'Suburban', 'Rural'], n_samples),
    employment_status=np.random.choice(['Employed', 'Self-Employed', 'Unemployed'], n_samples),
)

# Build the frame and append the target as the last column.
# Target: whether the loan was approved (1 = Approved, 0 = Rejected); it is
# drawn at random, independently of the feature columns.
df = pd.DataFrame(data).assign(loan_approval=np.random.choice([0, 1], n_samples))

# Show the first few rows of the dataset
print(df.head())


In [None]:
# Python Code: Dimensionality Reduction Using PCA

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Select numerical columns for PCA (excluding the target column 'loan_approval')
numerical_columns = ['age', 'income', 'credit_score', 'loan_amount', 'debt_to_income_ratio',
                     'employment_length', 'monthly_expenses', 'savings_balance', 'num_dependents']
X = df[numerical_columns]

# Target variable
y = df['loan_approval']

# Split BEFORE fitting any transformer: the scaler and PCA must learn their
# statistics from training rows only, otherwise information about the test rows
# leaks into the features the model is trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data (important for PCA); fit on train, apply to both splits
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# Apply PCA to reduce the dimensionality (fit on train, apply to both splits)
pca = PCA(n_components=5)  # Reducing to 5 components
X_train = pca.fit_transform(X_train_scaled)
X_test = pca.transform(X_test_scaled)

# Train a model on the reduced dataset (Random Forest for classification)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy on test set after PCA: {accuracy:.4f}")


In [None]:
# Python Code: Feature Selection with Lasso (L1 Regularization)

from sklearn.linear_model import LassoCV
from sklearn.metrics import accuracy_score

# Use the numerical columns for training
X = df[numerical_columns]
y = df['loan_approval']

# Split first so that feature selection only ever sees training rows; selecting
# features on the full table would leak test information into the model.
X_train_all, X_test_all, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the training data (reuses the `scaler` object created earlier in
# the notebook, refitted here on the training rows)
X_train_scaled = scaler.fit_transform(X_train_all)

# Apply Lasso Regression for feature selection
lasso = LassoCV(cv=5)
lasso.fit(X_train_scaled, y_train)

# Features with a non-zero coefficient are the ones Lasso kept
selected_features = np.array(numerical_columns)[lasso.coef_ != 0]
print(f"Selected features by Lasso: {selected_features}")

# Lasso can shrink every coefficient to zero (for example when the target is
# unrelated to the features). A classifier cannot be fitted on zero columns, so
# fall back to the full candidate list in that case.
if selected_features.size == 0:
    print("Lasso kept no features; falling back to all numerical columns.")
    selected_features = np.array(numerical_columns)

# Train a Random Forest Classifier using only selected features
X_selected = X[selected_features]
X_train = X_train_all[selected_features]
X_test = X_test_all[selected_features]

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy after feature selection: {accuracy:.4f}")


In [None]:
# Python Code: Train-Test Split and Model Evaluation for Loan Approval

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Seed the global NumPy generator so the synthetic table is reproducible
np.random.seed(42)

# 1000 synthetic applicants; columns are drawn in the order written, all from
# the same seeded stream. The target is random and independent of the features.
data = dict(
    age=np.random.randint(18, 70, 1000),
    income=np.random.randint(20000, 120000, 1000),
    credit_score=np.random.randint(300, 850, 1000),
    loan_amount=np.random.randint(5000, 50000, 1000),
    debt_to_income_ratio=np.random.uniform(0, 1, 1000),
    employment_length=np.random.randint(1, 30, 1000),
    loan_approval=np.random.choice([0, 1], 1000),  # Target variable: 0 = Rejected, 1 = Approved
)

df = pd.DataFrame(data)

# Everything except the target is a feature
X, y = df.drop('loan_approval', axis=1), df['loan_approval']

# 70% of the rows for training, 30% for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

# Random forest with 100 trees and a fixed seed
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
accuracy, class_report = (
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
)

print(f"Model Accuracy: {accuracy:.4f}")
print(f"Classification Report:\n{class_report}")


In [None]:
# Python Code: Cross-Validation Using K-Fold for Loan Approval

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# Features and target from the full `df` built by the preceding cell
X, y = df.drop('loan_approval', axis=1), df['loan_approval']

# Same forest configuration as the train/test example
model = RandomForestClassifier(random_state=42, n_estimators=100)

# 5-fold cross-validation, one accuracy value per fold
cv_scores = cross_val_score(estimator=model, X=X, y=y, scoring='accuracy', cv=5)

print(f"Cross-Validation Accuracy Scores: {cv_scores}")
print(f"Mean Accuracy: {cv_scores.mean():.4f}")


In [None]:
# Python Code: Simulating Fraud Detection and Finite Assessment

# Synthetic fraud table with 1000 transactions. No seed is set in this cell, so
# the values continue from wherever the global NumPy stream currently is.
data_fraud = dict(
    transaction_amount=np.random.randint(5, 1000, 1000),
    transaction_frequency=np.random.randint(1, 20, 1000),
    is_international=np.random.choice([0, 1], 1000),
    is_fraud=np.random.choice([0, 1], 1000),  # 0 = No Fraud, 1 = Fraud
)

df_fraud = pd.DataFrame(data_fraud)

# Everything except the label is a feature
X_fraud, y_fraud = df_fraud.drop('is_fraud', axis=1), df_fraud['is_fraud']

# 70/30 train-test split
X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud = train_test_split(
    X_fraud, y_fraud, random_state=42, test_size=0.3
)

# Random forest for fraud detection
model_fraud = RandomForestClassifier(random_state=42, n_estimators=100)
model_fraud.fit(X_train_fraud, y_train_fraud)

# Accuracy on the held-out transactions
y_pred_fraud = model_fraud.predict(X_test_fraud)
accuracy_fraud = accuracy_score(y_test_fraud, y_pred_fraud)

print(f"Fraud Detection Model Accuracy: {accuracy_fraud:.4f}")


In [None]:
# Customer Support Automation (Text Classification): In a banking scenario, LLMs can be used to classify customer queries into 
# different categories like "Loan Inquiry", "Account Balance", "Transaction Issue", etc.
# Example Code using transformers library:

from transformers import pipeline

# Load a pre-trained NLI model. Zero-shot classification scores every candidate
# label as an entailment hypothesis, so the checkpoint must have been fine-tuned
# on NLI; a plain "distilbert-base-uncased" checkpoint has no trained
# classification head and would produce meaningless scores.
model_name = "facebook/bart-large-mnli"
classifier = pipeline("zero-shot-classification", model=model_name)

# Sample banking customer query
query = "I want to check my loan balance"

# Define candidate labels
candidate_labels = ["Loan Inquiry", "Account Balance", "Transaction Issue", "Customer Support"]

# Classify the query
result = classifier(query, candidate_labels)
print(result)
# The result ranks the candidate labels by score; for this query a label such as
# "Loan Inquiry" is expected near the top. This can be useful in automating customer service.

# 2. Fraud Detection (Sentiment Analysis)
# Sentiment analysis can be leveraged to identify potential fraudulent activity by detecting abnormal or
# suspicious patterns in customer communications.
# Example Code for Sentiment Analysis:

# Load a pre-trained sentiment-analysis model
sentiment_analyzer = pipeline("sentiment-analysis")

# Example banking transaction message
transaction_message = "I did not make this transaction, please help!"

# Analyze sentiment
result = sentiment_analyzer(transaction_message)
print(result)
# The result might show negative sentiment, which could indicate a potential fraudulent activity or an
# issue the customer needs to resolve.

# 3. Loan Risk Assessment (Text Summarization)
# Loan applications often come with large amounts of documentation. Using LLMs for summarizing documents
# can significantly improve decision-making processes for loan risk assessments.
# Example Code for Document Summarization:

# Load a pre-trained summarization model
summarizer = pipeline("summarization")

# Sample loan application text
loan_application_text = """
John Doe has been employed for over 5 years at Tech Corp, earning a monthly salary of $5000. His credit score is 720.
He is requesting a loan of $20,000 to purchase a car. His monthly expenses include rent of $1500 and student loan payments
of $200. He has no previous history of loan defaults.
"""

# Generate summary
summary = summarizer(loan_application_text, max_length=100, min_length=50, do_sample=False)
print(summary)
# This would generate a concise summary of the loan application, highlighting the key points that will
# help assess the loan's risk.

# 4. Automated Report Generation (Text Generation)
# LLMs can automatically generate reports, such as quarterly financial summaries or audit reports, using
# natural language generation.
# Example Code for Text Generation:

# Load a pre-trained text-generation model (the Hub id is "gpt2", without a hyphen)
generator = pipeline("text-generation", model="gpt2")

# Input prompt for generating a financial report
prompt = "Generate a summary of the bank's financial performance for Q4 2024."

# Generate the report
report = generator(prompt, max_length=150)
print(report[0]['generated_text'])
# This model will generate a summary or report based on the provided prompt. For the banking scenario,
# this can be used to automate report generation for monthly or quarterly financial summaries.

# 5. Multi-lingual Customer Support (Machine Translation)
# For banks with international customers, an LLM can be used for language translation, enabling support
# for multiple languages.
# Example Code for Machine Translation:

# Load a pre-trained translation model (from English to French)
translator = pipeline("translation_en_to_fr", model="t5-small")

# Sample customer query in English
query = "What is the status of my loan application?"

# Translate the query to French
translation = translator(query)
print(translation)
# This model would translate the customer query into French, allowing customer support agents or automated systems 
# to respond in the customer's preferred language.


In [None]:
# Code Example for Document Verification (OCR + Text Processing)

from transformers import pipeline
import pytesseract
from PIL import Image

# OCR: Extract text from image (for document verification).
# The context manager closes the underlying image file once OCR is done.
image_path = "bank_statement.jpg"
with Image.open(image_path) as img:
    text_from_image = pytesseract.image_to_string(img)

# Use a pre-trained model for text classification (e.g., checking if the extracted text is related to banking).
# Zero-shot classification needs an NLI-fine-tuned checkpoint; a plain
# "distilbert-base-uncased" checkpoint has no trained classification head.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

candidate_labels = ["Loan Inquiry", "Account Balance", "Transaction Issue", "Bank Statement"]
result = classifier(text_from_image, candidate_labels)
print(f"Extracted Text: {text_from_image}")
print(f"Classification Result: {result}")


In [None]:
# Code Example for Text + Audio (Speech-to-Text + Sentiment Analysis)

from transformers import pipeline
import speech_recognition as sr

# Recording to analyse and the recognizer that will process it
audio_path = "customer_call.wav"
recognizer = sr.Recognizer()

# Read the whole file into memory, then transcribe the captured audio.
# NOTE(review): recognize_google looks like it calls an online speech API - confirm network access.
with sr.AudioFile(audio_path) as source:
    audio_data = recognizer.record(source)
transcript = recognizer.recognize_google(audio_data)

# Run the default sentiment-analysis pipeline over the transcript
sentiment_analyzer = pipeline("sentiment-analysis")
sentiment = sentiment_analyzer(transcript)

print(f"Transcript: {transcript}")
print(f"Sentiment: {sentiment}")


In [None]:
# Code Example for Text + Video (Video Summarization)

from transformers import pipeline
import moviepy.editor as mp

# Load the video file
video_path = "bank_video.mp4"
video = mp.VideoFileClip(video_path)

# Extract audio from video for further processing. The clip is always closed
# afterwards so its file readers are released, and a clip without an audio
# track (audio is None) is skipped instead of raising AttributeError.
try:
    audio = video.audio
    if audio is None:
        print("Video has no audio track; skipping audio extraction.")
    else:
        audio.write_audiofile("extracted_audio.wav")
finally:
    video.close()

# You could use pre-trained video captioning models, but here we'll use basic summarization for text
summarizer = pipeline("summarization")

# Example text from a video (e.g., transcript of a training session).
# Note: this sample transcript is hard-coded; the extracted audio is not transcribed here.
transcript = """
In this video, we will go through the basic features of our mobile banking app, including how to check your balance,
transfer funds, and review recent transactions.
"""

# Generate summary
summary = summarizer(transcript, max_length=100, min_length=50, do_sample=False)
print("Video Summary:", summary)


In [None]:
# Code Example for Text + Image + Audio (Fraud Detection)

from transformers import pipeline
import pytesseract
import speech_recognition as sr
from PIL import Image

# Step 1: Extract text from image (OCR for scanned document).
# The context manager closes the image file once OCR is done.
image_path = "scanned_fraud_report.jpg"
with Image.open(image_path) as img:
    text_from_image = pytesseract.image_to_string(img)

# Step 2: Transcribe audio (Customer call)
recognizer = sr.Recognizer()
audio_path = "customer_call.wav"
with sr.AudioFile(audio_path) as source:
    audio_data = recognizer.record(source)
    transcript = recognizer.recognize_google(audio_data)

# Step 3: Analyze the text from both sources (OCR and transcript)
text = text_from_image + " " + transcript

# Sentiment Analysis for detecting potential fraud.
# A scanned document plus a call transcript can be longer than the model's
# maximum input length, so the text is truncated instead of raising an error.
sentiment_analyzer = pipeline("sentiment-analysis")
sentiment = sentiment_analyzer(text, truncation=True)

# Final decision based on sentiment analysis
if sentiment[0]['label'] == 'NEGATIVE':
    print("Fraud Detected: Review required")
else:
    print("No fraud detected")


In [None]:
# Multimodal Fraud Detection (Text + Image + Audio)
# Scenario: A bank wants to detect potential fraud by analyzing customer communication (audio), transaction data (text), 
# and submitted documents (images).
# Python Code for Fraud Detection

from transformers import pipeline
import pytesseract
import speech_recognition as sr
from PIL import Image

# Step 1: OCR for Document Verification (Image modality).
# The context manager closes the image file once OCR is done.
image_path = "customer_id_card.jpg"
with Image.open(image_path) as img:
    document_text = pytesseract.image_to_string(img)

# Step 2: Speech-to-Text for Customer Call (Audio modality)
recognizer = sr.Recognizer()
audio_path = "customer_call.wav"
with sr.AudioFile(audio_path) as source:
    audio_data = recognizer.record(source)
    transcript = recognizer.recognize_google(audio_data)

# Step 3: Combine text from both sources (OCR and Transcript)
combined_text = document_text + " " + transcript

# Step 4: Perform Sentiment Analysis to check for Fraudulent Behavior (Text modality).
# The combined text can be longer than the model's maximum input length, so it
# is truncated instead of raising an error.
sentiment_analyzer = pipeline("sentiment-analysis")
sentiment = sentiment_analyzer(combined_text, truncation=True)

# Detect fraud based on sentiment
if sentiment[0]['label'] == 'NEGATIVE':
    print("Fraud Detected: Review Required")
else:
    print("No fraud detected")


In [None]:
# Multimodal Customer Support Automation (Text + Audio)
# Scenario: A bank wants to automate customer support by analyzing text queries (e.g., account balance inquiries) 
# and audio data (e.g., spoken complaints).
# Python Code for Customer Support Automation

from transformers import pipeline
import speech_recognition as sr

# Initialize speech recognizer for audio (customer query)
recognizer = sr.Recognizer()
audio_path = "customer_query.wav"

# Convert speech to text
with sr.AudioFile(audio_path) as source:
    audio_data = recognizer.record(source)
    query_text = recognizer.recognize_google(audio_data)

# Use a pre-trained text classifier to understand customer intent.
# Zero-shot classification needs an NLI-fine-tuned checkpoint; a plain
# "distilbert-base-uncased" checkpoint has no trained classification head.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate labels for classification
candidate_labels = ["Account Balance Inquiry", "Loan Inquiry", "Complaint", "Other"]

# Classify customer query intent
result = classifier(query_text, candidate_labels)

print(f"Customer Query: {query_text}")
print(f"Query Classification: {result}")


In [None]:
# Multimodal Document Verification (Text + Image)
# Scenario: A bank needs to verify a customer’s identity by analyzing both the text in a scanned document and the 
# image content (e.g., facial recognition from a photo ID).
# Python Code for Document Verification

from transformers import pipeline
import pytesseract
from PIL import Image

# Step 1: Extract text from the document image (OCR).
# The context manager closes the image file once OCR is done.
document_image_path = "customer_document.jpg"
with Image.open(document_image_path) as doc_img:
    document_text = pytesseract.image_to_string(doc_img)

# Step 2: Use a pre-trained text classifier to verify document information.
# Zero-shot classification needs an NLI-fine-tuned checkpoint; a plain
# "distilbert-base-uncased" checkpoint has no trained classification head.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
candidate_labels = ["Identity Verification", "Account Information", "Transaction Details"]
document_classification = classifier(document_text, candidate_labels)

print(f"Document Text: {document_text}")
print(f"Document Classification: {document_classification}")


In [None]:
# Data Preprocessing (Loading and Preparing Banking Data)
# One of the first steps when working with machine learning in banking is data preprocessing. This can involve loading data, 
# cleaning, normalizing, and transforming it into a format suitable for training models.
# Banking Scenario: Customer Loan Data Preprocessing
# We might have a dataset of customers with information like age, income, credit score, loan amount, and other features. 
# These need to be preprocessed before using them in a machine learning model.

import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Sample data
data = {
    'age': [25, 30, 45, 35, 50],
    'income': [50000, 60000, 100000, 70000, 120000],
    'credit_score': [700, 650, 800, 720, 850],
    'loan_amount': [20000, 25000, 50000, 30000, 60000],
    'approved': [1, 0, 1, 1, 0]  # 1 = Approved, 0 = Denied
}

df = pd.DataFrame(data)

# Features and target variable
X = df.drop('approved', axis=1)
y = df['approved']

# Split data into training and testing sets BEFORE scaling, so the scaler's
# mean and variance come from training rows only (no test-set leakage)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features: fit on the training rows, apply to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

print(X_train)

# 2. Building a Neural Network for Loan Approval Prediction
# In banking, predicting whether a customer will be approved for a loan is a common use case. A neural network can be 
# built using TensorFlow to classify whether a loan should be approved or denied based on features such as age, income, and credit score.
# Banking Scenario: Loan Approval Prediction using Neural Network

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build the network layer by layer: two ReLU hidden layers (32 and 16 units)
# followed by a single sigmoid unit for the binary approved/denied output.
model = Sequential()
model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid for binary classification

# Binary cross-entropy loss, Adam optimizer, accuracy reported as a metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit on the training split
model.fit(X_train, y_train, batch_size=10, epochs=100)

# Score the held-out split; evaluate() returns the loss followed by the metric
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

# 3. Predicting Credit Card Fraud Detection
# Fraud detection is a major application of machine learning in banking. A model can be trained to predict fraudulent credit card transactions.
# Banking Scenario: Credit Card Fraud Detection
# We can build a neural network model to predict whether a credit card transaction is fraudulent based on features 
# such as transaction amount, merchant, and time.

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split

# Five simulated card transactions; is_fraud is the label (1 = Fraud, 0 = No fraud)
data = dict(
    amount=[100, 200, 300, 400, 500],
    merchant=[1, 2, 1, 3, 2],
    time=[10, 15, 20, 25, 30],
    is_fraud=[0, 1, 0, 1, 0],
)

df = pd.DataFrame(data)

# Everything except the label is a feature
X, y = df.drop(columns=['is_fraud']), df['is_fraud']

# 80/20 train-test split (features are used unscaled in this section)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Two ReLU hidden layers (64 and 32 units) and one sigmoid output unit
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid for binary classification

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit on the training split
model.fit(X_train, y_train, batch_size=10, epochs=100)

# Score the held-out split
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

# 4. Predicting Customer Churn
# Churn prediction is an essential task in the banking industry to retain customers. A churn prediction model predicts the 
# likelihood of a customer leaving the bank based on factors like transaction history, customer support interactions, etc.
# Banking Scenario: Customer Churn Prediction
# In this case, we can predict whether a customer will churn based on their behavior and interactions with the bank.

# Simulated churn data
data = {
    'age': [25, 30, 45, 35, 50],
    'transaction_frequency': [5, 2, 8, 3, 1],
    'balance': [1000, 1500, 3000, 1200, 400],
    'churned': [0, 1, 0, 0, 1]  # 1 = Churned, 0 = Not Churned
}

df = pd.DataFrame(data)

# Features and target variable
X = df.drop('churned', axis=1)
y = df['churned']

# Split data into training and testing sets BEFORE scaling, so the scaler's
# statistics come from training rows only (no test-set leakage)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features (reuses the `scaler` created earlier in this cell):
# fit on the training rows, apply to both splits
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Build churn prediction model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Output layer with sigmoid for binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=10)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

# 5. Forecasting Banking Stock Prices (Time Series Analysis)
# Banks may also use machine learning to predict future stock prices. A simple time series forecasting model can be 
# created using LSTM (Long Short-Term Memory), a type of recurrent neural network (RNN) suitable for sequential data.
# Banking Scenario: Stock Price Prediction using LSTM
# Here, we will use LSTM to predict future stock prices based on historical data.

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import numpy as np

# Simulated history of ten consecutive stock prices
stock_data = [100, 101, 102, 103, 105, 106, 107, 108, 109, 110]  # Stock prices

# Turn a price series into supervised (window, next value) pairs
def create_dataset(data, time_step=1):
    """Slice a sequence into sliding windows and their following values.

    Args:
        data: Sliceable sequence of values (a list of prices here).
        time_step: Number of consecutive values in each input window.

    Returns:
        A pair ``(X, y)`` of NumPy arrays. ``X[i]`` is ``data[i:i + time_step]``
        and ``y[i]`` is ``data[i + time_step]``; there are
        ``len(data) - time_step`` pairs, or none when that is not positive.
    """
    n_windows = len(data) - time_step
    windows = [data[start:start + time_step] for start in range(n_windows)]
    targets = [data[start + time_step] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Each training sample is a window of `time_step` prices; its target is the next price
time_step = 3
X, y = create_dataset(stock_data, time_step)

# Reshaping for LSTM [samples, time steps, features]
X = X.reshape(X.shape[0], X.shape[1], 1)

# Define the LSTM model
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
    LSTM(units=50),
    Dense(1)  # Output layer for regression (continuous value)
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X, y, epochs=100, batch_size=1, verbose=2)

# Predict the next stock price from the most recent `time_step` prices.
# X[-1] is the last *training* window, whose target is the final price already
# in stock_data; forecasting the unseen next value needs the window that ends
# at the last known price.
latest_window = np.array(stock_data[-time_step:]).reshape(1, time_step, 1)
predicted_price = model.predict(latest_window)
print(f"Predicted Next Stock Price: {predicted_price[0][0]}")

# 6. Loan Default Prediction with TensorFlow Decision Forests (TF-DF)
# TensorFlow Decision Forests (TF-DF) is an alternative to deep learning models, focusing on decision trees 
# and ensemble methods, suitable for financial applications like loan default prediction.

import tensorflow_decision_forests as tfdf

# Small in-memory sample; `defaulted` is the label (1 = Defaulted, 0 = No Default)
data = dict(
    age=[25, 30, 45, 35, 50],
    income=[50000, 60000, 100000, 70000, 120000],
    loan_amount=[20000, 25000, 50000, 30000, 60000],
    defaulted=[1, 0, 0, 0, 1],
)

df = pd.DataFrame(data)

# Wrap the DataFrame as a TF-DF dataset, declaring the label column and the task
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
    df,
    label="defaulted",
    task=tfdf.keras.Task.CLASSIFICATION,
)

# Random Forest model configured for classification
model = tfdf.keras.RandomForestModel(task=tfdf.keras.Task.CLASSIFICATION)
model.fit(train_ds)

# Predictions are made on the same dataset the model was trained on
predictions = model.predict(train_ds)
print(predictions)


In [None]:
#  1: Loan Approval Prediction Using Softmax
# In this scenario, we have a model predicting the loan approval status for a customer based on features like income, 
# credit score, etc. Instead of a binary classification (approved/denied), we predict multiple possible outcomes like:
# 1.	Loan Approved
# 2.	Loan Denied
# 3.	Pending Review
# Step-by-Step Code:
# 1.	Simulate Customer Data
# 2.	Train a Model
# 3.	Apply Softmax to Predict Probabilities

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Sample banking data: [income, credit_score, loan_amount]
data = {
    'income': [50000, 60000, 100000, 70000, 120000],
    'credit_score': [700, 650, 800, 720, 850],
    'loan_amount': [20000, 25000, 50000, 30000, 60000],
    'approval_status': [0, 1, 0, 1, 2]  # 0 = Approved, 1 = Denied, 2 = Pending
}

# Convert data to DataFrame
import pandas as pd
df = pd.DataFrame(data)

# Features and target variable
X = df.drop('approval_status', axis=1)
y = df['approval_status']

# Split into training and testing sets BEFORE scaling, so the scaler's
# statistics come from training rows only (no test-set leakage)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features: fit on the training rows, apply to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Define the model for multi-class classification
model = Sequential([
    Dense(32, input_dim=X_train.shape[1], activation='relu'),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')  # 3 classes: Approved, Denied, Pending
])

# Compile the model; sparse categorical cross-entropy takes the integer class
# labels in `y` directly (no one-hot encoding needed)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=10)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

# Predict probabilities with softmax (one row per test sample, one column per class)
predictions = model.predict(X_test)

# Display predicted probabilities for each class (Approved, Denied, Pending)
print("Predicted Probabilities:")
print(predictions)

In [None]:
# Scenario 2: Predicting Customer Churn Using Softmax
# In this scenario, we predict the likelihood of a customer churning (leaving the bank) based on 
# customer behavior. The possible outcomes can be:
# 1.	Will Stay
# 2.	Will Churn
# 3.	Uncertain
# This problem can be modeled as a multi-class classification problem with Softmax.

# Simulated churn data
churn_data = {
    'age': [25, 30, 45, 35, 50],
    'transaction_frequency': [5, 2, 8, 3, 1],
    'balance': [1000, 1500, 3000, 1200, 400],
    'churn_status': [0, 1, 0, 1, 2]  # 0 = Will Stay, 1 = Will Churn, 2 = Uncertain
}

df_churn = pd.DataFrame(churn_data)

# Features and target variable
X_churn = df_churn.drop('churn_status', axis=1)
y_churn = df_churn['churn_status']

# Split data into training and testing sets BEFORE scaling, so the scaler's
# statistics come from training rows only (no test-set leakage)
X_train_raw_churn, X_test_raw_churn, y_train_churn, y_test_churn = train_test_split(
    X_churn, y_churn, test_size=0.2, random_state=42
)

# Normalize the features (reuses the `scaler` created in the previous cell):
# fit on the training rows, apply to both splits
X_train_churn = scaler.fit_transform(X_train_raw_churn)
X_test_churn = scaler.transform(X_test_raw_churn)

# Build the neural network model for customer churn prediction
model_churn = Sequential([
    Dense(64, input_dim=X_train_churn.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')  # 3 classes: Will Stay, Will Churn, Uncertain
])

# Compile the model; sparse categorical cross-entropy takes the integer class
# labels directly (no one-hot encoding needed)
model_churn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model_churn.fit(X_train_churn, y_train_churn, epochs=100, batch_size=10)

# Evaluate the model on the test set
loss, accuracy = model_churn.evaluate(X_test_churn, y_test_churn)
print(f"Test Accuracy: {accuracy:.2f}")

# Predict probabilities with softmax (one row per test sample, one column per class)
churn_predictions = model_churn.predict(X_test_churn)

# Display predicted probabilities for each class (Will Stay, Will Churn, Uncertain)
print("Predicted Probabilities for Customer Churn:")
print(churn_predictions)


In [None]:
# Scenario 1: Check Processing and Signature Verification
# Banks receive checks for deposit, and it's essential to automate the process of recognizing check images, 
# reading the details (e.g., account number, check amount), and verifying the signature.
# Application of Image Recognition:
# •	Optical Character Recognition (OCR) to read check details such as account number, check number, and amount.
# •	Signature verification to ensure that the signature on the check matches the one on file.
# Step-by-Step Code for Check Processing and OCR:
# We'll use the Tesseract OCR engine, which is an open-source OCR tool, along with Python libraries like pytesseract and OpenCV.
# Installing Required Libraries:

!pip install pytesseract opencv-python
# Code for Extracting Text from Check Images (OCR):

import os

import cv2
import pytesseract

# Default install location of the Tesseract executable on Windows.
# Only override pytesseract's own lookup when that file really exists, so the
# cell also runs on machines where Tesseract is simply available on the PATH.
windows_tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.exists(windows_tesseract_cmd):
    pytesseract.pytesseract.tesseract_cmd = windows_tesseract_cmd

# Read the check image
image_path = 'check_image.jpg'  # Sample check image file
img = cv2.imread(image_path)

# cv2.imread signals a missing/unreadable file by returning None rather than
# raising; fail here with a clear message instead of a cryptic cvtColor error.
if img is None:
    raise FileNotFoundError(f"Could not read check image: {image_path}")

# Convert the image to grayscale for better OCR accuracy
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply thresholding to get a binary image (enhance text visibility):
# pixels brighter than 150 become 255, everything else becomes 0
_, binary_image = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)

# Use pytesseract to extract text from the image
extracted_text = pytesseract.image_to_string(binary_image)

print("Extracted Text from Check:")
print(extracted_text)


In [None]:
# Scenario 2: Face Recognition for Identity Verification
# Face recognition technology is widely used in banking apps to verify the identity of customers. For example, it can be 
# used to allow customers to log in to mobile banking apps or verify the person performing a transaction.
# Application of Image Recognition:
# •	Face recognition to authenticate customers based on their facial features.
# We will use a pre-trained model (like OpenCV's Haar Cascades or a deep learning model like a Convolutional Neural Network 
# (CNN)) for detecting faces in images.
# Step-by-Step Code for Face Detection with OpenCV:
# Installing Required Libraries:

!pip install opencv-python
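# Code for Face Detection (locating faces in the photo; matching a detected face to a stored identity is a separate step not shown here):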

import cv2

# Load the pre-trained Haar Cascade for face detection
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

# CascadeClassifier does not raise when the XML file cannot be loaded; an
# empty classifier would only fail later inside detectMultiScale.
if face_cascade.empty():
    raise RuntimeError(f"Could not load Haar cascade: {cascade_path}")

# Read the image with the customer's photo
image_path = 'customer_photo.jpg'  # Customer's photo
img = cv2.imread(image_path)

# cv2.imread returns None (no exception) when the file is missing or unreadable
if img is None:
    raise FileNotFoundError(f"Could not read customer photo: {image_path}")

# Convert the image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Detect faces in the image
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

# Draw a rectangle around each detected face
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

# Display the image with the detected face(s).
# NOTE(review): cv2.imshow opens a native GUI window and blocks until a key is
# pressed; confirm a display is available where this notebook is run.
cv2.imshow('Detected Faces', img)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [None]:
# Scenario 3: Document Classification (Loan Application Forms)
# Banks often receive documents like loan application forms, and automating the classification of these documents can 
# speed up processing. Image recognition can help classify documents into categories like "Loan Application," "ID Proof," "Address Proof," etc.
# Application of Image Recognition:
# •	Document classification based on visual features in the form.
# In this case, we can use a Convolutional Neural Network (CNN) to classify the images of documents.
# Step-by-Step Code for Document Classification with CNN:
# Install TensorFlow:

!pip install tensorflow
# Code for Training a CNN to Classify Documents:

import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Folders holding the training and validation images
train_dir = 'path_to_train_images'  # Training images folder
validation_dir = 'path_to_validation_images'  # Validation images folder

# Pixel values are rescaled to [0, 1]; only the training images are augmented
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Both generators read 150x150 images in batches of 32 with binary labels
flow_options = dict(target_size=(150, 150), batch_size=32, class_mode='binary')
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = validation_datagen.flow_from_directory(validation_dir, **flow_options)

# Assemble the CNN layer by layer: three conv/pool stages, then a dense head
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
# Single sigmoid unit: binary classification (Loan App vs ID Proof)
model.add(layers.Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the sigmoid output; accuracy is tracked per epoch
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit for 10 epochs, validating on the held-out images after each epoch
model.fit(train_generator, epochs=10, validation_data=validation_generator)

# Persist the trained classifier to disk
model.save('document_classifier.h5')


In [None]:
# Step-by-Step Python Code Example:
# 1.	Install Required Libraries:
# You’ll need TensorFlow and Keras to build the CNN, and Matplotlib to visualize the results.

!pip install tensorflow matplotlib
# 2.	Dataset Preparation:
# For this example, let’s assume we have a dataset of ATM receipt images. The images are in different folders: one folder 
# for fraudulent receipts and one for legitimate ones. We will use ImageDataGenerator to load the data and preprocess it.

import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

# Dataset locations (e.g., 'data/train' and 'data/validation')
train_dir = 'data/train'  # Folder with images of ATM receipts (fraudulent or legitimate)
validation_dir = 'data/validation'  # Folder with validation images

# Training images are rescaled and randomly sheared/zoomed/flipped for
# robustness; validation images are only rescaled.
augmentation = {
    'rescale': 1./255,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation)
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Both splits are loaded the same way: images resized to 150x150, batches of
# 32, binary labels (Fraudulent or Legitimate).
loader_options = {
    'target_size': (150, 150),
    'batch_size': 32,
    'class_mode': 'binary',
}
train_generator = train_datagen.flow_from_directory(train_dir, **loader_options)
validation_generator = validation_datagen.flow_from_directory(validation_dir, **loader_options)

# 3.	Build the CNN Model with Flattening:
# Convolution + pooling layers extract features; their output is flattened
# into a 1D vector before it reaches the fully connected layers.
model = models.Sequential()

# First convolutional stage (declares the 150x150 RGB input)
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))

# Second and third convolutional stages
for filters in (64, 128):
    model.add(layers.Conv2D(filters, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

# Flattening layer, followed by the fully connected head
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))  # Binary output: Fraud or Legitimate

# Compile with Adam and binary cross-entropy, tracking accuracy
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer summary
model.summary()


In [None]:
# Now, we can train the model using the data generators created earlier.

# Train the model.
# The step counts are taken from the generators instead of being hard-coded:
# a fixed steps_per_epoch=100 / validation_steps=50 asks for more batches than
# a small dataset can supply, which makes Keras stop with
# "Your input ran out of data".
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),  # Number of batches per epoch
    epochs=10,
    validation_data=validation_generator,
    validation_steps=len(validation_generator)  # Number of validation batches
)

# Save the model
model.save('atm_receipt_fraud_detection_model.h5')


In [None]:
# Visualizing Model Training:
# You can visualize the training and validation accuracy over epochs to see how well the model is performing.

# One accuracy curve per data split, read from the recorded training history
for metric_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    plt.plot(history.history[metric_key], label=curve_label)

# Title, axis labels and legend, then render the figure
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='upper left')
plt.show()


In [None]:
# Python Code for Tabular Q-Learning:

import numpy as np

# Define the environment (simple banking scenario)
states = [0, 1, 2]  # 0: Low balance, 1: Medium balance, 2: High balance
actions = [0, 1, 2]  # 0: No action, 1: Offer loan, 2: Send notification

# Initialize Q-table with zeros (rows: states, columns: actions)
Q = np.zeros((len(states), len(actions)))

# Define parameters
alpha = 0.1  # learning rate
gamma = 0.9  # discount factor
epsilon = 0.1  # exploration rate (epsilon-greedy)
episodes = 1000

# Seeded generator so that re-running the cell reproduces the same Q-table
rng = np.random.default_rng(42)

# Rewards for each action-state pair, keyed by (state, action)
rewards = {
    (0, 0): -1, (0, 1): -10, (0, 2): 2,  # Low balance
    (1, 0): 0, (1, 1): 2, (1, 2): 1,   # Medium balance
    (2, 0): 1, (2, 1): 5, (2, 2): 3   # High balance
}

# Simulate the learning process
for episode in range(episodes):
    state = rng.choice(states)  # Random initial state

    while state != 2:  # Stop if we reach the 'high balance' state
        # Epsilon-greedy action selection. A purely greedy policy on a
        # zero-initialised table keeps repeating the first action whose
        # estimate turns positive and never tries the alternatives, so the
        # table would not reflect the better-rewarded actions.
        if rng.random() < epsilon:
            action = rng.choice(actions)  # Explore: choose random action
        else:
            action = np.argmax(Q[state])  # Exploit: choose action with max Q-value

        next_state = rng.choice(states)  # Transition to a new state (simulated)

        reward = rewards[(state, action)]  # Get the reward for the state-action pair

        # Update the Q-table using the Q-learning equation
        Q[state, action] = Q[state, action] + alpha * (reward + gamma * np.max(Q[next_state]) - Q[state, action])

        state = next_state  # Move to the next state

# Print final Q-table
print("Final Q-table:")
print(Q)


In [None]:
# Python Code for Deep Q-Learning (using TensorFlow/Keras):

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

# Define the environment and actions as in the previous example
states = 10  # Simulate continuous states (for simplicity, discretized)
actions = 3  # 3 possible actions
state_size = 10  # Number of state features
action_size = 3  # Number of actions


def create_model():
    """Build and compile the Q-network.

    Returns:
        A compiled Keras Sequential model that takes a vector of
        ``state_size`` features and outputs one linear Q-value per action
        (``action_size`` outputs), trained with Adam (lr=0.001) and MSE loss.
    """
    model = tf.keras.Sequential([
        layers.Dense(64, input_dim=state_size, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(action_size, activation='linear')  # Q-values for each action
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')
    return model


# Initialize model and memory
model = create_model()
memory = []  # List to store (state, action, reward, next_state) tuples

# Q-learning hyperparameters
gamma = 0.9  # discount factor
epsilon = 0.1  # exploration rate (epsilon-greedy)
batch_size = 32


def train_model(episodes=1000, steps_per_episode=100):
    """Simulate a random environment and train the Q-network with replay.

    Every step stores a (state, action, reward, next_state) tuple in the
    module-level ``memory`` list and, once more than ``batch_size`` tuples are
    available, fits ``model`` on one randomly sampled minibatch.

    Args:
        episodes: Number of simulated episodes (default 1000).
        steps_per_episode: Number of steps simulated per episode (default 100).
    """
    for episode in range(episodes):
        state = np.random.rand(state_size)  # Simulate a random state

        for step in range(steps_per_episode):
            if np.random.rand() < epsilon:
                action = np.random.randint(action_size)  # Explore: choose random action
            else:
                q_values = model.predict(state.reshape(1, -1), verbose=0)
                action = int(np.argmax(q_values[0]))  # Exploit: choose best action

            next_state = np.random.rand(state_size)  # Simulate the next state
            reward = np.random.randn()  # Simulate a random reward

            memory.append((state, action, reward, next_state))

            # Train the model with a random batch from memory
            if len(memory) > batch_size:
                # np.random.choice only accepts 1-D input, so it cannot sample
                # the list of tuples directly; sample positions instead.
                picks = np.random.choice(len(memory), batch_size, replace=False)
                batch_states = np.array([memory[i][0] for i in picks])
                batch_actions = np.array([memory[i][1] for i in picks])
                batch_rewards = np.array([memory[i][2] for i in picks])
                batch_next_states = np.array([memory[i][3] for i in picks])

                # One batched forward pass per network input instead of three
                # Keras calls per sampled transition.
                next_q = model.predict(batch_next_states, verbose=0)
                targets = model.predict(batch_states, verbose=0)

                # Only the Q-value of the action actually taken is moved
                # towards the target r + gamma * max_a' Q(next_state, a').
                targets[np.arange(batch_size), batch_actions] = batch_rewards + gamma * next_q.max(axis=1)
                model.fit(batch_states, targets, batch_size=batch_size, verbose=0)

            state = next_state  # Move to the next state


train_model()


In [None]:
# Code Sample to Demonstrate Key Terminologies:

import numpy as np

# States: customer balance levels
states = [0, 1, 2]  # 0: Low balance, 1: Medium balance, 2: High balance

# Actions: what the bank can do for a customer
actions = [0, 1, 2]  # 0: No action, 1: Offer loan, 2: Send notification

# Q-table: one row per state, one column per action, all zeros to begin with
Q = np.zeros((len(states), len(actions)))

# Hyperparameters
alpha = 0.1  # Learning rate
gamma = 0.9  # Discount factor
epsilon = 0.1  # Exploration factor (epsilon-greedy)
episodes = 1000  # Number of training episodes

# Immediate reward, keyed by (state, action)
rewards = {
    (0, 0): -1, (0, 1): -10, (0, 2): 2,  # Low balance
    (1, 0): 0, (1, 1): 2, (1, 2): 1,   # Medium balance
    (2, 0): 1, (2, 1): 5, (2, 2): 3   # High balance
}

# Q-learning simulation
for episode in range(episodes):
    state = np.random.choice(states)  # Every episode starts in a random state

    # The episode ends as soon as the 'high balance' state (2) is reached
    while state != 2:
        # Epsilon-greedy: explore with probability epsilon, otherwise exploit
        explore = np.random.rand() < epsilon
        action = np.random.choice(actions) if explore else np.argmax(Q[state])

        # The next state is drawn at random; the reward depends on (state, action)
        next_state = np.random.choice(states)
        reward = rewards[(state, action)]

        # Temporal-difference update of the visited table entry
        td_target = reward + gamma * np.max(Q[next_state])
        Q[state, action] += alpha * (td_target - Q[state, action])

        state = next_state

# Show the learned table
print("Final Q-table:")
print(Q)


In [None]:
# Python Code for Determining Q-value in a Banking Scenario
# Below is a Python code example for determining the Q-values in the ATM scenario described above.

import numpy as np

# Customer balance levels used as states
states = [0, 1, 2]  # 0: Low balance, 1: Medium balance, 2: High balance

# Actions the bank can take
actions = [0, 1, 2]  # 0: No action, 1: Offer loan, 2: Send notification

# Zero-initialised Q-table (3 states x 3 actions)
Q = np.zeros((len(states), len(actions)))

# Learning parameters
alpha = 0.1  # Learning rate
gamma = 0.9  # Discount factor
epsilon = 0.1  # Exploration factor (epsilon-greedy)
episodes = 1000  # Number of training episodes

# Reward table: row = state, column = action
reward_rows = [
    [-1, -10, 2],  # Low balance state rewards
    [0, 2, 1],     # Medium balance state rewards
    [1, 5, 3],     # High balance state rewards
]
# Same information as a lookup keyed by (state, action)
rewards = {
    (s, a): value
    for s, row in enumerate(reward_rows)
    for a, value in enumerate(row)
}

# Run the Q-learning simulation
for episode in range(episodes):
    state = np.random.choice(states)  # Random starting state

    # Keep stepping until the 'high balance' state is reached
    while state != 2:
        # Explore with probability epsilon, otherwise take the best-known action
        if np.random.rand() < epsilon:
            action = np.random.choice(actions)
        else:
            action = np.argmax(Q[state])

        # Random transition; reward looked up for the pair just taken
        next_state = np.random.choice(states)
        reward = rewards[(state, action)]

        # Q-learning update rule, written as target minus current estimate
        best_next = np.max(Q[next_state])
        td_error = reward + gamma * best_next - Q[state, action]
        Q[state, action] = Q[state, action] + alpha * td_error

        # Continue from the new state
        state = next_state

# Report the learned Q-values
print("Final Q-table:")
print(Q)


In [None]:
# Installing LangChain:
# To install LangChain and the necessary dependencies, run the following commands:

!pip install langchain openai
!pip install pandas  # In case you want to use structured data like CSVs
!pip install requests  # For API interaction
# Basic LangChain Setup with Python Code:
# Let's start by using LangChain with an LLM from OpenAI (e.g., GPT-3 or GPT-4) in a simple chain.

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Language model backing the chain (an OpenAI API key is required)
llm = OpenAI(
    temperature=0.7,
    api_key="your-openai-api-key",
)

# Prompt with one placeholder, {text}, for the sentence to translate
template = "Translate the following English text to French: {text}"
prompt = PromptTemplate(template=template, input_variables=["text"])

# Chain that fills the prompt and sends it to the model
chain = LLMChain(prompt=prompt, llm=llm)

# Translate one sentence and show the model's answer
output = chain.run("Hello, how are you?")
print(output)


In [None]:
# Scenario: A bank's chatbot that answers customer queries about account balances, recent transactions, or loan information.

from langchain.agents import initialize_agent, AgentType
from langchain.agents import Tool
from langchain.llms import OpenAI  # used below; previously relied on an earlier cell's import
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Example tools: Assume functions to interact with bank APIs are available
def get_balance(account_id):
    """Return the balance for ``account_id``.

    Dummy implementation that always returns 1000.0; replace with a real API
    call to the bank backend.
    """
    return 1000.0  # Dummy balance

def get_transaction_history(account_id):
    """Return the transaction history for ``account_id`` as a list of strings.

    Dummy implementation with two fixed entries; replace with a real
    transaction query.
    """
    return ["Transaction 1: -$200", "Transaction 2: +$500"]

# Create tool for balance query
balance_tool = Tool(
    name="Get Account Balance",
    func=get_balance,
    description="This tool retrieves the balance for a given account."
)

# Create tool for transaction history query
transaction_tool = Tool(
    name="Get Transaction History",
    func=get_transaction_history,
    description="This tool retrieves the transaction history for a given account."
)

# Initialize the language model
llm = OpenAI(api_key="your-openai-api-key", temperature=0.5)

# Create the agent with tools.
# The agent kind is selected with the `agent` keyword; `agent_type` is not a
# parameter of initialize_agent, so the requested type was never applied.
tools = [balance_tool, transaction_tool]
agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

# Simulating customer query for balance
query = "What is my current account balance?"
response = agent.run(query)
print(response)

# Simulating customer query for transactions
query = "Show me my last 5 transactions."
response = agent.run(query)
print(response)


In [None]:
# Processing Loan Applications
# LangChain can automate the review and analysis of loan application forms, helping the bank make faster decisions 
# based on customer inputs. It can be used for document summarization, checking eligibility criteria, and auto-filling forms.
# Scenario: Automatically review loan applications and extract key details such as the applicant's name, income, loan amount, etc.

from langchain.document_loaders import TextLoader
from langchain.chains import AnalyzeDocumentChain
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Loader for the loan application, read here from a plain-text file
loader = TextLoader("loan_application.txt")

# Prompt asking for the key fields; {document} receives the application text
template = (
    "\n"
    "Extract the applicant's name, requested loan amount, and monthly income from the following loan application:\n"
    "{document}\n"
)
prompt = PromptTemplate(template=template, input_variables=["document"])

# Language model used for the extraction
llm = OpenAI(temperature=0.3, api_key="your-openai-api-key")

# Chain that combines the prompt with the model
chain = LLMChain(prompt=prompt, llm=llm)

# Read the file; the loader returns a list of documents
document = loader.load()

# Analyse the text of the first loaded document and show the result
application_content = document[0].page_content
output = chain.run(application_content)
print(output)


In [None]:
# Scenario: Automatically flag suspicious transactions based on specific patterns like large transfers, frequent international 
# transactions, or inconsistent spending.

import pandas as pd
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Four sample transactions (in practice these come from the bank's transaction system)
data = dict(
    transaction_id=[1, 2, 3, 4],
    amount=[1500, 25000, 50, 2000],
    transaction_type=["Debit", "Transfer", "Debit", "Transfer"],
    location=["USA", "Switzerland", "USA", "Germany"],
)
df = pd.DataFrame(data)

# Prompt that asks the model to review the transactions placed in {transactions}
template = (
    "\n"
    "Analyze the following transactions for possible fraudulent activity based on amount, transaction type, and location:\n"
    "{transactions}\n"
)
prompt = PromptTemplate(template=template, input_variables=["transactions"])

# Language model and the chain built on top of it
llm = OpenAI(temperature=0.6, api_key="your-openai-api-key")
chain = LLMChain(prompt=prompt, llm=llm)

# Render the table as plain text (without the index column) for the prompt
transactions_text = df.to_string(index=False)

# Ask the model for its assessment and print it
output = chain.run(transactions_text)
print(output)


In [None]:
# Scenario: Automating responses to frequently asked questions like checking account balances, transferring money, or understanding loan terms.

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.agents import initialize_agent, Tool, AgentType

# Function to simulate querying a customer account balance
def get_balance(account_id):
    """Return a balance message for ``account_id``.

    Simulated response with a fixed $10,000 balance; in practice this would
    query a banking API.
    """
    return f"Your current balance is $10,000 for account {account_id}."

# Create a tool for the agent to use for balance queries
balance_tool = Tool(
    name="Balance Query",
    func=get_balance,
    description="This tool helps in retrieving customer account balances."
)

# Initialize the LLM (OpenAI GPT-3/4 model)
llm = OpenAI(api_key="your-openai-api-key", temperature=0.5)

# Create an agent using LangChain's zero-shot ReAct agent.
# The agent kind is selected with the `agent` keyword; `agent_type` is not a
# parameter of initialize_agent, so the requested type was never applied.
tools = [balance_tool]
agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

# Simulate a customer query
query = "What is my balance in account 12345?"
response = agent.run(query)
print(response)


In [None]:
# Scenario: Extracting applicant details from loan application forms (like PDFs), checking eligibility, and providing recommendations.

from langchain.document_loaders import TextLoader
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# In-memory stand-in for a loan application (in reality, you'd load from a file)
application_text = "\n".join([
    "",
    "Applicant: John Doe",
    "Income: $75,000",
    "Requested Loan: $20,000",
    "Credit Score: 720",
    "",
])

# Prompt listing the fields to pull out; {document} receives the application
template = (
    "\n"
    "Extract the applicant's name, requested loan amount, and income from the following loan application:\n"
    "{document}\n"
)
prompt = PromptTemplate(template=template, input_variables=["document"])

# Language model used for the extraction
llm = OpenAI(temperature=0.3, api_key="your-openai-api-key")

# Chain = prompt + model
chain = LLMChain(prompt=prompt, llm=llm)

# Analyse the application and print what the model extracted
output = chain.run(application_text)
print(output)


In [None]:
# Scenario: Flagging suspicious transactions based on transaction amount, type, and location.

import pandas as pd
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Column-wise sample of transactions (normally supplied by the bank's internal system)
transaction_ids = [1, 2, 3, 4]
amounts = [1500, 25000, 50, 2000]
transaction_types = ["Debit", "Transfer", "Debit", "Transfer"]
locations = ["USA", "Switzerland", "USA", "Germany"]
data = {
    "transaction_id": transaction_ids,
    "amount": amounts,
    "transaction_type": transaction_types,
    "location": locations,
}

df = pd.DataFrame(data)

# Prompt asking the model to point out suspicious rows in {transactions}
template = (
    "\n"
    "Analyze the following transactions and identify any that may be considered suspicious due to high amounts or unusual locations:\n"
    "{transactions}\n"
)
prompt = PromptTemplate(template=template, input_variables=["transactions"])

# Model plus prompt make up the analysis chain
llm = OpenAI(temperature=0.6, api_key="your-openai-api-key")
chain = LLMChain(prompt=prompt, llm=llm)

# The model receives the table as index-free plain text
transactions_text = df.to_string(index=False)

# Run the analysis and display the answer
output = chain.run(transactions_text)
print(output)


In [None]:
# Scenario: Offering personalized advice for savings and investments based on a customer’s financial profile.

# Financial profile of one example customer
customer_data = dict(
    income=5000,
    expenses=3000,
    savings=20000,
    investment_preferences="low risk",
)

# Advice prompt with one placeholder per profile field
template = (
    "\n"
    "Given the following financial profile, provide personalized advice on savings and investments:\n"
    "Income: {income}\n"
    "Expenses: {expenses}\n"
    "Savings: {savings}\n"
    "Investment Preferences: {investment_preferences}\n"
)
# The prompt variables are exactly the profile's keys, in the same order
prompt = PromptTemplate(template=template, input_variables=list(customer_data))

# Language model that writes the advice
llm = OpenAI(temperature=0.7, api_key="your-openai-api-key")

# Chain producing personalized advice from a profile
chain = LLMChain(prompt=prompt, llm=llm)

# Feed the profile to the chain and print the advice
output = chain.run(customer_data)
print(output)


In [None]:
# Banking Scenario: Automating customer support for account-related queries by leveraging an LLM to generate responses to frequently asked questions.

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Language model (OpenAI GPT-3) that answers the question
llm = OpenAI(
    temperature=0.7,
    api_key="your-openai-api-key",
)

# Question template; {account_number} is filled in when the chain runs
template = "What is the current balance of account {account_number}?"
prompt = PromptTemplate(template=template, input_variables=["account_number"])

# Chain that sends the filled-in prompt to the model
chain = LLMChain(prompt=prompt, llm=llm)

# Ask about one account and print the model's reply
response = chain.run(account_number="123456")
print(response)


In [None]:
# Banking Scenario: Creating a loan processing system that checks a customer's credit score, loan eligibility, and then generates a report.

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Initialize LLM
llm = OpenAI(api_key="your-openai-api-key", temperature=0.7)

# Define templates for different stages of loan processing
credit_score_template = "What is the credit score for a person with the following information: {customer_data}?"
loan_eligibility_template = "Is a person eligible for a loan based on the following credit score: {credit_score}?"

# Create the chains for processing the loan application
credit_score_chain = LLMChain(llm=llm, prompt=PromptTemplate(input_variables=["customer_data"], template=credit_score_template))
loan_eligibility_chain = LLMChain(llm=llm, prompt=PromptTemplate(input_variables=["credit_score"], template=loan_eligibility_template))

# Simulate customer data
customer_data = {"name": "John Doe", "income": 60000, "loan_amount": 20000}

# Render the profile as readable text for the prompt
customer_summary = ", ".join(f"{field}: {value}" for field, value in customer_data.items())

# Run the chains sequentially.
# The profile must be bound to the prompt variable by name: a dict passed
# positionally is treated as the chain's full input mapping, and this one has
# no "customer_data" key, so the chain rejected it as a missing input.
credit_score = credit_score_chain.run(customer_data=customer_summary)
eligibility = loan_eligibility_chain.run(credit_score=credit_score)

print(f"Credit Score: {credit_score}")
print(f"Loan Eligibility: {eligibility}")


In [None]:
# Banking Scenario: Building a virtual assistant that can help customers with various services, such as checking account balance, 
# transferring funds, or providing loan details.

from langchain.agents import initialize_agent, AgentType
from langchain.agents import Tool
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Define a few tools for handling bank operations
def get_balance(account_id):
    """Return a balance message for ``account_id``.

    Placeholder with a fixed $5000 balance; in practice, integrate with your
    bank's system.
    """
    return f"Your balance for account {account_id} is $5000."

def transfer_funds(from_account, to_account, amount):
    """Return a confirmation message for a simulated funds transfer."""
    # Placeholder: Simulate a funds transfer
    return f"Transferred ${amount} from account {from_account} to account {to_account}."

def _transfer_funds_from_text(tool_input):
    """Adapter that lets the single-input ``Tool`` call ``transfer_funds``.

    ``Tool`` hands its function one string, while ``transfer_funds`` needs
    three arguments. The string is expected in the form
    ``from_account, to_account, amount``. On malformed input a message is
    returned (not raised) so the agent can see it and try again.
    """
    parts = [part.strip() for part in tool_input.split(",")]
    if len(parts) != 3 or not all(parts):
        return "Invalid input. Expected a comma-separated string: from_account, to_account, amount."
    from_account, to_account, amount = parts
    # transfer_funds adds the "$" itself, so drop one supplied by the model
    return transfer_funds(from_account, to_account, amount.lstrip("$"))

# Initialize tools
balance_tool = Tool(name="Balance", func=get_balance, description="Fetch account balance.")
transfer_tool = Tool(
    name="Transfer",
    func=_transfer_funds_from_text,
    description=(
        "Transfer funds between accounts. "
        "Input must be a comma-separated string: from_account, to_account, amount."
    ),
)

# Initialize the LLM
llm = OpenAI(api_key="your-openai-api-key", temperature=0.5)

# Initialize the agent with the tools.
# The agent kind is selected with the `agent` keyword; `agent_type` is not a
# parameter of initialize_agent, so the requested type was never applied.
tools = [balance_tool, transfer_tool]
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Example user query
query = "What is the balance of account 12345?"
response = agent.run(query)
print(response)

query = "Transfer $500 from account 12345 to account 67890."
response = agent.run(query)
print(response)


In [None]:
# Banking Scenario: Building a virtual banking assistant that remembers past transactions or loan requests to provide more personalized service.

from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Initialize memory to store conversation history
memory = ConversationBufferMemory()

# Initialize LLM
llm = OpenAI(api_key="your-openai-api-key", temperature=0.7)

# Create a prompt template to respond based on memory
template = "What was the last transaction made by {user_name}? {conversation_history}"
prompt = PromptTemplate(input_variables=["user_name", "conversation_history"], template=template)

# Create the LLM chain; the stored history is passed to it explicitly below
chain = LLMChain(llm=llm, prompt=prompt)

# Simulate a series of interactions (customer and assistant turns alternate)
conversation = [
    "What is my balance?",
    "Your balance is $5,000.",
    "Have I made any recent transactions?",
    "Yes, you made a $200 purchase yesterday."
]

# Store conversation history in memory.
# Even positions are customer messages and odd positions are the assistant's
# replies, so each turn is recorded under the right speaker.
for turn_index, message in enumerate(conversation):
    if turn_index % 2 == 0:
        memory.chat_memory.add_user_message(message)
    else:
        memory.chat_memory.add_ai_message(message)

# Read the history back through the memory object's public accessor
# (the chat history object has no `get_history` method).
conversation_history = memory.load_memory_variables({})["history"]

# Generate response based on memory
response = chain.run(user_name="John", conversation_history=conversation_history)
print(response)


In [None]:
# Banking Scenario: Integrating an external credit score API to check customer credit ratings for loan approval.

import requests
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.agents import Tool
# Both names are used below; previously they were only available if an
# earlier cell had already imported them.
from langchain.agents import initialize_agent, AgentType

# Simulating an external credit score API (replace with real API in production)
def get_credit_score(customer_id):
    """Return the credit score for ``customer_id``.

    Dummy implementation that always returns 720; replace with a real API call.
    """
    return 720  # A valid credit score for the demo

# Tool to interact with the credit score API
credit_score_tool = Tool(
    name="Credit Score Checker",
    func=get_credit_score,
    description="Fetch the credit score of a customer."
)

# Initialize LLM
llm = OpenAI(api_key="your-openai-api-key", temperature=0.7)

# Create the agent with the tool.
# The agent kind is selected with the `agent` keyword; `agent_type` is not a
# parameter of initialize_agent, so the requested type was never applied.
tools = [credit_score_tool]
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Simulate customer query to check credit score
query = "What is the credit score of customer 123?"
response = agent.run(query)
print(response)


In [None]:
# Banking Scenario: Automating the extraction of key information from loan application documents (e.g., name, loan amount, income) for faster processing.

from langchain.document_loaders import TextLoader
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# In-memory stand-in for a loan application text document
application_text = "\n".join([
    "",
    "Applicant: Jane Doe",
    "Loan Amount: $30,000",
    "Income: $85,000",
    "",
])

# Prompt enumerating the three details to extract; {document} receives the text
template = (
    "\n"
    "Extract the following details from the loan application:\n"
    "1. Applicant Name\n"
    "2. Requested Loan Amount\n"
    "3. Income\n"
    "{document}\n"
)
prompt = PromptTemplate(template=template, input_variables=["document"])

# Language model used for the extraction
llm = OpenAI(temperature=0.5, api_key="your-openai-api-key")

# Chain = prompt + model
chain = LLMChain(prompt=prompt, llm=llm)

# Extract the details from the application and print them
output = chain.run(application_text)
print(output)


In [None]:
# Banking Scenario: A virtual assistant for customers in the banking sector that can answer questions about account balances,
# loan status, and recent transactions.

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Initialize OpenAI LLM (GPT-3/4) for querying. No key is hardcoded in the
# notebook: LangChain's OpenAI wrapper reads it from OPENAI_API_KEY.
llm = OpenAI(temperature=0.7)

# Define the prompt template for answering customer queries.
# NOTE(review): the prompt carries no account data, so the model has nothing
# to ground a balance on; a real assistant would supply it via a tool or lookup.
template = "What is the balance of account {account_number}?"
prompt = PromptTemplate(input_variables=["account_number"], template=template)

# Create an LLM chain
chain = LLMChain(llm=llm, prompt=prompt)

# Example customer query
response = chain.run(account_number="12345")
print(response)


In [None]:
# Banking Scenario: A banking application that integrates with a credit score API to check a customer's eligibility for a loan.

import requests
from langchain.agents import Tool, initialize_agent, AgentType
from langchain.llms import OpenAI

# Simulated external API for checking credit score
def check_credit_score(customer_id):
    """Return a placeholder credit score for the given customer.

    Simulates an external API response: ``customer_id`` is accepted but not
    consulted, and the same example score is returned on every call.
    """
    simulated_score = 750  # example of a good credit score
    return simulated_score

# Tool for checking credit score
credit_score_tool = Tool(
    name="Credit Score Checker",
    func=check_credit_score,
    description="Check the credit score of a customer."
)

# Initialize OpenAI LLM. No key is hardcoded in the notebook: LangChain's
# OpenAI wrapper reads it from the OPENAI_API_KEY environment variable.
llm = OpenAI(temperature=0.6)

# Initialize agent with credit score tool.
# `initialize_agent` selects the agent through its `agent` keyword.
tools = [credit_score_tool]
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Example query to check credit score
response = agent.run("What is the credit score for customer 12345?")
print(response)


In [None]:
# Banking Scenario: A virtual assistant that remembers a customer's previous loan application details, such as loan amount and current status.

from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Initialize memory to store conversation history
memory = ConversationBufferMemory()

# Initialize OpenAI LLM. No key is hardcoded in the notebook: LangChain's
# OpenAI wrapper reads it from the OPENAI_API_KEY environment variable.
llm = OpenAI(temperature=0.7)

# Define a prompt template that uses conversation history
template = "Based on the following conversation, what is the loan status for the customer? {conversation_history}"
prompt = PromptTemplate(input_variables=["conversation_history"], template=template)

# Create the LLM chain. The memory is not attached to the chain here; its
# contents are passed explicitly as the `conversation_history` variable below.
chain = LLMChain(llm=llm, prompt=prompt)

# Simulate a series of interactions (customer and assistant turns alternate,
# starting with the customer)
conversation = [
    "What is the loan status for my application?",
    "Your loan application is being processed.",
    "What is the loan amount I applied for?",
    "You applied for a $20,000 loan."
]

# Store conversation history in memory, recording each turn under the
# speaker it belongs to so the transcript keeps its Human/AI structure
for turn_index, message in enumerate(conversation):
    if turn_index % 2 == 0:
        memory.chat_memory.add_user_message(message)
    else:
        memory.chat_memory.add_ai_message(message)

# Generate response based on memory: load_memory_variables returns the
# buffered transcript under the default "history" key
conversation_history = memory.load_memory_variables({})["history"]
response = chain.run(conversation_history=conversation_history)
print(response)


In [None]:
# Banking Scenario: Automatically extracting key details from a loan application document (e.g., applicant name, loan amount, income).

from langchain.document_loaders import TextLoader
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Simulating a loan application document
application_text = """
Applicant: Jane Doe
Loan Amount: $50,000
Income: $85,000
"""

# Define a prompt template to extract relevant information from the application.
# {document} is the single input variable and receives the application text.
template = """
Extract the following details from the loan application:
1. Applicant Name
2. Loan Amount
3. Income
{document}
"""
prompt = PromptTemplate(input_variables=["document"], template=template)

# Initialize OpenAI LLM. No key is hardcoded in the notebook: LangChain's
# OpenAI wrapper reads it from the OPENAI_API_KEY environment variable.
llm = OpenAI(temperature=0.5)

# Create LLM chain to process the loan application document
chain = LLMChain(llm=llm, prompt=prompt)

# Run the chain with the loan application document (the prompt has exactly
# one input variable, so the text can be passed positionally)
output = chain.run(application_text)
print(output)


In [None]:
# Banking Scenario: Automatically retrieving the latest stock market information or interest rates from external websites.

from langchain.tools import SeleniumWebBrowser
from langchain.llms import OpenAI
from langchain.agents import Tool, initialize_agent, AgentType

# Example tool for web scraping using Selenium
def get_stock_price(stock_symbol):
    """Scrape the text of the ``stock_price`` element for a stock symbol.

    Args:
        stock_symbol: Ticker symbol; it is percent-encoded before being
            placed in the URL path because the value comes from the agent.

    Returns:
        The ``.text`` of the element with id ``stock_price`` on the page.

    NOTE(review): relies on ``SeleniumWebBrowser`` exposing ``get``,
    ``find_element_by_id`` and ``quit`` -- confirm against the installed
    langchain/selenium versions.
    """
    from urllib.parse import quote

    symbol = quote(str(stock_symbol).strip(), safe="")
    browser = SeleniumWebBrowser(driver_path="path_to_your_webdriver")
    try:
        browser.get(f'https://www.example.com/stock/{symbol}')
        return browser.find_element_by_id('stock_price').text
    finally:
        # Always release the browser, even if the page load or lookup fails
        browser.quit()

# Create a tool for the agent to use
stock_price_tool = Tool(
    name="Stock Price Fetcher",
    func=get_stock_price,
    description="Fetch the current stock price for a given symbol."
)

# Initialize LLM and agent. No key is hardcoded in the notebook: LangChain's
# OpenAI wrapper reads it from the OPENAI_API_KEY environment variable.
llm = OpenAI(temperature=0.6)
tools = [stock_price_tool]
# `initialize_agent` selects the agent through its `agent` keyword
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Example query to get stock price for a symbol
response = agent.run("What is the stock price for AAPL?")
print(response)


In [None]:
# Banking Scenario: Querying a customer database to retrieve account details or transaction history.

import sqlite3
from langchain.agents import Tool, initialize_agent, AgentType
from langchain.llms import OpenAI

# Example database query function (SQLite for demonstration)
def get_transaction_history(account_id, db_path='banking_data.db'):
    """Return every row of ``transactions`` belonging to ``account_id``.

    Args:
        account_id: Value matched against the ``account_id`` column; bound
            as a query parameter, never interpolated into the SQL text.
        db_path: SQLite database file to query. Defaults to the notebook's
            ``banking_data.db``.

    Returns:
        A list of row tuples (empty when the account has no transactions).

    Raises:
        sqlite3.Error: If the query fails (e.g. the table does not exist).
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM transactions WHERE account_id=?", (account_id,))
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises
        conn.close()

# Tool for querying transaction history
transaction_tool = Tool(
    name="Transaction History Fetcher",
    func=get_transaction_history,
    description="Fetch transaction history for a given account."
)

# Initialize LLM and agent. No key is hardcoded in the notebook: LangChain's
# OpenAI wrapper reads it from the OPENAI_API_KEY environment variable.
llm = OpenAI(temperature=0.6)
tools = [transaction_tool]
# `initialize_agent` selects the agent through its `agent` keyword
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Example query to fetch transaction history for account 12345
response = agent.run("What are the last 5 transactions for account 12345?")
print(response)


In [None]:
# Banking Scenario: Storing loan approval documents or customer correspondence in cloud storage (e.g., S3) for future access.

import boto3
from langchain.tools import Tool
from langchain.llms import OpenAI
from langchain.agents import initialize_agent, AgentType

# Simulated function for retrieving documents from S3
def get_loan_approval_document(document_id):
    """Fetch ``loan_approval_<document_id>.pdf`` from the ``banking-docs`` bucket.

    Args:
        document_id: Identifier used to build the S3 object key.

    Returns:
        The object's bytes decoded as UTF-8 text. Bytes that are not valid
        UTF-8 are replaced with U+FFFD instead of raising.

    NOTE(review): the key ends in ``.pdf``; if the objects are real PDFs this
    text is not the document's readable content -- a PDF parser would be
    needed to extract that.
    """
    s3 = boto3.client('s3')
    bucket_name = 'banking-docs'
    file_name = f"loan_approval_{document_id}.pdf"
    file = s3.get_object(Bucket=bucket_name, Key=file_name)
    raw_bytes = file['Body'].read()
    # A strict decode raises UnicodeDecodeError on binary content
    return raw_bytes.decode('utf-8', errors='replace')

# Tool for interacting with file storage
file_tool = Tool(
    name="File Storage Handler",
    func=get_loan_approval_document,
    description="Fetch loan approval documents from cloud storage."
)

# Initialize LLM and agent. No key is hardcoded in the notebook: LangChain's
# OpenAI wrapper reads it from the OPENAI_API_KEY environment variable.
llm = OpenAI(temperature=0.7)
tools = [file_tool]
# `initialize_agent` selects the agent through its `agent` keyword
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Example query to fetch loan approval document
response = agent.run("Retrieve the loan approval document with ID 12345")
print(response)


In [None]:
# Define the Prompt Template:
# In a banking scenario, let's create a prompt template to answer a customer's query about their loan application status.
# Example: We want to create a prompt that fetches the loan status for a specific customer. The template would look like this:

from langchain.prompts import PromptTemplate

# Template for the loan application status prompt, assembled line by line.
# The text starts and ends with a newline; {account_number} is the placeholder
# that will be replaced with the customer's account number.
loan_status_template = "\n".join([
    "",
    "You are a virtual assistant for a bank. Your task is to check the loan application status.",
    "Check the loan status for customer with account number {account_number} and provide a detailed response.",
    "",
])
# ________________________________________
# 2. Set Up Input Variables:
# For the banking scenario, the input variable is the account_number, passed
# dynamically based on the user's request. The customer might provide their
# account number, and we fetch the status accordingly.

# Example input variable for account number
account_number = "123456789"
# This value would be dynamically replaced in the prompt template to generate a query that checks the loan status for the customer.
# ________________________________________
# 3. Create Prompt with LangChain's PromptTemplate Class:
# Once you define the template and the input variables, you can use LangChain's PromptTemplate class to create a complete prompt.

from langchain.prompts import PromptTemplate

# Define the template with a dynamic placeholder for account number
loan_status_template = """
You are a virtual assistant for a bank. Your task is to check the loan application status.
Check the loan status for customer with account number {account_number} and provide a detailed response.
"""

# Create the PromptTemplate object with the input variable (account_number)
prompt = PromptTemplate(input_variables=["account_number"], template=loan_status_template)

# Render the prompt with a specific account number.
# PromptTemplate fills its placeholders through `format`; it has no `render` method.
rendered_prompt = prompt.format(account_number="123456789")
print(rendered_prompt)


# 4. Use the Prompt with an LLM:
# Next, we use the rendered_prompt with an LLM, like OpenAI's GPT-3, to generate the response.

from langchain.llms import OpenAI

# Initialize OpenAI LLM. No key is hardcoded in the notebook: LangChain's
# OpenAI wrapper reads it from the OPENAI_API_KEY environment variable.
llm = OpenAI(temperature=0.7)

# Generate a response based on the rendered prompt (a plain string)
response = llm(rendered_prompt)
print(response)

# 5. Combine Prompts with Chains (Optional):
# In more complex scenarios, you might want to combine multiple prompts and steps to build a comprehensive system. 
# LangChain allows you to chain multiple steps together. For instance, you can first fetch account details from a database, 
# then pass the data to the LLM to generate a detailed response.
# Example: Combining Account Query with Loan Status Response
# Let’s imagine a scenario where we query a database for a customer’s loan application and then provide a detailed loan status based on the results.

from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
import sqlite3

# Simulate a simple database to fetch loan application status
def fetch_loan_application(account_number):
    """Look up the loan application status for an account in mock data.

    Stands in for a real database query. Returns the stored status string
    for a known account number, or "No Application Found" otherwise.
    """
    # Mock records; a real application would query its database instead
    known_statuses = dict([
        ("123456789", "In Progress"),
        ("987654321", "Approved"),
        ("112233445", "Rejected"),
    ])
    if account_number in known_statuses:
        return known_statuses[account_number]
    return "No Application Found"

# Define the template with a placeholder for loan status
loan_status_template = """
You are a virtual assistant for a bank. Your task is to check the loan application status.
The status of the loan for customer with account number {account_number} is {loan_status}.
Please provide a detailed response based on the loan status.
"""

# Create the PromptTemplate object with the input variables
prompt = PromptTemplate(input_variables=["account_number", "loan_status"], template=loan_status_template)

# Create a chain to integrate the database query and prompt. No key is
# hardcoded in the notebook: LangChain's OpenAI wrapper reads it from the
# OPENAI_API_KEY environment variable.
llm = OpenAI(temperature=0.7)
chain = LLMChain(llm=llm, prompt=prompt)

# Function to fetch loan status and pass it to the prompt
def get_loan_status(account_number):
    """Look up the loan status for ``account_number`` and have the LLM explain it.

    The status comes from ``fetch_loan_application`` and is passed to the
    chain together with the account number; the chain's text output is returned.
    """
    loan_status = fetch_loan_application(account_number)  # Fetch loan status from "database"
    result = chain.run(account_number=account_number, loan_status=loan_status)  # Pass it to LLM
    return result

# Get loan status for account 123456789 and show it (previously the result
# was computed but never displayed)
response = get_loan_status("123456789")
print(response)


In [None]:
# 1. Set Up the Environment
# Before you start building with LangChain, you need to install the required packages. In most cases, this will include 
# LangChain itself, and other dependencies like OpenAI (or other LLM providers), database connectors, or API clients.

!pip install langchain openai
# For the banking application, you may need additional libraries, such as for connecting to a database (e.g., sqlite3, pymysql),
# or for interacting with APIs (e.g., requests, boto3 for AWS integration).
# ________________________________________
# 2. Create and Configure Prompts
# The first step in using LangChain is to define prompts that will guide the behavior of the LLM. These prompts are essential in 
# instructing the model on how to respond to user queries.
# Banking Scenario: Let’s build a prompt for checking a customer's loan status using a dynamic account number.

from langchain.prompts import PromptTemplate

# Prompt text for the loan status query, assembled line by line; it starts and
# ends with a newline and carries a single {account_number} placeholder
loan_status_template = "\n".join([
    "",
    "You are a virtual assistant for a bank. Your task is to check the loan application status.",
    "Check the loan status for customer with account number {account_number} and provide a detailed response.",
    "",
])

# Wrap the text in a PromptTemplate that declares its one input variable
prompt = PromptTemplate(
    template=loan_status_template,
    input_variables=["account_number"],
)
# Here, the {account_number} will be replaced by an actual account number dynamically when the prompt is invoked.
# ________________________________________
# 3. Integrate External Tools and Data Sources
# LangChain shines when it comes to integrating external tools, databases, and APIs. In banking applications, you will often need to query databases for customer data or interact with external APIs for services like credit score checking or stock market information.
# Banking Scenario: Let’s simulate querying a simple customer database for loan application details.

import sqlite3

# Simulate a database function that fetches loan status based on account number
def fetch_loan_status(account_number, db_path='banking_data.db'):
    """Return the ``loan_status`` stored for ``account_number``.

    Args:
        account_number: Value matched against ``loans.account_number``; bound
            as a query parameter, never interpolated into the SQL text.
        db_path: SQLite database file to query. Defaults to the notebook's
            ``banking_data.db``.

    Returns:
        The status from the first matching row, or "No application found"
        when no row matches.

    Raises:
        sqlite3.Error: If the query fails (e.g. the table does not exist).
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT loan_status FROM loans WHERE account_number=?", (account_number,))
        result = cursor.fetchone()
    finally:
        # Close the connection even when the query raises
        conn.close()
    return result[0] if result else "No application found"

# Example query to fetch loan status for a specific customer.
# fetch_loan_status returns the stored status, or "No application found"
# when the loans table has no row for this account number.
account_number = "123456789"
loan_status = fetch_loan_status(account_number)
print(loan_status)  # e.g. "Approved" or "Pending", depending on the stored row
# In this example, we connect to a database to retrieve the loan status of a customer based on their account number.
# LangChain can be integrated with such tools to automate the flow of fetching data and generating responses.
# ________________________________________
# 4. Use Memory for Contextual Awareness (Optional)
# In more sophisticated applications, you may want the model to "remember" information from previous interactions with the user. 
# LangChain provides memory integrations that allow LLMs to retain contextual information across sessions.
# Banking Scenario: A customer might interact with a virtual assistant multiple times about different aspects of their 
# loan application. The assistant could "remember" previous interactions, such as the customer’s application status.

from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Initialize memory to store the conversation history (it exposes the
# transcript to the chain under the default "history" key)
memory = ConversationBufferMemory()

# Initialize the LLM (OpenAI GPT in this case). No key is hardcoded in the
# notebook: LangChain's OpenAI wrapper reads it from OPENAI_API_KEY.
llm = OpenAI(temperature=0.7)

# Define a prompt template for querying loan status. It includes {history}
# so the stored conversation actually reaches the model.
template = (
    "Previous interactions:\n{history}\n\n"
    "What is the loan status for customer {account_number}? Please summarize the previous interactions."
)
prompt = PromptTemplate(input_variables=["history", "account_number"], template=template)

# Create a chain with memory: the memory fills {history} on every run and
# records each new exchange afterwards
chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

# Simulate a conversation (assistant turns are stored with add_ai_message)
memory.chat_memory.add_user_message("What is the status of my loan?")
memory.chat_memory.add_ai_message("Your loan is under review.")
memory.chat_memory.add_user_message("When will I know the result?")

# Generate a response based on the memory and the account number
response = chain.run(account_number="123456789")
print(response)

# The assistant will be able to remember previous interactions and provide more context-aware answers.
# ________________________________________
# 5. Develop the Core Application Logic
# Once you've set up your prompts, tools, and optional memory features, you can develop the core application logic. 
# This involves combining everything together to build the final solution, integrating database queries, external APIs, 
# and prompts to create intelligent interactions.
# Banking Scenario: Let’s build a banking chatbot that checks loan application status and provides the current credit 
# score using a simple integration with a mock external API.

import requests
from langchain.llms import OpenAI
from langchain.agents import Tool, initialize_agent, AgentType

# Simulate an external API for checking credit score
def get_credit_score(account_number):
    """Return the mock credit score for an account number.

    Simulates an external credit score API using an in-memory table.

    Args:
        account_number: Account identifier. It is normalised before the
            lookup (converted to ``str``, surrounding whitespace and quote
            characters removed), so ``123456789``, ``" 123456789\\n"`` and
            ``'"123456789"'`` all resolve to the same entry.

    Returns:
        The integer score for a known account, otherwise the string
        "No score available".
    """
    # Simulating an API request to check credit score.
    # In reality, this would be a call to an actual service.
    credit_scores = {"123456789": 750, "987654321": 620}
    lookup_key = str(account_number).strip().strip("'\"").strip()
    return credit_scores.get(lookup_key, "No score available")

# Define tools for querying loan status and credit score
loan_status_tool = Tool(
    name="Loan Status Fetcher",
    func=fetch_loan_status,
    description="Fetch the loan application status of a customer by account number"
)
credit_score_tool = Tool(
    name="Credit Score Fetcher",
    func=get_credit_score,
    description="Fetch the credit score of a customer"
)

# Initialize OpenAI LLM. No key is hardcoded in the notebook: LangChain's
# OpenAI wrapper reads it from the OPENAI_API_KEY environment variable.
llm = OpenAI(temperature=0.7)

# Create an agent that can call multiple tools. The query below asks for both
# the loan status and the credit score, so both tools are registered.
# `initialize_agent` selects the agent through its `agent` keyword.
tools = [loan_status_tool, credit_score_tool]
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Get customer details and use the agent to provide a response
account_number = "123456789"
response = agent.run(f"What is the loan status and credit score for customer {account_number}?")
print(response)

# In this scenario, we combine querying a database for the loan status and using an external API to fetch the credit score. 
# The LLM provides a comprehensive response to the user based on both tools.
# ________________________________________
# 6. Implement Error Handling and Robustness
# Error handling is essential for any real-world application. You should ensure that your LangChain-based system can handle 
# exceptions gracefully and provide meaningful feedback.
# Example:

def fetch_loan_status_with_error_handling(account_number):
    """Call ``fetch_loan_status`` and turn any exception into a message.

    Returns whatever ``fetch_loan_status`` returns; if it raises, returns a
    string describing the error instead of propagating it.
    """
    try:
        status = fetch_loan_status(account_number)
    except Exception as exc:
        status = "An error occurred while fetching the loan status: " + str(exc)
    return status

# Try the wrapper with an account number that is not a real account
response = fetch_loan_status_with_error_handling("invalid_account")
print(response)


In [None]:
# Banking Scenario: Fraud Detection: Let’s assume a banking use case where we are trying to detect fraudulent transactions.
# We’ll use a simple deep learning model to classify transactions as either fraudulent or non-fraudulent.

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

# Build a simple neural network for fraud detection by passing the layer
# stack straight to the Sequential constructor
model = Sequential([
    # Input layer: assume 30 features (e.g., transaction data)
    Dense(64, input_dim=30, activation='relu'),
    # Hidden layer followed by dropout to prevent overfitting
    Dense(32, activation='relu'),
    Dropout(0.5),
    # Output layer: binary classification (fraud or not)
    Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# In this example, we define a simple feed-forward neural network to classify transactions. The input_dim=30 corresponds to 
# 30 features describing a transaction (e.g., amount, time, merchant).
# ________________________________________
# 2. Training the Model
# After building the model, we need to train it using labeled data. Keras provides an easy interface to train the model using the fit() function.
# Banking Scenario: Training a Fraud Detection Model: Assume that we have a dataset containing both fraudulent and non-fraudulent transactions.

# Example dataset: X is the feature matrix, y is the target labels (0 for non-fraud, 1 for fraud)
# NOTE: `...` (Ellipsis) is a placeholder here; assign real training arrays
# before running this cell -- presumably 30 features per row to match the
# model's input_dim; verify against your data.
X_train = ...
y_train = ...

# Train the model: 10 epochs, batches of 32, last 20% of the data held out for validation
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)
# Here, epochs=10 means the model will iterate over the data 10 times, and batch_size=32 means the model will update weights 
# after processing 32 samples at once. The validation_split=0.2 argument means 20% of the data will be used for validation during training.
# ________________________________________
# 3. Model Evaluation
# Once the model is trained, it’s essential to evaluate its performance on unseen data using metrics like accuracy, precision, recall, F1 score, etc.
# Banking Scenario: Evaluate the Performance of Fraud Detection Model: Let’s assume that we have test data (X_test, y_test) 
# that the model has never seen.

# Evaluate the model on test data. The model was compiled with
# metrics=['accuracy'], so evaluate() yields the loss followed by the accuracy.
# NOTE(review): X_test / y_test are not defined in this section -- they must
# be provided (with the same feature layout as the training data) before running.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")
# Evaluating the model on test data gives us an understanding of its performance and generalization ability to new, unseen data.
# ________________________________________
# 4. Prediction
# After training and evaluating the model, you can use it to make predictions on new, unseen data. In a banking scenario, we can use the model to predict if a transaction is fraudulent or not.
# Banking Scenario: Predict Fraudulent Transactions

# Features of the transaction to score (placeholder -- supply real data)
new_transaction = ...

# Ask the model for the fraud probability of that transaction
prediction = model.predict(new_transaction)

# Report fraud when the first prediction exceeds the 0.5 threshold,
# otherwise report the transaction as legitimate
print(
    "Fraudulent transaction detected!"
    if prediction[0] > 0.5
    else "Transaction is legitimate."
)
# Here, model.predict() outputs a probability, and we can classify the transaction as fraudulent if the probability exceeds 0.5.
# ________________________________________
# 5. Transfer Learning
# Keras also supports transfer learning, where you can take a pre-trained model (on a large dataset) and fine-tune 
# it for a specific task with a smaller dataset. This is particularly useful when you have limited data but still want to 
# leverage pre-trained models.
# Banking Scenario: Customer Churn Prediction
# You can use a pre-trained model on customer behavior (e.g., from a general dataset) and fine-tune it for customer 
# churn prediction, a common task in banking.

from keras.applications import VGG16
from keras.models import Model
from keras.layers import Flatten, Dense

# Load a pre-trained VGG16 model (ImageNet weights, classifier head removed).
# NOTE(review): input_shape=(224, 224, 3) means this network consumes
# 224x224 RGB images; tabular customer data cannot be fed to it as-is --
# confirm this example matches the intended churn inputs.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom layers for our specific task (e.g., churn prediction):
# flatten the convolutional features, then a 64-unit hidden layer and a
# single sigmoid output for the binary label
x = Flatten()(base_model.output)
x = Dense(64, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)

# Create the model
churn_model = Model(inputs=base_model.input, outputs=x)

# Freeze the base model layers (keep pre-trained weights intact).
# This is done before compile() so only the new Dense layers are trained.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
churn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fine-tune the model with your churn data.
# NOTE(review): X_churn_train / y_churn_train are not defined in this
# section; they must be supplied before running.
churn_model.fit(X_churn_train, y_churn_train, epochs=5)
# In this scenario, we leverage VGG16, a pre-trained image model, and add custom layers for predicting customer churn.
# ________________________________________
# 6. Saving and Loading Models
# Once a model is trained, it’s important to save it so you can deploy it without retraining. Keras allows you to save 
# models in both the JSON and HDF5 formats.
# Banking Scenario: Saving and Loading a Trained Model

# Save the model (the .h5 extension selects the HDF5 file format)
model.save("fraud_detection_model.h5")

# Load the saved model
from keras.models import load_model
# NOTE(review): if the model was compiled with a custom loss, loading may
# require passing it via custom_objects -- confirm for your Keras version.
loaded_model = load_model("fraud_detection_model.h5")

# Use the loaded model to make predictions (new_transaction must hold real
# feature data by this point)
loaded_model.predict(new_transaction)
# Saving and loading models is useful for deploying models to production and avoiding unnecessary retraining.
# ________________________________________
# 7. Data Preprocessing and Augmentation
# Keras provides tools to preprocess and augment data, especially for tasks like image classification, text analysis, 
# and more. In banking applications, preprocessing may involve normalization, missing value imputation, or encoding categorical variables.
# Banking Scenario: Preprocessing Transaction Data

# StandardScaler is provided by scikit-learn; keras.preprocessing has no such class
from sklearn.preprocessing import StandardScaler

# Normalize transaction data: fit the per-feature mean/variance on the
# training set and apply the transformation to it
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Optionally, perform data augmentation (for example, for time-series data)
# Data preprocessing is critical to ensure that the model performs well. For example, you might normalize the transaction amount,
# time, and other numerical features to ensure uniformity and stability in training.
# ________________________________________
# 8. Advanced Features and Custom Layers
# Keras also supports custom layers, loss functions, and other advanced features that allow you to tailor the model architecture 
# to your needs. This can be useful in specialized banking applications, like implementing custom risk scoring systems or building 
# hybrid models that combine machine learning and rule-based approaches.
# Banking Scenario: Custom Loss Function for Risk Assessment

from keras import backend as K
from keras.losses import binary_crossentropy

# Define a custom loss function that penalizes false negatives (fraud detection)
def custom_loss(y_true, y_pred):
    """Binary cross-entropy plus an extra penalty on missed fraud cases.

    Args:
        y_true: Ground-truth labels (1 = fraud, 0 = non-fraud).
        y_pred: Predicted fraud probabilities from the sigmoid output.

    Returns:
        Per-sample loss: ``binary_crossentropy + 0.5 * mean(y_true * (1 - y_pred))``.

    The penalty is non-zero only for true-fraud samples and grows as the
    predicted probability falls, so it targets false negatives specifically.
    It is also differentiable, unlike a hard ``y_pred < 0.5`` indicator,
    which contributes no gradient and penalises every negative prediction
    regardless of the true label.
    """
    loss = binary_crossentropy(y_true, y_pred)
    positives = K.cast(y_true, y_pred.dtype)
    # Averaged over the last axis so the penalty has the same per-sample
    # shape as the cross-entropy term
    penalty = 0.5 * K.mean(positives * (1.0 - y_pred), axis=-1)
    return loss + penalty

# Compile the model with the custom loss
model.compile(loss=custom_loss, optimizer='adam', metrics=['accuracy'])
# In this example, we create a custom loss function that adds a penalty for false negatives, ensuring the model is more sensitive to detecting fraud.


In [None]:
# Banking Scenario: Fraud Detection
# Let’s assume a banking business scenario where we need to build a model to detect fraudulent transactions based on 
# features like the transaction amount, merchant, time, and customer details.

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

# Layer stack for the fraud detector, listed in forward order
fraud_layers = [
    Dense(64, input_dim=30, activation='relu'),  # input: 30 features from customer transaction data
    Dense(32, activation='relu'),                # hidden layer
    Dropout(0.5),                                # dropout layer to prevent overfitting
    Dense(1, activation='sigmoid'),              # output: binary classification (fraud or not)
]

# Define a simple Sequential model for fraud detection from that stack
model = Sequential(fraud_layers)

# Compile the model
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Model Compilation
# After defining the architecture, the next step is to compile the model. This involves specifying the optimizer, loss function, and metrics.
# •	Optimizer: Adjusts the model weights based on the gradients of the loss function.
# •	Loss Function: Measures the difference between the predicted and true values. For binary classification, we use binary_crossentropy.
# •	Metrics: Used to track the model's performance during training. Common metrics include accuracy.

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
# In this case, we use the Adam optimizer because it is well-suited for many deep learning problems. The binary_crossentropy
# loss function is appropriate since we are solving a binary classification problem (fraud or not).
# ________________________________________
# 3. Model Training
# After compiling the model, the next step is to train the model. This involves providing the model with training data and
# letting it learn by adjusting its weights based on the optimizer and loss function.
# Banking Scenario: Training on Fraud Detection Data

# Example training data: X_train is the feature matrix, y_train are the labels (0 for non-fraud, 1 for fraud)
# NOTE: `...` (Ellipsis) is a placeholder; assign real arrays before running this cell.
X_train = ...  # Transaction features
y_train = ...  # Labels (0 = non-fraud, 1 = fraud)

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)


In [None]:
# Banking Scenario: Evaluating Fraud Detection Model

# Example test data: X_test and y_test
# NOTE: `...` (Ellipsis) is a placeholder; assign real held-out data before
# running this cell.
X_test = ...  # Test features
y_test = ...  # Test labels

# Evaluate the model; the two values are unpacked as loss, then accuracy
# (assumes the model was compiled with metrics=['accuracy'] -- TODO confirm)
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")


In [None]:
# Banking Scenario: Predicting Fraudulent Transactions

# Feature matrix for a single new transaction (placeholder -- supply real data)
new_transaction = ...

# Score the transaction with the trained model
prediction = model.predict(new_transaction)

# Classify as fraud when the first output is greater than 0.5
print(
    "Fraudulent transaction detected!"
    if prediction[0] > 0.5
    else "Transaction is legitimate."
)


In [None]:
# Banking Scenario: Saving and Loading Fraud Detection Model

# Save the trained model to a file (the .h5 extension selects the HDF5 format)
model.save("fraud_detection_model.h5")

# Later, load the saved model
from keras.models import load_model
loaded_model = load_model("fraud_detection_model.h5")

# Make predictions with the loaded model (new_transaction must hold real
# feature data by this point)
loaded_model.predict(new_transaction)


In [None]:
# Banking Scenario: Using Callbacks for Early Stopping

from keras.callbacks import EarlyStopping

# Define the EarlyStopping callback: watch the validation loss and stop once
# it has failed to improve for 3 consecutive epochs (per Keras' documented
# `patience` semantics)
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

# Train the model with the callback. validation_split=0.2 provides the
# validation data from which val_loss is computed.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, callbacks=[early_stopping])


In [None]:
# Python Code Sample:

from keras.models import Sequential
from keras.layers import Dense

# Create the Sequential model with its layers supplied up front
model = Sequential([
    Dense(units=64, input_dim=10, activation='relu'),  # input_dim is the number of features
    Dense(units=32, activation='relu'),
    Dense(units=1, activation='sigmoid'),  # binary classification for credit risk (good/bad)
])

# Compile the model
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit the model (X_train, y_train would be your training data and labels)
model.fit(X_train, y_train, batch_size=32, epochs=10)


In [None]:
# Functional API Model
# The Functional API is more flexible than the Sequential model. It allows you to create models where layers can have 
# multiple inputs and outputs, and you can share layers between models.
# Use Case in Banking:
# •	Fraud Detection: Build a model that takes multiple types of data (transaction history, customer details) as inputs and predicts
# fraudulent transactions.
# # Python Code Sample:

from keras.models import Model
# `concatenate` is used below to merge the two branches, so it is imported here
from keras.layers import Input, Dense, concatenate

# Define input layers
input_transaction = Input(shape=(10,))  # 10 features for transaction data
input_customer = Input(shape=(5,))      # 5 features for customer data

# Process the inputs (one dense branch per input)
x = Dense(64, activation='relu')(input_transaction)
y = Dense(64, activation='relu')(input_customer)

# Merge the processed inputs along the feature axis
merged = concatenate([x, y])

# Output layer
output = Dense(1, activation='sigmoid')(merged)

# Define the model
model = Model(inputs=[input_transaction, input_customer], outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model (X_transaction, X_customer, y_train would be your training data)
model.fit([X_transaction, X_customer], y_train, epochs=10, batch_size=32)


In [None]:
# Model Subclassing
# Model Subclassing is a more flexible approach where you can define your own model class by subclassing the
# Keras Model class. This allows for complete control over the forward pass.
# Use Case in Banking:
# •	Loan Default Prediction: A complex model where you need fine-grained control over how layers are connected to predict loan defaults.
# # Python Code Sample:

from keras.models import Model
from keras.layers import Dense
from keras import layers

class CustomModel(Model):
    """Fully connected binary classifier written as a Keras ``Model`` subclass.

    Layer stack: Dense(64, relu) -> Dense(32, relu) -> Dense(1, sigmoid).
    """

    def __init__(self):
        """Create the three Dense layers; the constructor takes no arguments."""
        super().__init__()
        self.dense1 = Dense(64, activation='relu')
        self.dense2 = Dense(32, activation='relu')
        self.dense3 = Dense(1, activation='sigmoid')

    def call(self, inputs):
        """Forward pass: run ``inputs`` through dense1, dense2 and dense3 in that order.

        Returns the output of the final sigmoid layer.
        """
        hidden = self.dense2(self.dense1(inputs))
        return self.dense3(hidden)

# Instantiate the subclassed model defined above
model = CustomModel()

# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy reported as a metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model for 10 epochs with a batch size of 32 (X_train, y_train would be your training data).
# NOTE(review): X_train and y_train are not defined in this section; they are expected to already
# exist in the kernel — confirm they hold the intended loan data before running.
model.fit(X_train, y_train, epochs=10, batch_size=32)
# 4. Convolutional Neural Networks (CNN)
# CNNs are primarily used for image-related tasks, but can also be applied to structured data once it
# has been reformatted appropriately. Their convolutional layers extract features from the data.
# Use Case in Banking:
# - Cheque Image Recognition: Use CNNs to extract features from cheque images (e.g., handwritten
#   digits, signatures) for automatic cheque processing.
# Python Code Sample:

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Build the whole CNN as one layer list
model = Sequential([
    # Convolution + pooling; the input is a 64x64 image with 3 channels
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the feature maps and classify with dense layers
    Flatten(),
    Dense(units=64, activation='relu'),
    Dense(units=1, activation='sigmoid'),  # single sigmoid unit: binary classification
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit the model (X_train and y_train would be your image data and labels)
model.fit(X_train, y_train, batch_size=32, epochs=10)
# 5. Recurrent Neural Networks (RNN)
# RNNs are suited to sequential data. They keep an internal state, which makes them useful for tasks
# such as time series prediction.
# Use Case in Banking:
# - Time Series Forecasting: Predict future stock prices or interest rates based on historical data using RNNs.
# Python Code Sample:

from keras.models import Sequential
from keras.layers import SimpleRNN, Dense

# Build the recurrent model as one layer list
model = Sequential([
    # Recurrent layer reading sequences of 10 time steps with 1 feature each
    SimpleRNN(units=50, input_shape=(10, 1), activation='relu'),
    # Single linear output unit (regression)
    Dense(units=1),
])

# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit the model (X_train and y_train are your time-series data)
model.fit(X_train, y_train, batch_size=32, epochs=10)
# 6. Long Short-Term Memory (LSTM)
# LSTM is a type of RNN designed to learn long-term dependencies, which helps when the data has
# long-range dependencies.
# Use Case in Banking:
# - Customer Churn Prediction: Predict if a customer will churn based on their behavior over time,
#   using LSTM for sequence data.
# Python Code Sample:

from keras.models import Sequential
from keras.layers import LSTM, Dense

# Build the LSTM model as one layer list
model = Sequential([
    # LSTM over sequences of 10 time steps with 1 feature; only the final output is returned
    LSTM(units=50, input_shape=(10, 1), activation='relu', return_sequences=False),
    # Single sigmoid unit: binary classification for churn prediction
    Dense(units=1, activation='sigmoid'),
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit the model (X_train and y_train are your sequential data)
model.fit(X_train, y_train, batch_size=32, epochs=10)
# 7. Autoencoders
# Autoencoders are unsupervised learning models that learn to encode input data into a smaller latent representation and then
# reconstruct the data back to its original form.
# Use Case in Banking:
# - Anomaly Detection: Detect fraudulent transactions by learning a reconstruction of normal transactions and flagging any
#   transaction that doesn’t fit.
# Python Code Sample:

from keras.models import Sequential
from keras.layers import Dense

# Create an autoencoder model
model = Sequential()

# Encoder: every layer is narrower than the 10-feature input. A hidden stack wider than
# the input would not compress anything and could simply copy its input, which defeats
# reconstruction-based anomaly detection.
model.add(Dense(8, activation='relu', input_dim=10))  # 10 features -> 8

# Latent space representation (the bottleneck, smaller than the input)
model.add(Dense(4, activation='relu'))

# Decoder: mirror of the encoder
model.add(Dense(8, activation='relu'))
# Same number of output neurons as input. The sigmoid keeps outputs in [0, 1], so
# presumably the input features are scaled to that range — confirm before training.
model.add(Dense(10, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Fit the model (X_train would be your data); the input is also the reconstruction target
model.fit(X_train, X_train, epochs=10, batch_size=32)
# 8. Generative Adversarial Networks (GANs)
# GANs consist of two models: a generator and a discriminator. The generator creates fake data, and the discriminator
# attempts to distinguish between real and fake data.
# Use Case in Banking:
# - Synthetic Data Generation: Use GANs to generate synthetic transaction data for training fraud detection models when
#   real data is scarce.
# Python Code Sample:

# Input and Model are imported here explicitly: the combined GAN below needs both, and
# relying on imports made by other sections/cells breaks on a fresh kernel.
from keras.models import Sequential, Model
from keras.layers import Dense, Input
from keras import optimizers

# Define the generator model: 100-dimensional noise vector -> 10 synthetic features
generator = Sequential()
generator.add(Dense(128, input_dim=100, activation='relu'))
generator.add(Dense(256, activation='relu'))
generator.add(Dense(512, activation='relu'))
generator.add(Dense(1024, activation='relu'))
generator.add(Dense(10, activation='sigmoid'))  # Output synthetic data

# Define the discriminator model: 10 features -> single sigmoid score
discriminator = Sequential()
discriminator.add(Dense(1024, input_dim=10, activation='relu'))
discriminator.add(Dense(512, activation='relu'))
discriminator.add(Dense(256, activation='relu'))
discriminator.add(Dense(1, activation='sigmoid'))  # Binary classification

# Compile the discriminator on its own
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# GAN model combines the generator and discriminator.
# The discriminator is marked non-trainable before the combined model is built; the
# intent is that fitting gan_model adjusts only the generator.
discriminator.trainable = False
gan_input = Input(shape=(100,))
x = generator(gan_input)
gan_output = discriminator(x)
gan_model = Model(gan_input, gan_output)
gan_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Business Scenario: Credit Scoring
# A feed-forward neural network can be used to predict whether a loan applicant will default or not,
# based on their financial information (e.g., income, loan amount, credit history, etc.).
# Python Code Example:

from keras.models import Sequential
from keras.layers import Dense
import numpy as np

# Synthetic sample data: 1000 customers, each with 5 features (age, income, loan amount, etc.)
X_train = np.random.rand(1000, 5)
y_train = np.random.randint(0, 2, 1000)  # labels: 0 = no default, 1 = default

# Define the model as one layer list
model = Sequential([
    Dense(64, input_dim=5, activation='relu'),  # first hidden layer, fed by the 5 input features
    Dense(32, activation='relu'),               # second hidden layer
    Dense(1, activation='sigmoid'),             # output layer: sigmoid for binary classification
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, batch_size=32, epochs=10)

# Predict on new data: one new applicant with 5 features
new_data = np.random.rand(1, 5)
prediction = model.predict(new_data)
print("Default Probability: ", prediction)
# 2. Convolutional Neural Networks (CNN)
# CNNs are used for image-based tasks, but can also be applied to certain types of structured data,
# such as time-series or sequence data, once it has been reformatted.
# Business Scenario: Cheque Image Recognition
# In banks, cheque processing can be automated by using CNNs to recognize handwritten digits,
# signatures, and text from cheque images.
# Python Code Example:

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Stand-in for preprocessed, reshaped image data:
# 1000 grayscale (1-channel) images of 64x64 pixels
X_train_images = np.random.rand(1000, 64, 64, 1)
y_train_labels = np.random.randint(0, 2, 1000)  # labels: 0 = fake cheque, 1 = real cheque

# Define the CNN model as one layer list
model = Sequential([
    # Convolutional layer
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(64, 64, 1)),
    # Max-pooling layer
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the pooled feature maps
    Flatten(),
    # Fully connected (dense) layers
    Dense(units=64, activation='relu'),
    Dense(units=1, activation='sigmoid'),  # binary classification
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train_images, y_train_labels, batch_size=32, epochs=10)

# Predict on a new cheque image
new_image = np.random.rand(1, 64, 64, 1)
prediction = model.predict(new_image)
print("Cheque authenticity prediction: ", prediction)
# 3. Recurrent Neural Networks (RNN)
# RNNs are designed for sequential data. They are useful for time series prediction and other tasks
# where the order of the data matters, such as analyzing customer behavior over time.
# Business Scenario: Customer Churn Prediction
# A bank may want to predict whether a customer will churn (leave) based on their past behavior,
# such as transaction history and interaction with customer support.
# Python Code Example:

from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
import numpy as np

# Synthetic sample data: 1000 customers, 10 time-steps of history, 1 feature (transaction amount)
X_train = np.random.rand(1000, 10, 1)
y_train = np.random.randint(0, 2, 1000)  # labels: 0 = no churn, 1 = churn

# Define the RNN model as one layer list
model = Sequential([
    SimpleRNN(units=50, input_shape=(10, 1), activation='relu'),  # 10 time-steps, 1 feature
    Dense(units=1, activation='sigmoid'),                         # binary classification (churn or not)
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, batch_size=32, epochs=10)

# Predict on new customer transaction data
new_data = np.random.rand(1, 10, 1)
prediction = model.predict(new_data)
print("Churn prediction: ", prediction)
# 4. Long Short-Term Memory (LSTM) Networks
# LSTM is a special type of RNN designed to capture long-term dependencies in sequential data. It is
# especially useful for datasets where long-range dependencies are critical.
# Business Scenario: Predicting Stock Prices
# Banks and financial institutions often use LSTM models to forecast stock prices, interest rates,
# or foreign exchange rates based on historical data.
# Python Code Example:

from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np

# Synthetic sample data: 1000 samples, each a window of 60 time-steps (days) with 1 feature (price)
X_train = np.random.rand(1000, 60, 1)
y_train = np.random.rand(1000)  # target: next day's price for each window

# Define the LSTM model as one layer list
model = Sequential([
    LSTM(units=50, input_shape=(60, 1), activation='relu'),
    Dense(units=1),  # continuous output: the next day's price
])

# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
model.fit(X_train, y_train, batch_size=32, epochs=10)

# Predict from a new 60-day window of stock data
new_data = np.random.rand(1, 60, 1)
prediction = model.predict(new_data)
print("Next day's predicted stock price: ", prediction)
# 5. Autoencoders
# Autoencoders are unsupervised neural networks used for data compression and anomaly detection. They learn to compress data into
# a lower-dimensional representation and then reconstruct it.
# Business Scenario: Anomaly Detection in Transactions
# Banks can use autoencoders to detect unusual transactions that might indicate fraud. Transactions that cannot be reconstructed
# well by the autoencoder are flagged as potential anomalies.
# Python Code Example:

from keras.models import Sequential
from keras.layers import Dense
import numpy as np

# Sample data (e.g., 1000 transaction records with 10 features)
X_train = np.random.rand(1000, 10)  # 1000 transactions, 10 features (amount, location, time, etc.)

# Define the autoencoder model
model = Sequential()

# Encoder
model.add(Dense(8, activation='relu', input_dim=10))  # Compress from 10 features to 8
model.add(Dense(4, activation='relu'))  # Further compression to 4

# Decoder
model.add(Dense(8, activation='relu'))  # Expanding back to 8
model.add(Dense(10, activation='sigmoid'))  # Reconstructing back to 10 features

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model (the input is also the reconstruction target)
model.fit(X_train, X_train, epochs=10, batch_size=32)

# Predict on new data
new_transaction = np.random.rand(1, 10)  # A new transaction record
reconstructed = model.predict(new_transaction)

# Compare the reconstructed data to the original data
print("Reconstructed transaction data: ", reconstructed)

# Per-record mean squared reconstruction error: this is the quantity to compare against
# a threshold when flagging anomalies (a poorly reconstructed record has a high error).
reconstruction_error = np.mean(np.square(new_transaction - reconstructed), axis=1)
print("Reconstruction error (MSE): ", reconstruction_error)
# 6. Generative Adversarial Networks (GANs)
# Generative Adversarial Networks (GANs) are composed of two models: a generator that creates fake data and a discriminator
# that tries to distinguish between real and fake data. GANs are typically used for generating synthetic data.
# Business Scenario: Synthetic Fraudulent Transaction Generation
# Banks can use GANs to generate synthetic fraudulent transaction data to train fraud detection models, especially when real
# fraudulent data is scarce.
# Python Code Example:

# Model and Input are needed by the combined GAN below and were not imported anywhere
# in this cell, so it failed with a NameError unless another cell had imported them.
from keras.models import Sequential, Model
from keras.layers import Dense, Input
import numpy as np

# Define the generator model (generates fake data): 100-dimensional noise -> 10 features
generator = Sequential()
generator.add(Dense(128, input_dim=100, activation='relu'))
generator.add(Dense(256, activation='relu'))
generator.add(Dense(512, activation='relu'))
generator.add(Dense(1024, activation='relu'))
generator.add(Dense(10, activation='sigmoid'))  # 10 features (synthetic transaction)

# Define the discriminator model (classifies real vs fake data)
discriminator = Sequential()
discriminator.add(Dense(1024, input_dim=10, activation='relu'))
discriminator.add(Dense(512, activation='relu'))
discriminator.add(Dense(256, activation='relu'))
discriminator.add(Dense(1, activation='sigmoid'))  # Binary classification (real or fake)

# Compile the discriminator model
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# GAN model combines generator and discriminator.
# The discriminator is marked non-trainable before the combined model is built; the
# intent is that fitting gan_model adjusts only the generator.
discriminator.trainable = False
gan_input = Input(shape=(100,))
x = generator(gan_input)
gan_output = discriminator(x)
gan_model = Model(gan_input, gan_output)
gan_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
