In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the dataset
file_path = r'C:\Users\shamb\Downloads\default+of+credit+card+clients\default of credit card clients.xls'
# header=1: the column names are read from the second row of the sheet (row index 1)
data = pd.read_excel(file_path, header=1)

# Inspect the dataset
print(data.head())

# Handle missing values (if any) by carrying the previous row's value forward.
# DataFrame.ffill() replaces fillna(method='ffill'), which pandas has deprecated
# (the old call emits a FutureWarning on every run).
data = data.ffill()

# Encode categorical variables: each column's distinct values are mapped to 0..k-1
categorical_columns = ['SEX', 'EDUCATION', 'MARRIAGE']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])

# Feature and target separation; 'default payment next month' is the target column.
# NOTE(review): only the target is dropped, so the 'ID' column stays in X and is
# fed to the model as a feature -- confirm this is intended.
X = data.drop(columns=['default payment next month'])
y = data['default payment next month']

# Split the data into training and testing sets (80/20, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the numerical features: the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("Data preprocessing completed successfully.")


   ID  LIMIT_BAL  SEX  EDUCATION  MARRIAGE  AGE  PAY_0  PAY_2  PAY_3  PAY_4  \
0   1      20000    2          2         1   24      2      2     -1     -1   
1   2     120000    2          2         2   26     -1      2      0      0   
2   3      90000    2          2         2   34      0      0      0      0   
3   4      50000    2          2         1   37      0      0      0      0   
4   5      50000    1          2         1   57     -1      0     -1      0   

   ...  BILL_AMT4  BILL_AMT5  BILL_AMT6  PAY_AMT1  PAY_AMT2  PAY_AMT3  \
0  ...          0          0          0         0       689         0   
1  ...       3272       3455       3261         0      1000      1000   
2  ...      14331      14948      15549      1518      1500      1000   
3  ...      28314      28959      29547      2000      2019      1200   
4  ...      20940      19146      19131      2000     36681     10000   

   PAY_AMT4  PAY_AMT5  PAY_AMT6  default payment next month  
0         0         0   

  data.fillna(method='ffill', inplace=True)


In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Local import: the cell's import lines only bring in Dense, and the explicit
# Input layer below needs this name.
from tensorflow.keras.layers import Input

# Build the ANN model: three ReLU hidden layers (32 -> 16 -> 8) and one sigmoid output.
# The input width is declared with an Input layer instead of passing `input_shape`
# to the first Dense layer, which Keras warns against for Sequential models.
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=16, activation='relu'))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

# Compile the model for binary classification
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

# Train the model; validation_split=0.2 holds out part of X_train for per-epoch validation
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 838us/step - accuracy: 0.7127 - loss: 0.5740 - val_accuracy: 0.8127 - val_loss: 0.4602
Epoch 2/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 670us/step - accuracy: 0.8218 - loss: 0.4456 - val_accuracy: 0.8165 - val_loss: 0.4505
Epoch 3/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 688us/step - accuracy: 0.8176 - loss: 0.4456 - val_accuracy: 0.8190 - val_loss: 0.4492
Epoch 4/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 694us/step - accuracy: 0.8238 - loss: 0.4266 - val_accuracy: 0.8167 - val_loss: 0.4482
Epoch 5/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 698us/step - accuracy: 0.8219 - loss: 0.4273 - val_accuracy: 0.8175 - val_loss: 0.4458
Epoch 6/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 719us/step - accuracy: 0.8267 - loss: 0.4172 - val_accuracy: 0.8194 - val_loss: 0.4447
Epoch 7/50
[1m6

In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
import tensorflow as tf

# Load the dataset
file_path = r'C:\Users\shamb\Downloads\default+of+credit+card+clients\default of credit card clients.xls'
# header=1: the column names are read from the second row of the sheet (row index 1)
data = pd.read_excel(file_path, header=1)

# Handle missing values (if any) with a forward fill.
# DataFrame.ffill() is the supported spelling; fillna(method='ffill') is deprecated
# in pandas and emits a FutureWarning.
data = data.ffill()

# Encode categorical variables: each column's distinct values are mapped to 0..k-1
categorical_columns = ['SEX', 'EDUCATION', 'MARRIAGE']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])

# Feature and target separation; 'default payment next month' is the target column.
# Every other column (nothing else is dropped here) is kept as a feature.
X = data.drop(columns=['default payment next month'])
y = data['default payment next month']

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the numerical features: fit on the training split, reuse for the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the hypermodel
def build_model(hp):
    """Build and compile the binary classifier for one hyperparameter trial.

    The input width is read from the module-level ``X_train`` array, so the
    preprocessing above must have run before this function is called.

    Args:
        hp: hyperparameter container; this function registers ``units_input``,
            ``num_layers``, ``units_{i}`` (one per extra hidden layer) and
            ``learning_rate`` on it.

    Returns:
        A compiled ``Sequential`` model ending in a single sigmoid unit.
    """
    # Local import: the cell's import lines only bring in Dense.
    from tensorflow.keras.layers import Input

    model = Sequential()
    # Declare the input with an explicit Input layer; passing `input_shape` to the
    # first Dense layer makes Keras emit a UserWarning for Sequential models.
    model.add(Input(shape=(X_train.shape[1],)))
    model.add(Dense(units=hp.Int('units_input', min_value=16, max_value=128, step=16), activation='relu'))
    # Between one and three additional hidden layers, each with its own tunable width
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=16, max_value=128, step=16), activation='relu'))
    model.add(Dense(units=1, activation='sigmoid'))

    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Set up a Hyperband search that ranks trials by validation accuracy
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=50,
    factor=3,
    directory='my_dir',
    project_name='credit_card_fraud',
)

# Run the search; each trial validates on a 20% slice of the training data
search_callbacks = [tf.keras.callbacks.EarlyStopping(patience=3)]
tuner.search(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=search_callbacks,
)

# Keep only the single best hyperparameter set
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the input layer is {best_hps.get('units_input')} with {best_hps.get('num_layers')} layers, each having units {[best_hps.get(f'units_{i}') for i in range(best_hps.get('num_layers'))]}. The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.
""")

# Rebuild a fresh model from the winning hyperparameters and train it
model = tuner.hypermodel.build(best_hps)
fit_callbacks = [tf.keras.callbacks.EarlyStopping(patience=3)]
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=fit_callbacks,
)

# Score the trained model on the held-out test split
test_metrics = model.evaluate(X_test, y_test)
test_loss, test_accuracy = test_metrics
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')


Trial 90 Complete [00h 00m 04s]
val_accuracy: 0.8202083110809326

Best val_accuracy So Far: 0.8216666579246521
Total elapsed time: 00h 05m 46s

The hyperparameter search is complete. The optimal number of units in the input layer is 128 with 3 layers, each having units [112, 16, 16]. The optimal learning rate for the optimizer is 0.001.

Epoch 1/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 917us/step - accuracy: 0.7653 - loss: 0.5072 - val_accuracy: 0.8123 - val_loss: 0.4511
Epoch 2/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 724us/step - accuracy: 0.8219 - loss: 0.4322 - val_accuracy: 0.8200 - val_loss: 0.4427
Epoch 3/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 775us/step - accuracy: 0.8238 - loss: 0.4270 - val_accuracy: 0.8177 - val_loss: 0.4453
Epoch 4/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 723us/step - accuracy: 0.8223 - loss: 0.4279 - val_accuracy: 0.8175 - val_loss: 0.4436
Epoc

In [2]:
import os

# Project layout: a root folder with a nested model folder and a templates folder
project_dir = r'C:\Users\shamb\Desktop\Ai powered credit card fraud detection\my_dir\saved_model'
model_dir = os.path.join(project_dir, 'model', 'saved_model')
templates_dir = os.path.join(project_dir, 'templates')

# Create both folders (and any missing parents); existing folders are left untouched
for directory in (model_dir, templates_dir):
    os.makedirs(directory, exist_ok=True)

print(f"Project directories created at {project_dir}")


Project directories created at C:\Users\shamb\Desktop\Ai powered credit card fraud detection\my_dir\saved_model


In [3]:
import os

# Local imports: this cell's import line only brings in `os`.
import subprocess
import sys

# Define the project directory structure
project_dir = r'C:\Users\shamb\Desktop\Ai powered credit card fraud detection\my_dir\saved_model'
model_dir = os.path.join(project_dir, 'model', 'saved_model')
templates_dir = os.path.join(project_dir, 'templates')

# Create the directories (existing ones are left untouched)
os.makedirs(model_dir, exist_ok=True)
os.makedirs(templates_dir, exist_ok=True)

print(f"Project directories created at {project_dir}")

# Install dependencies from requirements.txt.
# The path contains spaces, so it must not be interpolated into a shell string:
# the shell would split it into several arguments and pip would fail. Passing an
# argument list keeps the path as one argument, and `sys.executable -m pip`
# installs into the interpreter this kernel is running on.
requirements_path = os.path.join(project_dir, "requirements.txt")
# The last expression is pip's exit status (0 on success), shown as the cell output.
subprocess.run([sys.executable, '-m', 'pip', 'install', '-r', requirements_path]).returncode


Project directories created at C:\Users\shamb\Desktop\Ai powered credit card fraud detection\my_dir\saved_model


1

In [6]:
import os
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
import tensorflow as tf

# Define the project directory where the trained model and scaler are written
base_save_dir = r'C:\Users\shamb\Desktop\Ai powered credit card fraud detection\my_dir\saved_model'
model_dir = os.path.join(base_save_dir, 'model')
os.makedirs(model_dir, exist_ok=True)

# Load the dataset
file_path = r'C:\Users\shamb\Downloads\default+of+credit+card+clients\default of credit card clients.xls'
# header=1: the column names are read from the second row of the sheet (row index 1)
data = pd.read_excel(file_path, header=1)

# Handle missing values (if any) with a forward fill.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill'), which emits a
# FutureWarning.
data = data.ffill()

# Encode categorical variables: each column's distinct values are mapped to 0..k-1.
# NOTE(review): these encoders are not persisted, so any code that scores new rows
# has to reproduce the same value -> code mapping itself.
categorical_columns = ['SEX', 'EDUCATION', 'MARRIAGE']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])

# Feature and target separation; every column except the target is kept as a feature
X = data.drop(columns=['default payment next month'])  # Adjust column name if necessary
y = data['default payment next month']

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the numerical features: fit on the training split, reuse for the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the hypermodel
def build_model(hp):
    """Create the compiled network for a single tuner trial.

    Reads the number of input features from the module-level ``X_train``,
    which must already hold the scaled training matrix.

    Args:
        hp: hyperparameter container used to register and sample
            ``units_input``, ``num_layers``, ``units_{i}`` and ``learning_rate``.

    Returns:
        A compiled ``Sequential`` binary classifier with a sigmoid output unit.
    """
    # Local import: only Dense is imported at the top of this cell.
    from tensorflow.keras.layers import Input

    model = Sequential()
    # An explicit Input layer replaces `input_shape=` on the first Dense layer,
    # which triggers a Keras UserWarning for Sequential models.
    model.add(Input(shape=(X_train.shape[1],)))
    model.add(Dense(units=hp.Int('units_input', min_value=16, max_value=128, step=16), activation='relu'))
    # One to three further hidden layers, each with an independently tuned width
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=16, max_value=128, step=16), activation='relu'))
    model.add(Dense(units=1, activation='sigmoid'))

    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Hyperband tuner; trial state is kept under my_dir/credit_card_fraud
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=50,
    factor=3,
    directory='my_dir',
    project_name='credit_card_fraud',
)

# Search the hyperparameter space with early stopping on each trial
search_callbacks = [tf.keras.callbacks.EarlyStopping(patience=3)]
tuner.search(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=search_callbacks,
)

# Best hyperparameter set found by the search
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the input layer is {best_hps.get('units_input')} with {best_hps.get('num_layers')} layers, each having units {[best_hps.get(f'units_{i}') for i in range(best_hps.get('num_layers'))]}. The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.
""")

# Train a fresh model built from the best hyperparameters
model = tuner.hypermodel.build(best_hps)
fit_callbacks = [tf.keras.callbacks.EarlyStopping(patience=3)]
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=fit_callbacks,
)

# Report loss and accuracy on the held-out test split
test_metrics = model.evaluate(X_test, y_test)
test_loss, test_accuracy = test_metrics
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

# Persist the trained network in the native .keras format
model_save_path_keras = os.path.join(model_dir, 'credit_card_fraud_model.keras')
model.save(model_save_path_keras)
print(f"Model saved to {model_save_path_keras}")

# Persist the fitted scaler next to the model so inference can reuse it
scaler_save_path = os.path.join(model_dir, 'scaler.joblib')
joblib.dump(scaler, scaler_save_path)
print(f"Scaler saved to {scaler_save_path}")


  data.fillna(method='ffill', inplace=True)


Reloading Tuner from my_dir\credit_card_fraud\tuner0.json

The hyperparameter search is complete. The optimal number of units in the input layer is 112 with 2 layers, each having units [96, 32]. The optimal learning rate for the optimizer is 0.001.

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7968 - loss: 0.4857 - val_accuracy: 0.8144 - val_loss: 0.4507
Epoch 2/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 758us/step - accuracy: 0.8130 - loss: 0.4465 - val_accuracy: 0.8188 - val_loss: 0.4451
Epoch 3/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 743us/step - accuracy: 0.8233 - loss: 0.4301 - val_accuracy: 0.8213 - val_loss: 0.4456
Epoch 4/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 734us/step - accuracy: 0.8212 - loss: 0.4292 - val_accuracy: 0.8165 - val_loss: 0.4455
Epoch 5/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 734us/step - accuracy: 0.8234 - loss: 0.4222 - val_accuracy: 0.8177 - val_loss: 0.4441
Epoch 6/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 752us/step - accuracy: 0.8201 - loss: 0.4230 - val_accuracy: 0.8138 - val_loss: 0.4476
Epoch 7/50
[1m600/600[0m [

In [18]:
import os
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import load_model

# Define the project directory and the candidate artifact locations
base_save_dir = r'C:\Users\shamb\Desktop\Ai powered credit card fraud detection\my_dir'
model_path_h5 = os.path.join(base_save_dir, 'credit_card_fraud_model.h5')
model_path_keras = os.path.join(base_save_dir, 'credit_card_fraud_model.keras')
scaler_path = os.path.join(base_save_dir, 'saved_model', 'model', 'scaler.joblib')

# Load the dataset (header=1: column names come from the second row of the sheet)
file_path = r'C:\Users\shamb\Downloads\default+of+credit+card+clients\default of credit card clients.xls'
data = pd.read_excel(file_path, header=1)

# Handle missing values (if any) with a forward fill.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
data = data.ffill()

# Encode categorical variables: each column's distinct values are mapped to 0..k-1
categorical_columns = ['SEX', 'EDUCATION', 'MARRIAGE']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])

# Feature and target separation
X = data.drop(columns=['default payment next month'])
y = data['default payment next month']

# Split the data into training and testing sets (same seed and ratio as training)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Load the saved scaler and apply it, instead of fitting a new StandardScaler.
# Previously the saved scaler was loaded only after the data had already been
# scaled by a freshly fitted one, so it was never actually used.
scaler = joblib.load(scaler_path)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Load the model (the .h5 file takes precedence when both exist).
# Both branches use the imported `load_model`; the .keras branch used to call
# `tf.keras.models.load_model` although `tf` is never imported in this cell.
if os.path.exists(model_path_h5):
    model = load_model(model_path_h5)
elif os.path.exists(model_path_keras):
    model = load_model(model_path_keras)
else:
    raise FileNotFoundError("Model file not found")

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')



  data.fillna(method='ffill', inplace=True)


[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 587us/step - accuracy: 0.8049 - loss: 0.4442
Test Loss: 0.4433768391609192
Test Accuracy: 0.8134999871253967


In [7]:
import os
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import load_model

# Define paths for the saved model and scaler
base_save_dir = r'C:\Users\shamb\Desktop\Ai powered credit card fraud detection\my_dir'
model_path = os.path.join(base_save_dir, 'saved_model', 'model', 'credit_card_fraud_model.keras')
scaler_path = os.path.join(base_save_dir, 'saved_model', 'model', 'scaler.joblib')

# Check if the model and scaler files exist, failing early with the offending path
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found at {model_path}")

if not os.path.exists(scaler_path):
    raise FileNotFoundError(f"Scaler file not found at {scaler_path}")

# Load the trained model
model = load_model(model_path)
print(f"Model loaded from {model_path}")

# Load the scaler
scaler = joblib.load(scaler_path)
print(f"Scaler loaded from {scaler_path}")

# Assume we have new data for which we need predictions
# Example of new input data (replace this with actual new data)
new_data = pd.DataFrame({
    'ID': [1, 2],  # Add ID column
    'LIMIT_BAL': [50000, 200000],  # Add LIMIT_BAL column
    'SEX': [1, 2],                 # Example values for gender
    'EDUCATION': [2, 3],           # Example values for education
    'MARRIAGE': [1, 2],            # Example values for marriage status
    'AGE': [25, 40],               # Example values for age
    'PAY_0': [1, 2],
    'PAY_2': [1, 2],
    'PAY_3': [1, 2],
    'PAY_4': [1, 2],
    'PAY_5': [1, 2],
    'PAY_6': [1, 2],
    'BILL_AMT1': [1000, 2000],
    'BILL_AMT2': [500, 1000],
    'BILL_AMT3': [2000, 3000],
    'BILL_AMT4': [1500, 2500],
    'BILL_AMT5': [3000, 4000],
    'BILL_AMT6': [2500, 3500],
    'PAY_AMT1': [500, 1000],
    'PAY_AMT2': [1000, 2000],
    'PAY_AMT3': [1500, 2500],
    'PAY_AMT4': [2000, 3000],
    'PAY_AMT5': [2500, 3500],
    'PAY_AMT6': [3000, 4000]
})

# Encode categorical variables with a FIXED value -> code mapping.
# Fitting a fresh LabelEncoder on the rows being scored makes the codes depend on
# whichever values happen to be in the batch (e.g. EDUCATION [2, 3] became [0, 1]),
# so they did not match the codes the model was trained on.
# NOTE(review): the category lists below assume the training file contains exactly
# these raw codes (as documented for the UCI "default of credit card clients"
# data) -- confirm with data[col].unique() on the training data, or persist the
# fitted encoders at training time and load them here instead.
training_categories = {
    'SEX': [1, 2],
    'EDUCATION': [0, 1, 2, 3, 4, 5, 6],
    'MARRIAGE': [0, 1, 2, 3],
}
categorical_columns = ['SEX', 'EDUCATION', 'MARRIAGE']
for col in categorical_columns:
    le = LabelEncoder()
    le.fit(training_categories[col])
    # transform() raises ValueError for a value outside the known categories
    new_data[col] = le.transform(new_data[col])

# Put the columns in the order the scaler was fitted with, when it recorded one
# (scikit-learn stores `feature_names_in_` when fitted on a DataFrame).
expected_columns = getattr(scaler, 'feature_names_in_', None)
if expected_columns is not None:
    new_data = new_data[list(expected_columns)]

# Normalize the new data using the loaded scaler
new_data_scaled = scaler.transform(new_data)

# Make predictions (one sigmoid probability per row)
predictions = model.predict(new_data_scaled)

# Interpret predictions (0: Not Default, 1: Default)
predicted_classes = (predictions > 0.5).astype(int)

# Print the results; ravel() yields one scalar class per row instead of a 1-element array
for i, prediction in enumerate(predicted_classes.ravel()):
    print(f"Transaction {i+1} is {'Default' if prediction == 1 else 'Not Default'}")

Model loaded from C:\Users\shamb\Desktop\Ai powered credit card fraud detection\my_dir\saved_model\model\credit_card_fraud_model.keras
Scaler loaded from C:\Users\shamb\Desktop\Ai powered credit card fraud detection\my_dir\saved_model\model\scaler.joblib
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 329ms/step
Transaction 1 is Default
Transaction 2 is Default
