<a href="https://colab.research.google.com/github/Shahid-TheMatrixMaker/100-Data-Science-Project/blob/main/Credit%2BCard%2BFraud%2BDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Load the transactions CSV (expected in the working directory) into a
# DataFrame and show its first rows as the cell output.
csv_path = 'creditcard.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Fraction of all rows carried by each 'Class' label:
# per-label row counts divided by the total number of rows.
class_counts = data['Class'].value_counts()
print(class_counts / len(data))

In [None]:
# Number of missing values in every column.
missing_per_column = data.isna().sum()
print(missing_per_column)

In [None]:
# Discard every row that has at least one missing value.
data = data.dropna(axis='index', how='any')

In [None]:
# First and third quartiles of 'Amount', then their spread (interquartile range).
Q1, Q3 = (data['Amount'].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1

In [None]:
# Rows whose 'Amount' lies more than 1.5 * IQR below Q1 or above Q3.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
below_lower = data['Amount'] < lower_fence
above_upper = data['Amount'] > upper_fence
outliers = data[below_lower | above_upper]

In [None]:
# Histogram of 'Amount'. Series.hist returns the Axes it drew on, so the
# title is set on that Axes directly.
amount_ax = data['Amount'].hist()
amount_ax.set_title('Transaction Amount Distribution')

In [None]:
# Scatter plot with 'Amount' on the x axis and 'Time' on the y axis.
# The trailing semicolon keeps the returned Axes repr out of the cell output.
sns.scatterplot(data=data, x='Amount', y='Time');

In [None]:
# Pairwise correlations between columns (pandas' default method),
# rendered as a heatmap.
corr = data.corr()
sns.heatmap(data=corr)

In [None]:
from imblearn.over_sampling import SMOTE

# Separate the feature columns from the 'Class' label column.
X = data.drop(columns='Class')
y = data['Class']

# SMOTE generates synthetic minority-class rows at random, so it is seeded
# to make every downstream split, model and metric reproducible across runs.
oversampler = SMOTE(k_neighbors=1, random_state=42)
X_smote, y_smote = oversampler.fit_resample(X, y)

# NOTE(review): resampling is applied to the full dataset, and the
# train/validation split in the following cell is taken from the resampled
# rows. Synthetic rows derived from a validation row's neighbourhood can
# therefore end up in training, which inflates validation scores. Splitting
# first and resampling only the training part would avoid this; it needs a
# coordinated change in the split cell.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the resampled rows for validation; the fixed seed makes
# the partition repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_smote,
    y_smote,
    test_size=0.3,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# Standardise the features before the logistic regression. The raw columns
# are on very different scales (e.g. 'Time' and 'Amount' versus the rest),
# which slows solver convergence and makes the L2 penalty scale-dependent.
# Putting the scaler in a pipeline means the same transform, fitted on the
# training rows only, is applied automatically whenever `clf.predict` is
# called on new data.
clf = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=10000),  # generous cap kept from the original
)
clf.fit(X_train, y_train)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Predict labels for the validation rows, then score them with each metric
# in turn (precision, recall, F1), all with the metrics' default settings.
y_pred = clf.predict(X_valid)
precision, recall, f1 = (
    metric(y_valid, y_pred)
    for metric in (precision_score, recall_score, f1_score)
)

In [None]:
# Report the three validation metrics, one per line.
for label, value in (
    ('Precision:', precision),
    ('Recall :', recall),
    ('f1 Score:', f1),
):
    print(label, value)

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Train model 1 (seeded so the fitted forest is reproducible)
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Train model 2 (seeded for the same reason)
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)

# Ensemble predictions via soft voting.
# Averaging the hard 0/1 outputs of `predict` produces 0.5 whenever the two
# models disagree, which is not a class label and cannot be passed to
# classification metrics. Instead, average the predicted probability of the
# positive class and threshold it to obtain proper 0/1 labels.
# Column 1 of `predict_proba` corresponds to `classes_[1]`, i.e. label 1 when
# the labels are 0/1 as assumed here.
rf_positive_proba = rf.predict_proba(X_valid)[:, 1]
gb_positive_proba = gb.predict_proba(X_valid)[:, 1]
ensemble_positive_proba = (rf_positive_proba + gb_positive_proba) / 2
y_pred_ensemble = (ensemble_positive_proba >= 0.5).astype(int)

In [None]:
from sklearn.ensemble import IsolationForest

# Unsupervised anomaly detector fitted on the training features only
# (labels are not used). Seeded so the random trees are reproducible.
# NOTE(review): when the cells run in order, X_train comes from the
# SMOTE-balanced data, so the assumed contamination of 1% does not match the
# class mix the model is fitted on - confirm this is intended.
anomaly_model = IsolationForest(contamination=0.01, random_state=42)
anomaly_model.fit(X_train)

# decision_function: the lower the score, the more abnormal the row.
# Negative scores are outliers, positive scores are inliers, with the zero
# point placed according to `contamination`.
anomaly_scores = anomaly_model.decision_function(X_valid)

# Use the model's own outlier boundary. The previous cut-off of -0.5 lies
# below the scores decision_function produces in practice, so it flagged
# essentially nothing. Lower this (more negative) to flag fewer rows.
threshold = 0.0

# Boolean mask over the validation rows: True where the row is flagged.
anomalies = anomaly_scores < threshold

In [None]:
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense

# The LSTM layer takes 3-D input (samples, timesteps, features). Each row of
# the 2-D training matrix is presented as a sequence with one value per step:
# every column becomes a timestep holding a single feature.
X_train_array = X_train.values
n_rows, n_columns = X_train_array.shape
X_train_reshaped = X_train_array.reshape(n_rows, n_columns, 1)
num_timesteps, num_features = X_train_reshaped.shape[1:]

# One 64-unit LSTM layer feeding a single sigmoid unit for the binary label.
model = Sequential([
    LSTM(64, input_shape=(num_timesteps, num_features)),
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train_reshaped, y_train, epochs=5)

In [None]:
# Imported here because neither name is in scope at this point of the
# notebook (previously this cell failed with NameError on a fresh run).
from tensorflow.keras import Input, Model

# Standardise the features using statistics from the training rows only, so
# that a mean-squared reconstruction error weighs every column comparably.
ae_scaler = StandardScaler()
X_train_scaled = ae_scaler.fit_transform(X_train)
X_valid_scaled = ae_scaler.transform(X_valid)

# Train only on rows labelled 0 so that the network learns to reconstruct
# that class and reconstructs other rows poorly.
# NOTE(review): assumes label 0 marks legitimate transactions - confirm.
legit_rows = np.asarray(y_train) == 0
X_train_legit = X_train_scaled[legit_rows]

num_features = X_valid_scaled.shape[1]
inputs = Input(shape=(num_features,))  # renamed: `input` shadowed the builtin
encoded = Dense(32, activation='relu')(inputs)
# Linear output: standardised features are unbounded, whereas a sigmoid can
# only emit values in (0, 1) and could never reconstruct them.
decoded = Dense(num_features, activation='linear')(encoded)

autoencoder = Model(inputs, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# An autoencoder is trained to reproduce its own input (input == target).
# Previously the network was never fitted, so errors came from random weights.
autoencoder.fit(
    X_train_legit,
    X_train_legit,
    epochs=5,
    batch_size=256,
    validation_data=(X_valid_scaled, X_valid_scaled),
    verbose=0,
)


def _reconstruction_error(rows):
    """Return the mean squared reconstruction error of each row under `autoencoder`."""
    reconstructed = autoencoder.predict(rows, verbose=0)
    return np.mean(np.square(rows - reconstructed), axis=1)


# Per-row error on the validation set. The earlier `evaluate(X_valid)` call
# had no target and yields one aggregate loss, not a score per row.
recon_err = _reconstruction_error(X_valid_scaled)

# Flag rows that reconstruct worse than 99% of the legitimate training rows.
# A dedicated threshold is used instead of the IsolationForest cut-off, which
# lives on an unrelated scale. The 0.99 quantile is a tunable choice.
recon_threshold = np.quantile(_reconstruction_error(X_train_legit), 0.99)
anomalies = recon_err > recon_threshold

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras import Input

# Fresh 80/20 split of the original (non-resampled) features X and labels y.
# This rebinds X_train / y_train that earlier cells took from the resampled data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Add a trailing axis of length 1 so each row becomes a sequence of
# single-feature timesteps, the 3-D layout the LSTM layer consumes.
X_train_array = X_train.values
X_train_reshaped = X_train_array[:, :, np.newaxis]

X_test_array = X_test.values
X_test_reshaped = X_test_array[:, :, np.newaxis]

num_timesteps, num_features = X_train_reshaped.shape[1:]

# Same architecture as before: 64-unit LSTM followed by one sigmoid unit.
model = Sequential([
    LSTM(64, input_shape=(num_timesteps, num_features)),
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train for 5 epochs, reporting loss/accuracy on the held-out rows after each.
held_out = (X_test_reshaped, y_test)
model.fit(X_train_reshaped, y_train, epochs=5, validation_data=held_out)

# Final silent evaluation on the held-out rows; `scores` is [loss, accuracy].
# NOTE(review): if the classes are as imbalanced as the use of SMOTE earlier
# suggests, accuracy alone says little about fraud detection quality.
scores = model.evaluate(X_test_reshaped, y_test, verbose=0)
print(f'Test loss: {scores[0]} / Test accuracy: {scores[1]}')


In [None]:
# Feature importances of the fitted gradient-boosting model `gb`,
# printed as a raw array (one value per training column).
importances = gb.feature_importances_
print(importances)

In [None]:
# Horizontal bar chart: one bar per feature column of X, with bar length
# equal to that feature's gradient-boosting importance.
importance_ax = plt.gca()
importance_ax.barh(X.columns, importances)
importance_ax.set_title('Gradient Boost Feature Importances')

In [None]:
# Importing the necessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import pickle
from flask import Flask, request, jsonify

# 80/20 split of the original features X and labels y (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the training data to the 3-D layout (samples, timesteps, features)
# the LSTM layer consumes: each column becomes one single-feature timestep.
X_train_array = X_train.values
X_train_reshaped = X_train_array.reshape(X_train_array.shape[0], X_train_array.shape[1], 1)

# Reshape the test data the same way.
X_test_array = X_test.values
X_test_reshaped = X_test_array.reshape(X_test_array.shape[0], X_test_array.shape[1], 1)

num_timesteps = X_train_reshaped.shape[1]
num_features = X_train_reshaped.shape[2]

# 64-unit LSTM followed by a single sigmoid unit for the binary label.
model = Sequential()
model.add(LSTM(64, input_shape=(num_timesteps, num_features)))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit the model on the training data and validate on the test data
model.fit(X_train_reshaped, y_train, epochs=5, validation_data=(X_test_reshaped, y_test))

# Evaluating the model.
# Evaluate on the reshaped 3-D test array, i.e. the same layout the model was
# trained and validated on. Passing the 2-D DataFrame `X_test` does not match
# the model's declared input shape and, depending on the Keras version, either
# raises or relies on an implicit reshape.
scores = model.evaluate(X_test_reshaped, y_test)
print(f'Test Loss: {scores[0]}')
print(f'Test Accuracy: {scores[1]}')

In [None]:
MODEL_PATH = 'fraud_detection_model.pkl'

# Saving the trained model using pickle.
# The context manager guarantees the file is flushed and closed.
with open(MODEL_PATH, 'wb') as model_file:
    pickle.dump(model, model_file)

# Load the saved model once, at start-up, instead of on every request:
# unpickling a network per call is slow and previously leaked a file handle
# each time.
# NOTE(review): pickle.load executes arbitrary code from the file. It is only
# acceptable here because the file was written by this notebook just above;
# never point MODEL_PATH at an untrusted file. Keras' own model.save /
# load_model format is the more portable way to persist the network.
with open(MODEL_PATH, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Flask application for deployment
app = Flask(__name__)


# Endpoint for making predictions
@app.route('/predict', methods=['POST'])
def predict():
    """Score one transaction posted as JSON.

    Expects a body of the form ``{"input": [f1, f2, ...]}`` holding a flat list
    of numeric feature values. The list is reshaped to ``(1, n, 1)`` before
    being passed to the model.

    Returns:
        A JSON response ``{"predictions": [[p]]}`` with the model output, or
        ``{"error": "..."}`` with HTTP status 400 when the body is not valid
        JSON, lacks the ``input`` key, or ``input`` is not a non-empty flat
        list of numbers.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so the error response below stays under this handler's control.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'input' not in payload:
        return jsonify({'error': "Request body must be JSON with an 'input' field."}), 400

    try:
        input_data = np.asarray(payload['input'], dtype=float)
    except (TypeError, ValueError):
        return jsonify({'error': "'input' must be a flat list of numbers."}), 400

    if input_data.ndim != 1 or input_data.size == 0:
        return jsonify({'error': "'input' must be a non-empty flat list of numbers."}), 400

    # Preprocess the input data as required: one sample, one value per timestep.
    input_data = input_data.reshape(1, input_data.shape[0], 1)

    # Make predictions using the model loaded at start-up.
    predictions = loaded_model.predict(input_data)

    # Return the predictions as a response
    return jsonify({'predictions': predictions.tolist()})

In [None]:
# Running the Flask application on a web server.
# The guard starts Flask's built-in server only when this code executes as
# the main module; app.run() is called with its default settings.
# NOTE(review): inside a notebook kernel __name__ is normally '__main__', so
# this cell will start the server and keep the cell busy until it is
# stopped - confirm that is the intended way to launch it.
if __name__ == '__main__':
    app.run()