***CONSUMER START***

In [1]:
from kafka import KafkaConsumer
import pickle
import json
import pandas as pd
import shap
import numpy as np
import matplotlib.pyplot as plt

***PREPARING KAFKA FOR INCOMING DATA***

In [2]:
# Broker address list handed to the consumer below.
bootstrap_servers = ['localhost:9092']

# Subscribe to topic 'temp_05' as a member of consumer group 'my-group'.
# NOTE(review): per the kafka-python docs, auto_offset_reset='earliest' only
# applies when the group has no valid committed offset - confirm this is the
# intended replay behaviour.
consumer = KafkaConsumer(
    'temp_05',
    bootstrap_servers=bootstrap_servers,
    auto_offset_reset='earliest',
    group_id='my-group',
)

In [3]:
def removeID(msg):
    """Decode a message payload and split its 'ID' field from the other fields.

    Parameters
    ----------
    msg : object
        Anything exposing a ``value`` attribute that holds UTF-8 encoded JSON
        bytes. The JSON document is expected to be an object with an 'ID' key.

    Returns
    -------
    tuple
        ``(data_list, ID, keys)``: the remaining values in document order, the
        value that was stored under 'ID', and the remaining field names in the
        same order as ``data_list``.

    Raises
    ------
    KeyError
        If the decoded object has no 'ID' key.
    """
    payload = json.loads(msg.value.decode('utf-8'))

    # pop() removes the identifier from the payload and returns it in one step.
    ID = payload.pop('ID')

    return list(payload.values()), ID, list(payload.keys())

***PREDICTING***

In [4]:
# NOTE: pickle.load can execute arbitrary code stored in the file, so both
# artefacts below must come from a trusted source.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# NOTE(review): nothing visible in this notebook reads `explainer` - prepare()
# builds its own TreeExplainer - confirm whether this artefact is still needed.
with open("explainer.pkl", "rb") as explainer_file:
    explainer = pickle.load(explainer_file)

def predict(data_list):
    """Run the loaded ``model`` on a single sample.

    Parameters
    ----------
    data_list : list
        Feature values of one sample.

    Returns
    -------
    Whatever ``model.predict`` returns for a batch that contains only this
    sample.
    """
    # The model is given a batch, so wrap the single sample in a list.
    single_row_batch = [data_list]
    return model.predict(single_row_batch)

***EXPLANATION***

In [5]:
def prepare(data_list):
    """Compute SHAP values for one sample using a TreeExplainer built on ``model``.

    Parameters
    ----------
    data_list : list
        Feature values of one sample (assumed to be a flat list - TODO confirm).

    Returns
    -------
    dict
        ``'data_frame'``: the sample as the first row of the transposed
        DataFrame's value array.
        ``'shap_values'``: the transposed result of ``shap_values(...)``.
        NOTE(review): the layout of that array depends on the installed shap
        version and the model type - confirm before relying on row indices.
    """
    # Column frame -> single-row frame -> first row as an array.
    data_frame = pd.DataFrame(data_list).T.values[0]

    # NOTE(review): a fresh explainer is built on every call; this local object
    # is independent of the module-level `explainer` loaded from explainer.pkl.
    tree_explainer = shap.TreeExplainer(model)
    raw_shap_values = tree_explainer.shap_values(data_frame, check_additivity=False)

    return {'data_frame': data_frame, 'shap_values': raw_shap_values.transpose()}

In [6]:
# feature importance ranking: printed contributions plus a bar chart

def plot_feature_importance(importance_df):
  """Draw a bar chart of per-feature importance on the current axes and show it.

  Parameters
  ----------
  importance_df : pandas.DataFrame
      Must provide a 'feature' column (bar labels) and an 'importance'
      column (bar heights).
  """
  # Work on the current axes, which is what the pyplot wrappers target too.
  ax = plt.gca()
  ax.bar(importance_df['feature'], importance_df['importance'])

  ax.set_xlabel("Feature")
  ax.set_ylabel("Contribution")
  ax.set_title("Contribution of Each Feature in the Fraudulence")

  # Slanted, right-aligned tick labels.
  plt.xticks(rotation=45, ha='right')
  ax.grid(axis='y', linestyle='--', alpha=0.6)

  plt.tight_layout()
  plt.show()

def get_feature_importance_ranking(data_list, keys, threshold=0.065):
  """Print the features that pushed a sample towards fraud and plot a ranking.

  Parameters
  ----------
  data_list : list
      Feature values of one sample, in the same order as ``keys``.
  keys : list
      Feature names.
  threshold : float, optional
      A feature is reported only when its contribution in row 1 of the SHAP
      array is strictly greater than this value. Defaults to 0.065, the
      value that used to be hard-coded.

  Notes
  -----
  Row 1 of the array returned by ``prepare`` is presumably the contribution
  towards the fraud class - TODO confirm against the installed shap version.
  The plotted importance is the mean absolute SHAP value along axis 0.
  """
  # prepare() builds an explainer and evaluates it, so call it once and reuse
  # both results instead of computing everything twice.
  prepared = prepare(data_list)
  shap_values = prepared['shap_values']
  data_frame = prepared['data_frame']

  # Walk names, values and contributions in lockstep: no per-feature
  # keys.index() search, and repeated names still get their own value.
  for name, value, contribution in zip(keys, data_frame, shap_values[1]):
    if contribution > threshold:
      print(f" - Feature: {name} (value: {value:.4f}) made the transaction Fraudulent by a Factor of:  {abs(contribution):.2f}.")

  feature_importances = np.abs(shap_values).mean(axis=0)
  importance_df = pd.DataFrame(
      {'feature': keys, 'importance': feature_importances}
  ).sort_values(by='importance', ascending=False)

  plot_feature_importance(importance_df)

In [7]:
# SHAP values individually

def plot_shap_value_distribution(data_list, keys):
  """Show one histogram per column of the SHAP array returned by ``prepare``.

  Parameters
  ----------
  data_list : list
      Feature values of one sample.
  keys : list
      Names used in the plot titles; ``keys[i]`` labels column ``i``.
  """
  shap_values = prepare(data_list)['shap_values']
  column_count = shap_values.shape[1]

  for column in range(column_count):
    column_values = shap_values[:, column]
    plt.hist(column_values)
    plt.xlabel("SHAP Value")
    plt.ylabel("Count")
    plt.title(f"Distribution of SHAP Values for {keys[column]}")
    plt.show()
    # Clear the current figure before the next histogram is drawn.
    plt.clf()

In [8]:
# feature name - SHAP value pairs, printed individually

def explain_individual_datapoint(data_list, keys):
  """Print every feature name next to the matching entry of SHAP row 0.

  Parameters
  ----------
  data_list : list
      Feature values of one sample.
  keys : list
      Feature names, paired positionally with the entries of row 0.
  """
  # NOTE(review): get_feature_importance_ranking reads row 1, this reads
  # row 0 - confirm which row is intended here.
  first_row = prepare(data_list)['shap_values'][0]

  name_value_pairs = zip(keys, first_row)
  for feature_name, shap_value in name_value_pairs:
    print(f"Feature: {feature_name}, SHAP Value: {shap_value}")

In [9]:
def explain(data_list, keys):
    """Explain one flagged sample.

    Only the feature-importance ranking (printed contributions plus bar chart)
    is produced at the moment.

    Parameters
    ----------
    data_list : list
        Feature values of the sample.
    keys : list
        Feature names in the same order as ``data_list``.
    """
    get_feature_importance_ranking(data_list, keys)

    # Alternative views, currently switched off:
    # plot_shap_value_distribution(data_list, keys)
    # explain_individual_datapoint(data_list, keys)

***DATA INCOMING***

In [10]:
# Score every incoming message; explain the ones predicted as class 1.
for msg in consumer:
    data_list, ID, keys = removeID(msg)
    y = predict(data_list)

    # Anything not predicted as class 1 needs no report.
    if not (y == 1):
        continue

    print(f"The transaction with ID: {ID} is Fraudulent")
    try:
        explain(data_list, keys)
    except Exception as e:
        # Best effort: a failed explanation is reported and the loop carries on.
        print('Something happened ', e)

***CONSUMER STOP***

In [None]:
# Run this cell after the consume loop has ended to close the Kafka consumer.
consumer.close()
print("Consumer stopped")