<a href="https://colab.research.google.com/github/SamudralaAnuhya/FraudDetection/blob/main/FraudDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q groq

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/109.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━[0m [32m92.2/109.6 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.6/109.6 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
pip install us


Collecting us
  Downloading us-3.2.0-py3-none-any.whl.metadata (10 kB)
Downloading us-3.2.0-py3-none-any.whl (13 kB)
Installing collected packages: us
Successfully installed us-3.2.0


In [None]:
from google.colab import userdata
from groq import Groq

# Shared Groq client used by query_llm(). The API key is looked up in Colab's
# user-data store under 'GROQ_API_KEY' instead of being written into the notebook.
client = Groq(api_key=userdata.get('GROQ_API_KEY'))

In [None]:
# Define Prompt Templates
#
# Every template below is a str.format() pattern with exactly four placeholders:
# {amount}, {location}, {user_location} and {time}. query_fraud_detection() fills
# them in and sends the whole text as ONE message with role "user", so the
# "System:" / "User:" labels inside the templates are plain text within that
# message, not separate chat roles.
#
# Zero-shot: no examples; asks for a "Prediction" line (Fraudulent or Legitimate)
# followed by a "Reason" line, which parse_prediction_and_reason() looks for.
ZERO_SHOT_PROMPT_TEMPLATE = """
System: You are a highly skilled fraud detection expert. Your task is to analyze financial transactions and determine if they are likely fraudulent.

User: Analyze the following transaction and provide your assessment:
- Amount: {amount}
- Location: {location}
- User Location: {user_location}
- Transaction Time: {time}

Respond with a structured analysis:
- Prediction: [Fraudulent or Legitimate]
- Reason: Provide a concise explanation for your prediction, highlighting any suspicious factors.
"""

# Few-shot: three worked examples precede the transaction to analyse. It takes the
# same four str.format() placeholders as the other templates ({amount}, {location},
# {user_location}, {time}).
# NOTE: this template offers a third label, "Potentially Fraudulent", that the
# zero-shot and chain-of-thought templates do not, so predictions from different
# prompt types are not drawn from the same label set.
FEW_SHOT_PROMPT_TEMPLATE = """
System: You are a fraud detection expert. Utilize the provided examples to guide your analysis of new transactions.

Example 1:
- Amount: $10,000
- Location: Paris, France
- User Location: New York, USA
- Transaction Time: 3:00 AM EST
- Result: Fraudulent
- Reason: The transaction originated from an international location significantly distant from the user's usual location during unusual hours, raising suspicion of unauthorized activity.

Example 2:
- Amount: $50
- Location: New York, USA
- User Location: New York, USA
- Transaction Time: 2:00 PM EST
- Result: Legitimate
- Reason: The transaction aligns with the user's typical spending patterns and location, indicating normal activity.

Example 3:
- Amount: $2,000
- Location: San Francisco, USA
- User Location: Los Angeles, USA
- Transaction Time: 10:00 AM PST
- Result: Potentially Fraudulent
- Reason: While the transaction location is within the same country, it's outside the user's usual city and involves a larger-than-usual amount, requiring further investigation.

Now analyze the following transaction:
- Amount: {amount}
- Location: {location}
- User Location: {user_location}
- Transaction Time: {time}

Provide your assessment:
- Prediction: [Fraudulent, Legitimate, Potentially Fraudulent]
- Reason: Explain your prediction, citing specific factors that influenced your judgment.
"""

# Chain of thought: lists four explicit reasoning steps (location, time, amount,
# then a combined decision) before asking for the same "Prediction" / "Reason"
# lines as the zero-shot template. Takes the same four str.format() placeholders:
# {amount}, {location}, {user_location}, {time}.
CHAIN_OF_THOUGHT_PROMPT_TEMPLATE = """
System: You are a meticulous fraud detection analyst. Your task is to systematically evaluate transactions for potential fraud.

User: Analyze the following transaction for potential fraud:
- Amount: {amount}
- Location: {location}
- User Location: {user_location}
- Transaction Time: {time}

Think step-by-step:
1. Compare the transaction location to the user's usual location. Are they significantly different? If so, this could be a red flag.
2. Analyze the transaction time. Does it fall outside the user's typical active hours? Unusual transaction times can indicate suspicious activity.
3. Evaluate the transaction amount against the user's usual spending pattern. Is it significantly higher than their average transaction value? Large deviations from typical spending are a potential indicator of fraud.
4. Combine all these factors to decide if the transaction is likely fraudulent.

Provide your assessment:
- Prediction: [Fraudulent or Legitimate]
- Reason: Explain your prediction, clearly outlining the factors that led to your conclusion.
"""


In [37]:
def query_llm(model_name, chat_history, temperature=0.2):
    """
    Send a chat history to the model through the module-level Groq ``client``.

    Args:
        model_name: Model identifier passed as ``model`` to the chat-completions call.
        chat_history: Iterable of dicts with ``"role"`` and ``"content"`` keys.
            Only entries whose role is system/user/assistant are forwarded, and
            any extra keys on an entry are dropped.
        temperature: Sampling temperature for the request (defaults to 0.2).

    Returns:
        str: The first choice's message content. If the API returns no choices,
        or a choice whose content is empty/None, the placeholder
        ``"No response generated."`` is returned so callers always get text.
        On any exception the string ``"Error querying the model: <exception>"``
        is returned instead of raising.
    """
    try:
        # Forward only the roles the chat API understands, and only role/content.
        messages = [
            {"role": msg["role"], "content": msg["content"]}
            for msg in chat_history
            if msg["role"] in ("system", "user", "assistant")
        ]

        completion = client.chat.completions.create(
            messages=messages,
            model=model_name,
            temperature=temperature,
        )

        if completion.choices:
            content = completion.choices[0].message.content
            # content may be None/empty; never hand that to regex-based parsers.
            if content:
                return content
        return "No response generated."

    except Exception as e:
        # Deliberate best-effort: report the failure as text rather than raising.
        return f"Error querying the model: {e}"


In [38]:
import re

# Function to query fraud detection
def query_fraud_detection(model_name, chat_history, template, amount, location, user_location, time):
    """
    Run one transaction through the LLM and return ``(prediction, reason)``.

    Args:
        model_name: Model identifier forwarded to ``query_llm``.
        chat_history: List of ``{"role", "content"}`` dicts. It is mutated in
            place: on success the formatted prompt (role "user") and the model's
            reply (role "assistant") are appended. On failure nothing is left
            behind, so failed calls are not resent with later requests.
        template: ``str.format`` pattern with ``{amount}``, ``{location}``,
            ``{user_location}`` and ``{time}`` placeholders.
        amount, location, user_location, time: Values substituted into the template.

    Returns:
        tuple: ``(prediction, reason)`` as produced by
        ``parse_prediction_and_reason``. If the model call fails, the error
        message is returned as the prediction with an empty reason.
    """
    # query_llm reports failures as text with this prefix instead of raising.
    error_prefix = "Error querying the model:"
    appended = 0  # number of entries this call has added to chat_history

    try:
        # 1) Format the prompt
        prompt = template.format(
            amount=amount,
            location=location,
            user_location=user_location,
            time=time
        )

        # 2) Add the user's input to chat history
        chat_history.append({"role": "user", "content": prompt})
        appended += 1

        # 3) Query the model and store raw response
        response_text = query_llm(model_name, chat_history)

        if not response_text:
            raise ValueError("empty response from the model")

        if response_text.startswith(error_prefix):
            # Surface the failure instead of parsing it as model output, and
            # drop the unanswered prompt from the history.
            del chat_history[-appended:]
            return response_text, ""

        # 4) Add the model's response to chat history
        chat_history.append({"role": "assistant", "content": response_text})
        appended += 1

        # 5) Parse the response for 'Prediction' and 'Reason'
        prediction, reason = parse_prediction_and_reason(response_text)

        # 6) Return them separately
        return prediction, reason

    except Exception as e:
        # Roll back whatever this call added so the history stays consistent.
        if appended:
            del chat_history[-appended:]
        # If something fails, return an error message as the 'prediction'
        return f"Error querying the model: {e}", ""

In [39]:
def parse_prediction_and_reason(response_text):
    """
    Extract the 'Prediction' and 'Reason' fields from a model response.

    The labels are matched case-insensitively. Markdown emphasis around a label
    (``**Prediction:**``, ``**Prediction**:``) is tolerated, and emphasis
    markers or square brackets wrapped around the prediction value are removed.

    Args:
        response_text: Raw text returned by the model (may be None or empty).

    Returns:
        tuple: ``(prediction, reason)``.
        ``prediction`` is the rest of the line after the first "Prediction:"
        label, or ``"Unknown"`` if none is found.
        ``reason`` is everything after the first "Reason:" label up to the end
        of the text, or ``"No reason provided."`` if none is found.
    """
    if not response_text:
        return "Unknown", "No reason provided."

    # Label, optional markdown markers on either side of the colon, then the value.
    # No DOTALL here: the prediction is the remainder of that line only.
    match_pred = re.search(r'(?i)\bprediction[*_`]*\s*:[*_`]*\s*(.*)', response_text)
    # DOTALL: the reason may span several lines, so capture to the end of the text.
    match_reason = re.search(r'(?i)\breason[*_`]*\s*:[*_`]*\s*(.*)', response_text, re.DOTALL)

    prediction = "Unknown"
    if match_pred:
        # Drop wrappers such as "[Fraudulent]" or "Fraudulent**".
        prediction = match_pred.group(1).strip().strip("*_`[]").strip() or "Unknown"

    reason = "No reason provided."
    if match_reason:
        reason = match_reason.group(1).strip() or "No reason provided."

    return prediction, reason

In [54]:
import ipywidgets as widgets
from IPython.display import Markdown, display
from us import states
import pandas as pd

# Interactive function for fraud detection
def fraud_detection_interactive():
    """
    Build and display an ipywidgets form for checking one transaction at a time.

    The form collects a customer ID, model, prompt type, amount, transaction
    location, user location and time. Three pieces of state live in this
    function's closure for as long as the widgets exist:

    * ``transactions_data`` - one dict per submitted transaction (shown as a table
      and written out by the export button).
    * ``chat_history`` - passed to ``query_fraud_detection`` on every submit; it is
      shared by all submissions made from this form.
    * ``customer_data`` - remembers the last user location per customer ID so it
      can be auto-filled when that ID is entered again.

    Submit validates the inputs, queries the model, prints the inputs together
    with the prediction and reason, appends a row to the table and clears the
    form. "Export to CSV" writes the rows to ``transactions_log.csv``.

    Returns:
        None. The widgets are rendered with ``display``.
    """
    import math  # local import: only needed by the amount check below

    model_options = ["llama3-70b-8192", "gemma2-9b-it", "llama-3.3-70b-specdec"]
    prompt_options = {
        "Zero-Shot": ZERO_SHOT_PROMPT_TEMPLATE,
        "Few-Shot": FEW_SHOT_PROMPT_TEMPLATE,
        "Chain of Thought": CHAIN_OF_THOUGHT_PROMPT_TEMPLATE,
    }
    us_states = [state.name for state in states.STATES]
    hour_options = [f"{i:02d}:00" for i in range(24)]  # 00:00 ... 23:00

    transactions_data = []
    chat_history = []
    customer_data = {}  # customer ID -> {'user_location': ...}

    def make_combobox(description, options, placeholder):
        # ensure_option=False lets the user type a value that is not in the list.
        return widgets.Combobox(
            description=description,
            options=options,
            placeholder=placeholder,
            ensure_option=False
        )

    # Widgets
    customer_id_widget = widgets.Text(description="Customer ID:", placeholder="Enter customer ID")
    model_widget = widgets.Dropdown(description="Model:", options=model_options)
    prompt_widget = widgets.Dropdown(description="Prompt:", options=list(prompt_options.keys()))
    amount_widget = widgets.Text(description="Amount ($):", placeholder="Enter amount")
    location_widget = make_combobox("Location:", us_states, "Enter or select location")
    user_location_widget = make_combobox("User Location:", us_states, "Enter or select location")
    time_widget = make_combobox("Time:", hour_options, "Enter or select time")

    submit_button = widgets.Button(description="Submit")
    export_button = widgets.Button(description="Export to CSV")

    # Outputs
    output_area = widgets.Output()
    table_output = widgets.Output()

    # Watchers for "Other" selection: entering 'Other' clears the field and
    # prompts for a custom location instead.
    def make_other_watcher(combobox):
        def on_change(change):
            if change['name'] == 'value' and change['new'] == 'Other':
                combobox.value = ''
                combobox.placeholder = 'Enter a custom location...'
        return on_change

    location_widget.observe(make_other_watcher(location_widget), names='value')
    user_location_widget.observe(make_other_watcher(user_location_widget), names='value')

    # To autofill user_location based on stored data
    def on_customer_id_change(change):
        new_customer_id = change['new']
        if new_customer_id in customer_data:
            user_location_widget.value = customer_data[new_customer_id]['user_location']
        else:
            # Clear if it's a new ID
            user_location_widget.value = ""
    customer_id_widget.observe(on_customer_id_change, names='value')

    def is_valid_amount(text):
        # Accept plain numbers with an optional leading "$" and thousands commas.
        cleaned = text.strip().replace(",", "")
        if cleaned.startswith("$"):
            cleaned = cleaned[1:]
        try:
            return math.isfinite(float(cleaned))
        except ValueError:
            return False

    # Submit logic
    def on_submit(_):
        with output_area:
            output_area.clear_output()

            # Gather user inputs
            customer_id = customer_id_widget.value
            model_name = model_widget.value
            prompt_type = prompt_widget.value
            amount = amount_widget.value
            location = location_widget.value
            user_location = user_location_widget.value
            time = time_widget.value

            # Basic validation
            if not all([customer_id, model_name, prompt_type, amount, location, user_location, time]):
                print("Error: All fields are required.")
                return

            # Reject non-numeric amounts before spending an API call or logging a row.
            if not is_valid_amount(amount):
                print("Error: Amount must be a number (for example 250 or 1,250.50).")
                return

            # Query the model; `response` holds the parsed reason text.
            template = prompt_options[prompt_type]
            prediction, response = query_fraud_detection(
                model_name, chat_history, template,
                amount, location, user_location, time
            )

            # Debug info
            print("Customer ID:", customer_id)
            print("Model Name:", model_name)
            print("Prompt Type:", prompt_type)
            print("Amount:", amount)
            print("Location:", location)
            print("User Location:", user_location)
            print("Time:", time)
            print("\nModel's Response:")
            # Show the prediction too: when the query fails, the error message is
            # carried in `prediction` and would otherwise only appear in the table.
            print("Prediction:", prediction)
            display(Markdown(response))

            # Append row to in-memory data
            row_data = {
                "Customer ID": customer_id,
                "Model": model_name,
                "Prompt Type": prompt_type,
                "Amount": amount,
                "Location": location,
                "User Location": user_location,
                "Time": time,
                "prediction": prediction,
                "Response": response
            }
            transactions_data.append(row_data)

            # Store user data for auto-fill next time
            customer_data[customer_id] = {
                'user_location': user_location
            }

            # Clear fields
            customer_id_widget.value = ""
            amount_widget.value = ""
            location_widget.value = ""
            user_location_widget.value = ""
            time_widget.value = ""
            location_widget.placeholder = "Enter or select location"
            user_location_widget.placeholder = "Enter or select location"

            # Refresh table display
            with table_output:
                table_output.clear_output()
                df = pd.DataFrame(transactions_data)
                display(df)

    # NOTE: on_export must be OUTSIDE on_submit
    def on_export(_):
        """Export current transaction data to CSV."""
        with output_area:
            output_area.clear_output()
            if not transactions_data:
                print("No transaction data to export!")
                return
            df = pd.DataFrame(transactions_data)
            csv_filename = "transactions_log.csv"
            df.to_csv(csv_filename, index=False)
            print(f"Exported to {csv_filename}!")

    # Attach button events
    submit_button.on_click(on_submit)
    export_button.on_click(on_export)

    # Display widgets
    display(
        customer_id_widget,
        model_widget,
        prompt_widget,
        amount_widget,
        location_widget,
        user_location_widget,
        time_widget,
        submit_button,
        export_button,
        output_area,
        table_output
    )

# Finally, call the function: this renders the form, buttons and the two output
# areas below this cell. All state (logged rows, chat history, remembered user
# locations) is created fresh on each call.
fraud_detection_interactive()


Text(value='', description='Customer ID:', placeholder='Enter customer ID')

Dropdown(description='Model:', options=('llama3-70b-8192', 'gemma2-9b-it', 'llama-3.3-70b-specdec'), value='ll…

Dropdown(description='Prompt:', options=('Zero-Shot', 'Few-Shot', 'Chain of Thought'), value='Zero-Shot')

Text(value='', description='Amount ($):', placeholder='Enter amount')

Combobox(value='', description='Location:', options=('Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',…

Combobox(value='', description='User Location:', options=('Alabama', 'Alaska', 'Arizona', 'Arkansas', 'Califor…

Combobox(value='', description='Time:', options=('00:00', '01:00', '02:00', '03:00', '04:00', '05:00', '06:00'…

Button(description='Submit', style=ButtonStyle())

Button(description='Export to CSV', style=ButtonStyle())

Output()

Output()