In [None]:
"""
Project1 : Stock Trade Outlier Analysis using Graph Database

Objectives:
    Develop an FX Trade Analysis System – Build a system to analyze foreign exchange (FX) trades and detect anomalies.
    
    Construct a Graph-Based Trade Model – Store FX trade data in a Neo4j graph database, representing trades as nodes and relationships between them as edges.
    
    Identify Outliers in Trading Patterns – Implement statistical methods (Z-score, IQR) to detect unusual trades based on trade volume, price, and timestamp.
    
    Compare Actual Trades with Expected Guidelines – Define trade pattern benchmarks and identify deviations from expected behavior.
    
    Visualize and Report Insights – Generate interactive visualizations and summary reports highlighting trade outliers and deviations.
    

"""

In [1]:
!pip install neo4j pandas numpy networkx matplotlib scipy fpdf gradio




In [19]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from scipy.stats import zscore
from fpdf import FPDF
import gradio as gr
from neo4j import GraphDatabase

# Neo4j connection details
NEO4J_URL = "bolt://localhost:7687"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "password"

# Establish connection to Neo4j
neo4j_driver = GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
print("Connected to Neo4j successfully!")

# Generate sample trade data
np.random.seed(42)
NUM_TRADES = 15

trade_data = {
    "trade_id": range(1, NUM_TRADES + 1),
    "currency_pair": ["EUR/USD"] * 7 + ["GBP/USD"] * 8,
    "volume": np.concatenate((
        np.random.randint(5000, 20000, NUM_TRADES - 4),
        [50000, 100000, 200, 300]
    )),
    "price": np.round(np.concatenate((
        np.random.uniform(1.1, 1.3, NUM_TRADES - 4),
        [1.5, 0.8, 1.6, 0.7]
    )), 4),
    "timestamp": pd.date_range("2024-02-01", periods=NUM_TRADES, freq="h")
}

trade_df = pd.DataFrame(trade_data)

# Insert Data into Neo4j
def insert_trade_node(tx, trade):
    query = """
    CREATE (t:Trade {trade_id: $trade_id, currency_pair: $currency_pair, 
                     volume: $volume, price: $price, timestamp: $timestamp})
    """
    tx.run(query, **trade)

with neo4j_driver.session() as session:
    for _, row in trade_df.iterrows():
        session.write_transaction(insert_trade_node, row.to_dict())
print("Inserted trades into Neo4j!")

# Create Relationships Between Trades
def create_trade_relationships(tx):
    query = """
    MATCH (t1:Trade), (t2:Trade) 
    WHERE t1.timestamp < t2.timestamp
    CREATE (t1)-[:NEXT_TRADE]->(t2)
    """
    tx.run(query)

with neo4j_driver.session() as session:
    session.write_transaction(create_trade_relationships)
print("Created relationships between consecutive trades!")

# Compute Z-scores for trade volume to identify outliers
trade_df["volume_zscore"] = zscore(trade_df["volume"])
trade_df["is_outlier"] = trade_df["volume_zscore"].abs() > 2

# Mark Outliers in Neo4j
def mark_trade_outlier(tx, trade_id, is_outlier):
    query = """
    MATCH (t:Trade {trade_id: $trade_id})
    SET t.is_outlier = $is_outlier
    """
    tx.run(query, trade_id=int(trade_id), is_outlier=bool(is_outlier))

with neo4j_driver.session() as session:
    for _, row in trade_df.iterrows():
        session.write_transaction(mark_trade_outlier, row["trade_id"], row["is_outlier"])
print("Marked outliers in Neo4j!")

# Generate Charts
def generate_charts():
    # Plot Trade Volume Over Time
    plt.figure(figsize=(8, 5))
    plt.plot(trade_df["trade_id"], trade_df["volume"], marker='o', linestyle='-', color='b', label='Trade Volume')
    plt.xlabel("Trade ID")
    plt.ylabel("Volume")
    plt.title("Trade Volume Over Time")
    plt.legend()
    plt.savefig("plot_graph.png")
    plt.close()
    
    # Plot Trade Outlier Distribution (Pie Chart)
    labels = ['Normal Trades', 'Outliers']
    sizes = [len(trade_df) - trade_df['is_outlier'].sum(), trade_df['is_outlier'].sum()]
    colors = ['green', 'red']
    plt.figure(figsize=(6, 6))
    plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
    plt.title("Trade Outliers Distribution")
    plt.savefig("pie_chart.png")
    plt.close()
    
    # Plot Trade Network Graph
    trade_network = nx.DiGraph()
    for _, row in trade_df.iterrows():
        node_color = "red" if row["is_outlier"] else "green"
        trade_network.add_node(row["trade_id"], color=node_color)
    for i in range(len(trade_df) - 1):
        trade_network.add_edge(trade_df.loc[i, "trade_id"], trade_df.loc[i+1, "trade_id"])
    plt.figure(figsize=(8, 5))
    nx.draw(trade_network, with_labels=True, node_color=[trade_network.nodes[n]["color"] for n in trade_network.nodes], edge_color="gray", node_size=700)
    plt.title("Trade Network Graph")
    plt.savefig("network_graph.png")
    plt.close()

# Generate PDF Report with All Graphs
def generate_pdf_report():
    generate_charts()
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", "B", 14)
    pdf.cell(200, 10, "Trade Outlier Analysis Report", ln=True, align='C')
    pdf.ln(10)
    
    # Add Table
    pdf.set_font("Arial", "B", 10)
    pdf.cell(20, 10, "ID", 1, 0, 'C')
    pdf.cell(40, 10, "Currency Pair", 1, 0, 'C')
    pdf.cell(30, 10, "Volume", 1, 0, 'C')
    pdf.cell(30, 10, "Price", 1, 0, 'C')
    pdf.cell(40, 10, "Timestamp", 1, 0, 'C')
    pdf.cell(30, 10, "Outlier", 1, 1, 'C')
    pdf.set_font("Arial", "", 10)
    
    for _, row in trade_df.iterrows():
        pdf.cell(20, 10, str(row["trade_id"]), 1, 0, 'C')
        pdf.cell(40, 10, row["currency_pair"], 1, 0, 'C')
        pdf.cell(30, 10, str(row["volume"]), 1, 0, 'C')
        pdf.cell(30, 10, str(row["price"]), 1, 0, 'C')
        pdf.cell(40, 10, row["timestamp"].strftime("%Y-%m-%d %H:%M"), 1, 0, 'C')
        pdf.cell(30, 10, "Yes" if row["is_outlier"] else "No", 1, 1, 'C')
    
    pdf.ln(10)
    pdf.set_font("Arial", "B", 12)
    pdf.cell(200, 10, "Trade Volume Over Time", ln=True, align='C')
    pdf.image("plot_graph.png", x=10, w=180)
    pdf.ln(10)
    pdf.cell(200, 10, "Trade Outliers Distribution", ln=True, align='C')
    pdf.image("pie_chart.png", x=10, w=100)
    pdf.ln(10)
    pdf.cell(200, 10, "Trade Network Graph", ln=True, align='C')
    pdf.image("network_graph.png", x=10, w=180)
    pdf.output("trade_report.pdf")
    return "trade_report.pdf"

# Gradio Interface
def display_trade_table():
    return trade_df

def display_volume_graph():
    return "plot_graph.png"

def display_outlier_pie_chart():
    return "pie_chart.png"

def display_network_graph():
    return "network_graph.png"

def download_report():
    return generate_pdf_report()

with gr.Blocks() as interface:
    with gr.Tabs():
        with gr.Tab("Table"):
            gr.Dataframe(display_trade_table, label="Trade Data Table")
        with gr.Tab("Graphs"):
            gr.Image(display_volume_graph, label="Trade Volume Over Time")
            gr.Image(display_outlier_pie_chart, label="Trade Outliers Distribution")
            gr.Image(display_network_graph, label="Trade Network Graph")
        with gr.Tab("Download Document"):
            gr.File(download_report, label="Download Report")

interface.launch()


Connected to Neo4j successfully!


  session.write_transaction(insert_trade, row.to_dict())


Inserted trades into Neo4j!


  session.write_transaction(create_relationships)


Created relationships between consecutive trades!
Marked outliers in Neo4j!


  session.write_transaction(mark_outliers, row["trade_id"], row["is_outlier"])


* Running on local URL:  http://127.0.0.1:7868

To create a public link, set `share=True` in `launch()`.


