In [None]:
"""
Project 1: FX Trade Outlier Analysis Using a Graph Database

Objectives:
    Develop an FX Trade Analysis System – Build a system to analyze foreign exchange (FX) trades and detect anomalies.
    
    Construct a Graph-Based Trade Model – Store FX trade data in a Neo4j graph database, representing trades as nodes and relationships between them as edges.
    
    Identify Outliers in Trading Patterns – Implement statistical methods (Z-score, IQR) to detect unusual trades based on trade volume, price, and timestamp.
    
    Compare Actual Trades with Expected Guidelines – Define trade pattern benchmarks and identify deviations from expected behavior.
    
    Visualize and Report Insights – Generate interactive visualizations and summary reports highlighting trade outliers and deviations.
    

"""

In [1]:
!pip install pandas numpy py2neo matplotlib plotly




In [16]:
import os
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from fpdf import FPDF
from pathlib import Path
from neo4j import GraphDatabase

# Connect to Neo4j
# Connection settings come from the environment so real credentials never have
# to be saved in the notebook. The fallbacks are the original local-development
# values, so an unconfigured local setup keeps working unchanged.
URL = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")  # Placeholder fallback; set NEO4J_PASSWORD for a real database

driver = GraphDatabase.driver(URL, auth=(USERNAME, PASSWORD))

# Synthetic demo trades (swap in real trade data here): 100 one-minute EUR/USD
# ticks whose price and volume both rise linearly.
_tick_ids = range(100)
trades_df = pd.DataFrame({
    "timestamp": pd.date_range(start="2023-01-01", periods=len(_tick_ids), freq="min"),
    "currency_pair": ["EUR/USD" for _ in _tick_ids],
    "price": [1.2 + 0.01 * i for i in _tick_ids],
    "volume": [1000 + 100 * i for i in _tick_ids],
})

# Overwrite two ticks with hand-picked anomalies.
# NOTE(review): the price ramp itself runs from 1.2 to 2.19, so 1.5 lies well
# inside the series (|z| is roughly 0.7) and the |z| > 3 rule below does not
# flag it; only the volume spike is detected.
trades_df.at[10, "price"] = 1.5
trades_df.at[20, "volume"] = 50000

# Z-score of each metric against the whole sample: (x - mean) / sample std.
for _metric in ("price", "volume"):
    _values = trades_df[_metric]
    trades_df[f"{_metric}_zscore"] = (_values - _values.mean()) / _values.std()

# A trade is an outlier when it sits more than 3 standard deviations from the mean.
for _metric in ("price", "volume"):
    trades_df[f"{_metric}_outlier"] = trades_df[f"{_metric}_zscore"].abs() > 3

# Define expected guidelines
# Each range is a (lower, upper) tuple. check_guidelines() tests them with
# Series.between(), so a value equal to either bound counts as within guidelines.
expected_price_range = (1.2, 1.5)  # Example range for price in EUR/USD
expected_volume_range = (1000, 5000)  # Example range for volume

# Function to check if trades meet expected guidelines
def check_guidelines(df, price_range=None, volume_range=None):
    """Flag each trade as inside or outside the expected price and volume ranges.

    Args:
        df: DataFrame with ``price`` and ``volume`` columns.
        price_range: Optional ``(lower, upper)`` bounds for ``price``. When
            omitted, the module-level ``expected_price_range`` is used.
        volume_range: Optional ``(lower, upper)`` bounds for ``volume``. When
            omitted, the module-level ``expected_volume_range`` is used.

    Returns:
        The same DataFrame object that was passed in, with the boolean columns
        ``price_within_guidelines`` and ``volume_within_guidelines`` added
        (or overwritten). Both bounds are inclusive.
    """
    # Fall back to the notebook-wide guidelines only when no override is given,
    # so existing single-argument calls behave exactly as before.
    if price_range is None:
        price_range = expected_price_range
    if volume_range is None:
        volume_range = expected_volume_range

    df['price_within_guidelines'] = df['price'].between(price_range[0], price_range[1])
    df['volume_within_guidelines'] = df['volume'].between(volume_range[0], volume_range[1])
    return df

# Update the DataFrame with guideline checks
# (adds the boolean price_within_guidelines / volume_within_guidelines columns)
trades_df = check_guidelines(trades_df)

# Function to insert trades into Neo4j
def insert_trades_to_neo4j(df):
    """Create one ``Trade`` node in Neo4j for every row of ``df``.

    All rows are sent in a single ``UNWIND`` query rather than one query per
    row, so the insert is one round trip and one transaction. Values are
    converted to plain Python objects first so the driver never has to
    serialise pandas/NumPy scalar types.

    Args:
        df: DataFrame with ``timestamp``, ``currency_pair``, ``price`` and
            ``volume`` columns. Any other columns are ignored.

    Raises:
        KeyError: If one of the four required columns is missing.

    Note:
        Nodes are written with ``CREATE``, so calling this twice with the same
        data stores every trade twice.
    """
    if df.empty:
        # Nothing to write; avoid opening a session for an empty batch.
        return

    def to_native(value):
        # pandas Timestamp -> datetime.datetime, NumPy scalar -> int/float.
        if hasattr(value, "to_pydatetime"):
            return value.to_pydatetime()
        if hasattr(value, "item"):
            return value.item()
        return value

    columns = ["timestamp", "currency_pair", "price", "volume"]
    rows = [
        {column: to_native(value) for column, value in record.items()}
        for record in df[columns].to_dict("records")
    ]

    with driver.session() as session:
        session.run(
            """
            UNWIND $rows AS row
            CREATE (t:Trade {timestamp: row.timestamp, currency_pair: row.currency_pair,
                             price: row.price, volume: row.volume})
            """,
            rows=rows,
        ).consume()  # wait for the server so a failed insert raises here

# Function to create relationships between trades in Neo4j
def create_trade_relationships():
    """Link each ``Trade`` node to the trade that immediately follows it in time.

    Trades are ordered by ``timestamp`` and every consecutive pair is connected
    with a ``NEXT_TRADE`` relationship, forming a single chronological chain of
    n - 1 relationships. Matching every earlier trade against every later one
    would instead create n * (n - 1) / 2 relationships, most of which do not
    point at the *next* trade.

    The chain spans all ``Trade`` nodes regardless of currency pair. Trades
    with identical timestamps are chained in whatever order the database
    returns them.

    ``MERGE`` is used so that calling this again does not duplicate
    relationships that already exist.
    """
    with driver.session() as session:
        session.run(
            """
            MATCH (t:Trade)
            WITH t ORDER BY t.timestamp
            WITH collect(t) AS trades
            UNWIND range(0, size(trades) - 2) AS i
            WITH trades[i] AS t1, trades[i + 1] AS t2
            MERGE (t1)-[:NEXT_TRADE]->(t2)
            """
        ).consume()  # wait for the server so a failed query raises here

# Insert trades and create relationships in Neo4j
# NOTE: this runs every time the cell is executed, and trade nodes are written
# with CREATE, so re-running against the same database stores the trades again.
insert_trades_to_neo4j(trades_df)
create_trade_relationships()

# Summary statistics, including the guideline checks
def get_summary():
    """Count trades, outliers, deviations and guideline breaches in ``trades_df``.

    Returns:
        dict mapping a display label to an ``int`` count, in display order:
        total trades, flagged price/volume outliers, price/volume values more
        than one standard deviation from the mean, and price/volume values
        outside the expected guidelines.
    """
    def tally(mask):
        # Number of True entries in a boolean Series, as a plain int.
        return int(mask.sum())

    price_distance = trades_df["price_zscore"].abs()
    volume_distance = trades_df["volume_zscore"].abs()

    summary_data = {"Total Trades": len(trades_df)}
    summary_data["Price Outliers"] = tally(trades_df['price_outlier'])
    summary_data["Volume Outliers"] = tally(trades_df['volume_outlier'])
    summary_data["Price Deviations"] = tally(price_distance > 1)
    summary_data["Volume Deviations"] = tally(volume_distance > 1)
    summary_data["Price Outside Guidelines"] = tally(~trades_df['price_within_guidelines'])
    summary_data["Volume Outside Guidelines"] = tally(~trades_df['volume_within_guidelines'])
    return summary_data

# Table data function
def show_table():
    """Return the module-level ``trades_df`` (the frame itself, not a copy) for the Gradio table."""
    return trades_df

# Graphs function
def show_graphs():
    """Plot price and volume over time with the flagged outliers in red.

    The chart is also written to ``graph.png`` so it can be embedded in the
    PDF report.

    pyplot keeps every figure it creates alive until it is closed, so creating
    an anonymous figure per call would accumulate one open figure for each
    button click. A single labelled figure is used instead: the figure left by
    the previous call is closed before a new one is drawn.

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure is the chart
        that was just drawn.
    """
    figure_label = "FX trade outliers"
    plt.close(figure_label)  # no-op when no figure carries this label yet
    fig, (price_ax, volume_ax) = plt.subplots(2, 1, num=figure_label, figsize=(10, 6))

    # (axes, value column, outlier flag column, axis/legend label, outlier legend label, title)
    panels = (
        (price_ax, "price", "price_outlier", "Price", "Price Outliers", "FX Trade Prices Over Time"),
        (volume_ax, "volume", "volume_outlier", "Volume", "Volume Outliers", "FX Trade Volumes Over Time"),
    )
    for ax, column, flag, label, outlier_label, title in panels:
        flagged = trades_df.loc[trades_df[flag]]
        ax.plot(trades_df["timestamp"], trades_df[column], label=label)
        ax.scatter(flagged["timestamp"], flagged[column], color="red", label=outlier_label)
        ax.set_title(title)
        ax.set_xlabel("Timestamp")
        ax.set_ylabel(label)
        ax.legend()

    fig.tight_layout()

    # Save the graph for PDF report if needed
    fig.savefig("graph.png")
    return plt

# Function to generate a PDF report
def generate_report():
    """Write the outlier analysis report to ``generated_report.pdf``.

    The report contains a title, one line per entry of ``get_summary()``, and
    the price/volume chart.

    Returns:
        Path of the generated PDF file.
    """
    report_path = "generated_report.pdf"
    graph_path = "graph.png"

    # graph.png is only written by show_graphs(). Render it here so the report
    # does not fail (or embed a stale chart) when the graphs have not been
    # drawn in this session.
    show_graphs()
    plt.close()  # nothing displays this figure here, so release it

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # Title
    pdf.cell(200, 10, txt="Outlier Analysis Report", ln=True, align='C')

    # Summary Data
    pdf.ln(10)
    for key, value in get_summary().items():
        pdf.cell(200, 10, txt=f"{key}: {value}", ln=True, align='L')

    pdf.ln(10)

    # Add the graph to the PDF report, directly below the summary lines
    pdf.image(graph_path, x=10, y=pdf.get_y(), w=180)

    pdf.output(report_path)

    return report_path

# Gradio interface: table and graphs side by side, PDF export and summary below
with gr.Blocks() as demo:
    gr.Markdown("# 📊 Outlier Analysis Report")
    gr.Markdown("This interface allows you to explore FX trade data, detect outliers, and generate reports.")

    with gr.Row():
        # Left column: raw trade table
        with gr.Column():
            gr.Markdown("### 📋 Table Data")
            table_button = gr.Button("Show Table Data")
            table_output = gr.Dataframe()

        # Right column: price/volume charts
        with gr.Column():
            gr.Markdown("### 📈 Graphs")
            graph_button = gr.Button("Show Graphs")
            graph_output = gr.Plot()

    # PDF export
    gr.Markdown("### 📄 Generate PDF Report")
    pdf_button = gr.Button("Generate PDF")
    pdf_output = gr.File()

    # Read-only box holding the summary statistics
    summary_textbox = gr.Textbox(label="Summary Statistics", interactive=False)

    # Wire each button to the function that fills its output component
    table_button.click(fn=show_table, outputs=table_output)
    graph_button.click(fn=show_graphs, outputs=graph_output)
    pdf_button.click(fn=generate_report, outputs=pdf_output)

    # Fill the summary box when the page loads: one "label: count" line per entry
    demo.load(
        fn=lambda: "\n".join(f"{k}: {v}" for k, v in get_summary().items()),
        outputs=summary_textbox,
    )

# Launch the Gradio interface
# (in a notebook kernel __name__ is "__main__", so this runs whenever the cell is executed)
if __name__ == "__main__":
   demo.launch()


* Running on local URL:  http://127.0.0.1:7867

To create a public link, set `share=True` in `launch()`.
