In [93]:
import pandas as pd
from datetime import datetime
from typing import List, Dict, Union, Optional, Any, Tuple


class DataJoiner:
    def __init__(self, db_engine_func):
        self.get_db_engine = db_engine_func

    def get_table_data(
        self,
        table_name: str,
        filters: Dict[str, Any] = None,
        date_fields: List[str] = None,
        min_max_fields: Dict[str, Dict[str, Union[str, datetime, float]]] = None,
        select_columns: Union[str, List[str], None] = None,
        df: Optional[pd.DataFrame] = None,
        return_sql: bool = False
    ) -> Union[pd.DataFrame, Tuple[pd.DataFrame, str]]:
        filters = filters or {}
        date_fields = date_fields or []
        min_max_fields = min_max_fields or {}

        def parse_value(val, is_date=False):
            if is_date and isinstance(val, str):
                try:
                    return datetime.strptime(val, '%Y-%m-%d')
                except:
                    return datetime.strptime(val, '%Y-%m-%d %H:%M:%S')
            return val

        if df is not None:
            df_copy = df.copy()

            for field, value in filters.items():
                is_date = field in date_fields
                if isinstance(value, (list, tuple)):
                    df_copy = df_copy[df_copy[field].isin([parse_value(v, is_date) for v in value])]
                else:
                    df_copy = df_copy[df_copy[field] == parse_value(value, is_date)]

            for field, bounds in min_max_fields.items():
                if 'min' in bounds:
                    df_copy = df_copy[df_copy[field] >= parse_value(bounds['min'], field in date_fields)]
                if 'max' in bounds:
                    df_copy = df_copy[df_copy[field] <= parse_value(bounds['max'], field in date_fields)]

            if select_columns:
                if isinstance(select_columns, str):
                    select_columns = [select_columns]
                df_copy = df_copy[select_columns]

            return (df_copy, "-- Data filtered from preloaded DataFrame") if return_sql else df_copy

        engine = self.get_db_engine()
        column_str = ", ".join(select_columns) if isinstance(select_columns, list) else (select_columns or "*")
        query = f"SELECT {column_str} FROM {table_name}"
        where_clauses = []
        params = {}
        param_counter = 0

        for field, value in filters.items():
            is_date = field in date_fields
            prefix = f"{field}_{param_counter}"
            param_counter += 1
            if isinstance(value, (list, tuple)):
                parts = []
                for i, val in enumerate(value):
                    pname = f"{prefix}_{i}"
                    parts.append(f"{field} = %({pname})s")
                    params[pname] = parse_value(val, is_date)
                where_clauses.append("(" + " OR ".join(parts) + ")")
            else:
                pname = prefix
                where_clauses.append(f"{field} = %({pname})s")
                params[pname] = parse_value(value, is_date)

        for field, bounds in min_max_fields.items():
            if 'min' in bounds:
                where_clauses.append(f"{field} >= %(min_{field})s")
                params[f"min_{field}"] = parse_value(bounds['min'], field in date_fields)
            if 'max' in bounds:
                where_clauses.append(f"{field} <= %(max_{field})s")
                params[f"max_{field}"] = parse_value(bounds['max'], field in date_fields)

        if where_clauses:
            query += " WHERE " + " AND ".join(where_clauses)

        with engine.connect() as conn:
            if params:
                with conn.connection.cursor() as cursor:
                    cursor.execute(query, params)
                    rows = cursor.fetchall()
                    cols = [desc[0] for desc in cursor.description]
                    df = pd.DataFrame(rows, columns=cols)
            else:
                df = pd.read_sql(query, conn)

        sql_preview = query
        for key, val in params.items():
            val_str = f"'{val}'" if isinstance(val, str) else str(val)
            sql_preview = sql_preview.replace(f"%({key})s", val_str)

        return (df, sql_preview) if return_sql else df

    def universal_data_join(
        self,
        tables_to_join: List[str],
        join_type: str = "left",
        join_keys: Dict[str, str] = None,
        filters: Dict[str, dict] = None,
        select_columns: Union[List[str], str, None] = None,
        preloaded_data: Dict[str, pd.DataFrame] = None,
        date_fields: Dict[str, List[str]] = None,
        min_max_fields: Dict[str, Dict[str, Dict[str, Union[str, float]]]] = None,
        return_sql: bool = False
    ) -> Union[pd.DataFrame, Tuple[pd.DataFrame, str]]:
        assert len(tables_to_join) >= 2, "You must join at least two tables."
        assert join_keys is not None, "You must specify join keys for each table."

        dataframes = {}
        sql_queries = {}
        base_table = tables_to_join[0]

        # Ensure join key is in base table select_columns
        if select_columns and isinstance(select_columns, list):
            base_key = join_keys[base_table]
            if base_key not in select_columns:
                select_columns = [base_key] + select_columns

        for i, table in enumerate(tables_to_join):
            use_columns = select_columns if i == 0 else None
            if preloaded_data and table in preloaded_data:
                df = preloaded_data[table]
                sql_queries[table] = f"SELECT * FROM {table} -- Preloaded"
            else:
                df, sql = self.get_table_data(
                    table_name=table,
                    filters=(filters or {}).get(table, {}),
                    date_fields=(date_fields or {}).get(table, []),
                    min_max_fields=(min_max_fields or {}).get(table, {}),
                    select_columns=use_columns,
                    return_sql=True
                )
                sql_queries[table] = sql
            dataframes[table] = df

        # SQL Preview Construction
        base_sql = sql_queries[base_table].split(" WHERE ")[0]
        where_clause = ""
        if " WHERE " in sql_queries[base_table]:
            where_clause = " WHERE " + sql_queries[base_table].split(" WHERE ")[1]

        join_clause = ""
        merged = dataframes[base_table]

        for i in range(1, len(tables_to_join)):
            left_table = tables_to_join[i - 1]
            right_table = tables_to_join[i]
            left_key = join_keys[left_table]
            right_key = join_keys[right_table]
            join_clause += f" {join_type.upper()} JOIN {right_table} ON {left_table}.{left_key} = {right_table}.{right_key}"

            try:
                merged = pd.merge(
                    merged,
                    dataframes[right_table],
                    how=join_type,
                    left_on=left_key,
                    right_on=right_key,
                    suffixes=('', f'_{right_table}')
                )
            except KeyError as e:
                raise ValueError(f"Join failed: Missing key column - {e}")

        if select_columns:
            if isinstance(select_columns, str):
                select_columns = [select_columns]
            merged = merged[select_columns]

        sql_preview = base_sql + join_clause + where_clause
        return (merged, sql_preview) if return_sql else merged


In [108]:
from fastapi import FastAPI, Form
from fastapi.responses import JSONResponse
from sqlalchemy import create_engine
import pandas as pd
import json

app = FastAPI()

# ---------- DB ENGINE ----------
def get_engine():
    return create_engine("postgresql://postgres:password@localhost:5432/Data_Asset_Linkage")

dj = DataJoiner(get_engine)

# ---------- UNIVERSAL JOIN ----------
@app.post("/join", response_class=JSONResponse)
def perform_join(
    tables_to_join: str = Form(...),
    join_type: str = Form(...),
    join_keys: str = Form(...),
    filters: str = Form(""),
    select_columns: str = Form("")
):
    try:
        tables = [t.strip() for t in tables_to_join.split(",")]
        keys = json.loads(join_keys)
        filters_dict = json.loads(filters) if filters else {}
        columns = [c.strip() for c in select_columns.split(",")] if select_columns else None

        df, sql = dj.universal_data_join(
            tables_to_join=tables,
            join_type=join_type,
            join_keys=keys,
            filters=filters_dict,
            select_columns=columns,
            return_sql=True  # You must support this in your class
        )

        return {
            "sql_query": sql,
            "result": df.to_dict(orient="records")
        }

    except Exception as e:
        return {"error": str(e)}

# ---------- TABLE DATA ----------
@app.post("/table", response_class=JSONResponse)
def get_table_data_view(
    table_name: str = Form(...),
    filters: str = Form(""),
    select_columns: str = Form("")
):
    try:
        filters_dict = json.loads(filters) if filters else {}
        columns = [c.strip() for c in select_columns.split(",")] if select_columns else None

        df, sql = dj.get_table_data(
            table_name=table_name,
            filters=filters_dict,
            select_columns=columns,
            return_sql=True
        )

        return {
            "sql_query": sql,
            "result": df.to_dict(orient="records")
        }

    except Exception as e:
        return {"error": str(e)}

# ---------- RAW SQL ----------
@app.post("/query", response_class=JSONResponse)
def run_sql_query(sql: str = Form(...)):
    try:
        engine = get_engine()
        df = pd.read_sql(sql, con=engine)
        return {
            "sql_query": sql,
            "result": df.to_dict(orient="records")
        }
    except Exception as e:
        return {"error": str(e)}


In [110]:
import nest_asyncio
import uvicorn
import socket
from threading import Thread

nest_asyncio.apply()

def find_open_port(start=8000):
    for port in range(start, 8100):
        with socket.socket() as s:
            if s.connect_ex(("127.0.0.1", port)) != 0:
                return port
    raise RuntimeError("No open ports")

def start_ui_server():
    port = find_open_port()
    print(f"🔗 Visit your dashboard at: http://127.0.0.1:{port}")
    uvicorn.run(app, host="127.0.0.1", port=port)

def start_docs_server():
    port = find_open_port()
    print(f"✅ FastAPI running at: http://127.0.0.1:{port}/docs")
    uvicorn.run(app, host="127.0.0.1", port=port)

Thread(target=start_ui_server, daemon=True).start()
Thread(target=start_docs_server, daemon=True).start()


✅ FastAPI running at: http://127.0.0.1:8034/docs
🔗 Visit your dashboard at: http://127.0.0.1:8034


INFO:     Started server process [6572]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8034 (Press CTRL+C to quit)
INFO:     Started server process [6572]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 10048] error while attempting to bind on address ('127.0.0.1', 8034): [winerror 10048] only one usage of each socket address (protocol/network address/port) is normally permitted
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


INFO:     127.0.0.1:58683 - "GET /docs HTTP/1.1" 200 OK


Task exception was never retrieved
future: <Task finished name='Task-1490' coro=<Server.serve() done, defined at C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\server.py:68> exception=SystemExit(1)>
Traceback (most recent call last):
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\server.py", line 163, in startup
    server = await loop.create_server(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\asyncio\base_events.py", line 1584, in create_server
    raise OSError(err.errno, msg) from None
OSError: [Errno 10048] error while attempting to bind on address ('127.0.0.1', 8034): [winerror 10048] only one usage of each socket address (protocol/network address/port) is normally permitted

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\threading.py", line 1075, in 

INFO:     127.0.0.1:58683 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:58697 - "POST /join HTTP/1.1" 200 OK
INFO:     127.0.0.1:58701 - "POST /join HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 409, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\applications.py", line 112, in __call__
    await self.middleware_stack(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\mid

INFO:     127.0.0.1:58709 - "POST /join HTTP/1.1" 200 OK
INFO:     127.0.0.1:58715 - "POST /join HTTP/1.1" 200 OK
INFO:     127.0.0.1:58719 - "POST /join HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 409, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\applications.py", line 112, in __call__
    await self.middleware_stack(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\mid

INFO:     127.0.0.1:58725 - "POST /join HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 409, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\applications.py", line 112, in __call__
    await self.middleware_stack(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\mid

INFO:     127.0.0.1:58730 - "POST /join HTTP/1.1" 200 OK
INFO:     127.0.0.1:58740 - "POST /join HTTP/1.1" 200 OK
INFO:     127.0.0.1:58745 - "POST /join HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 409, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\applications.py", line 112, in __call__
    await self.middleware_stack(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\mid

INFO:     127.0.0.1:58749 - "POST /join HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 409, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\applications.py", line 112, in __call__
    await self.middleware_stack(scope, receive, send)
  File "C:\Users\prash\anaconda3\envs\Pandas_playground\Lib\site-packages\starlette\mid

INFO:     127.0.0.1:58752 - "POST /join HTTP/1.1" 200 OK
INFO:     127.0.0.1:58764 - "POST /query HTTP/1.1" 200 OK


  "result": df.to_dict(orient="records")


In [109]:
import os

os.makedirs("static", exist_ok=True)
os.makedirs("templates", exist_ok=True)

# Create a placeholder index.html if it doesn't exist
index_path = "templates/index.html"
if not os.path.exists(index_path):
    with open(index_path, "w") as f:
        f.write("""
<!DOCTYPE html>
<html>
<head>
    <title>Data Joiner Dashboard</title>
</head>
<body>
    <h1>Welcome to Data Joiner</h1>
    <form method="post" action="/join">
        <label>Tables to Join (comma separated):</label><br>
        <input name="tables_to_join"><br><br>

        <label>Join Type:</label><br>
        <input name="join_type" value="left"><br><br>

        <label>Join Keys (JSON):</label><br>
        <textarea name="join_keys">{ "customer": "customer_id", "account": "customer_id", "transaction": "account_no" }</textarea><br><br>

        <label>Filters (JSON):</label><br>
        <textarea name="filters">{}</textarea><br><br>

        <label>Select Columns (comma separated):</label><br>
        <input name="select_columns"><br><br>

        <button type="submit">Join</button>
    </form>

    {% if data is defined %}
    <h2>Results:</h2>
    <pre>{{ data.to_markdown(index=False) }}</pre>
    {% endif %}

    {% if query is defined %}
    <h2>SQL Preview:</h2>
    <pre>{{ query }}</pre>
    {% endif %}
</body>
</html>
        """)
