In [2]:
from pydantic import BaseModel
import pandas as pd
import dspy
import re
import csv
from typing import Optional, List, Union

# One company record as returned by search_companies, validated by pydantic.
# (Documented with comments rather than a docstring on purpose: a class
# docstring would also be copied into the model's JSON schema description.)
class Company(BaseModel):
    company_name: str  # filled from the "name" column of the companies CSV
    headcount: int
    industry: str
    revenue: float
    address: str  # single free-text address string; the city filter searches inside it
    description: Optional[str] = None  # optional; stays None when not supplied
    in_projectfacts: bool = False  # True when the name was also found in the projectfacts CSV

In [3]:
def _normalize_name(s: str) -> str:
    """Reduce a company name to a lowercase comparison key.

    Steps: lowercase and trim; turn ``&`` and ``/`` into spaces; replace every
    character other than ``a-z``, digits, ``äöüß``, whitespace, ``-`` and ``.``
    with a space; collapse whitespace; finally drop trailing legal-form tokens
    (compared with their dots removed, so ``co.`` and ``s.r.l.`` count too).

    ``None`` or an empty string yields ``""``; so does a name that consists
    only of legal-form tokens.
    """
    legal_suffixes = {"gmbh","ag","kg","og","eg","se","ltd","limited","inc","llc","corp","co","sarl","srl","s.r.l"}

    text = (s or "").lower().strip()
    # Each pattern is replaced by a single space, in this order.
    for pattern in (r"[&/]", r"[^a-z0-9äöüß\s\-\.]", r"\s+"):
        text = re.sub(pattern, " ", text)

    tokens = text.strip().split()
    # Walk back from the end past every trailing legal-form token.
    end = len(tokens)
    while end > 0 and tokens[end - 1].replace(".", "") in legal_suffixes:
        end -= 1
    return " ".join(tokens[:end]).strip()


def _read_companies_csv_robust(path: str) -> pd.DataFrame:
    """Read a comma-separated companies file whose address field is unquoted.

    Logical columns of the file:
      name, headcount, industry/branch, revenue,
      address (street, postal city, country), description, news

    The address contains commas but is NOT quoted, so a plain CSV parse yields
    a variable number of fields per line. The first four fields and the last
    two are taken positionally; everything in between is re-joined as the
    address and then split best-effort into its parts.

    Args:
        path: Path to the UTF-8 encoded CSV file. The first line is treated as
            a header and skipped.

    Returns:
        DataFrame with the columns ``name``, ``headcount``, ``industry``,
        ``revenue``, ``address_street``, ``adress_postalcode_city``,
        ``adress_country`` and ``description``. ``headcount`` and ``revenue``
        are numeric, with unparsable values as NaN. A file without usable data
        rows yields an empty frame that still has all of these columns.

    Notes:
        * Lines with fewer than 6 fields are skipped.
        * When the address has exactly two parts, the second part is reported
          both as postal code/city and as country.
        * Revenue parsing keeps only digits, ``.`` and ``,`` and treats ``,``
          as the decimal separator; a value carrying both separators (such as
          ``1.234,56``) does not parse and becomes NaN.
    """
    # Column names are part of what callers index on; keep the existing
    # spelling ("adress_...") unchanged.
    columns = [
        "name",
        "headcount",
        "industry",
        "revenue",
        "address_street",
        "adress_postalcode_city",
        "adress_country",
        "description",
    ]

    rows = []
    with open(path, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header line
        for fields in reader:
            if len(fields) < 6:
                continue

            # The address is everything between revenue and the last two
            # columns (description, news).
            address = ",".join(fields[4:-2]).strip()
            addr_parts = [p.strip() for p in address.split(",") if p.strip()]

            rows.append(
                dict(
                    name=fields[0].strip(),
                    headcount=fields[1].strip(),
                    industry=fields[2].strip(),
                    revenue=fields[3].strip(),
                    address_street=addr_parts[0] if len(addr_parts) >= 1 else None,
                    adress_postalcode_city=addr_parts[1] if len(addr_parts) >= 2 else None,
                    adress_country=addr_parts[-1] if len(addr_parts) >= 2 else None,
                    description=fields[-2].strip(),
                )
            )

    # Passing the column list keeps the schema stable even when no row was
    # collected; without it the column lookups below raise KeyError.
    df = pd.DataFrame(rows, columns=columns)

    # types
    df["headcount"] = pd.to_numeric(df["headcount"], errors="coerce")
    revenue_text = (
        df["revenue"]
        .astype(str)
        .str.replace(r"[^\d\.,]", "", regex=True)
        .str.replace(",", ".", regex=False)
    )
    df["revenue"] = pd.to_numeric(revenue_text, errors="coerce")
    return df


def _read_projectfacts_csv_robust(path: str) -> pd.DataFrame:
    """Read a comma-separated projectfacts file whose address field is unquoted.

    Logical columns of the file: name, address. The address contains commas
    but is not quoted, so every field after the first is re-joined with commas
    to rebuild it.

    Args:
        path: Path to the UTF-8 encoded CSV file. The first line is treated as
            a header and skipped.

    Returns:
        DataFrame with the columns ``name``, ``address`` and ``name_norm``
        (the name passed through ``_normalize_name``). Lines with fewer than
        two fields are skipped. A file without usable data rows yields an
        empty frame that still has all three columns.
    """
    rows = []
    with open(path, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header line
        for fields in reader:
            if len(fields) < 2:
                continue
            rows.append(
                dict(
                    name=fields[0].strip(),
                    address=",".join(fields[1:]).strip(),
                )
            )

    # Passing the column list keeps the schema stable even when no row was
    # collected; without it the "name" lookup below raises KeyError.
    pf = pd.DataFrame(rows, columns=["name", "address"])
    pf["name_norm"] = pf["name"].astype(str).map(_normalize_name)
    return pf


from typing import Optional, Union, List
import pandas as pd

# Company must already be defined before this point (it is declared in the first cell)
# from your_models import Company
from typing import Optional
import pandas as pd


def _cell_text(value) -> Optional[str]:
    """Return a CSV cell as ``str``, or ``None`` when the cell is missing (None/NaN)."""
    if value is None or pd.isna(value):
        return None
    return str(value)


def search_companies(
    industry: Optional[str] = None,
    min_headcount: Optional[int] = None,
    min_revenue: Optional[float] = None,
    city: Optional[str] = None,
    companies_path: str = "data/companies.csv",
    projectfacts_path: str = "data/projectfacts.csv",
) -> List[Company]:
    """Search the company list and flag which matches already exist in the CRM.

    All filters are optional and are combined with AND. Leaving a filter as
    ``None`` (or blank text) disables it.

    Args:
        industry: Keep only companies whose industry equals this value
            (case-insensitive, surrounding whitespace ignored).
        min_headcount: Keep only companies with headcount strictly greater
            than this value.
        min_revenue: Keep only companies with revenue strictly greater than
            this value.
        city: Keep only companies whose address contains this text
            (case-insensitive, literal substring match).
        companies_path: Semicolon-separated CSV with at least the columns
            ``name``, ``headcount``, ``industry``, ``revenue`` and ``address``;
            ``description`` is optional.
        projectfacts_path: Semicolon-separated CSV export of the CRM with a
            ``name`` column.

    Returns:
        One ``Company`` per matching row. ``in_projectfacts`` is True when the
        company name (case-insensitive, trimmed) also appears in the
        projectfacts file, i.e. the company is an existing account rather
        than a new lead.

    Notes:
        Rows whose headcount cannot be parsed as a number are skipped, because
        ``Company.headcount`` is a required integer. Missing text cells become
        ``""`` for required fields and ``None`` for ``description``.
    """
    companies = pd.read_csv(companies_path, sep=";")
    crm = pd.read_csv(projectfacts_path, sep=";")

    # Parse the numeric columns once, up front, so that the filters and the
    # final int()/float() conversion all see the same cleaned values.
    headcount = pd.to_numeric(companies["headcount"], errors="coerce")
    revenue = pd.to_numeric(companies["revenue"], errors="coerce")

    # Build one boolean mask instead of re-slicing the frame per filter; this
    # also avoids assigning columns on a filtered slice.
    keep = headcount.notna()

    # --- Industry filter (exact match on the normalised text)
    wanted_industry = (industry or "").strip().lower()
    if wanted_industry:
        industry_key = companies["industry"].fillna("").astype(str).str.strip().str.lower()
        keep &= industry_key == wanted_industry

    # --- Minimum headcount (exclusive)
    if min_headcount is not None:
        keep &= headcount > min_headcount

    # --- Minimum revenue (exclusive; rows without a parsable revenue never pass)
    if min_revenue is not None:
        keep &= revenue > min_revenue

    # --- City filter
    needle = (city or "").strip().lower()
    if needle:
        address_key = companies["address"].fillna("").astype(str).str.lower()
        # regex=False: the needle is user text, not a pattern. As a pattern,
        # "St. Pölten" would match too broadly and "(" would raise.
        keep &= address_key.str.contains(needle, regex=False, na=False)

    # --- CRM check (exact name match)
    # Drop missing names BEFORE converting to str, otherwise NaN turns into
    # the literal name "nan".
    crm_names = set(crm["name"].dropna().astype(str).str.strip().str.lower())
    crm_names.discard("")
    name_key = companies["name"].fillna("").astype(str).str.strip().str.lower()
    in_crm = name_key.isin(crm_names)

    selected = companies.loc[keep].assign(
        headcount=headcount[keep],
        revenue=revenue[keep],
        in_projectfacts=in_crm[keep],
    )

    # --- Convert to BaseModel list
    return [
        Company(
            company_name=_cell_text(record["name"]) or "",
            headcount=int(record["headcount"]),
            industry=_cell_text(record["industry"]) or "",
            revenue=float(record["revenue"]),
            address=_cell_text(record["address"]) or "",
            description=_cell_text(record.get("description")),
            in_projectfacts=bool(record["in_projectfacts"]),
        )
        for record in selected.to_dict("records")
    ]

In [4]:
import pandas as pd
from pprint import pprint


In [5]:
# Smoke test: list every company whose address mentions Eisenstadt,
# without any headcount or revenue threshold.
results = search_companies(
    min_headcount=None,
    min_revenue=None,
    city="Eisenstadt",
)

print(len(results))
# Iterate directly; the former range(min(len(results), len(results))) was a
# redundant index loop covering exactly the same items.
for company in results:
    pprint(company.model_dump())


0


In [6]:
import dspy

# Signature for the acquisition agent: one free-text request in, one text
# summary out.
# NOTE(review): the class docstring is written as instructions to the model
# and is presumably consumed by dspy at runtime, like the `desc` strings
# below. Treat all of it as prompt text, not as ordinary documentation;
# confirm against the dspy Signature docs before rewording.
class Grablin(dspy.Signature):
    """You are an AI agent specialized in supporting business development and
    acquisition efforts for industrial projects in the logistics sector.

    Your primary objective is to identify, filter, and evaluate industrial
    companies that may be relevant for logistics-related industry projects.

    You are provided with a list of tools. You must decide autonomously
    which tool to use in order to best fulfill the user’s request."""

    # Input: the request text exactly as entered by the user.
    user_request: str = dspy.InputField(
        desc="User request describing acquisition criteria or filtering requirements."
    )

    # Output: a single string; the three adjacent literals below are joined
    # into one description.
    process_result: str = dspy.OutputField(
        desc=(
                "Structured summary of identified companies, including CRM status "
                "(new lead vs existing account), and their strategic relevance "
                "for logistics-related industrial projects."
        )
     )

In [7]:
# Build the ReAct agent over the Grablin signature; the company search is the
# only tool it is given.
agent = dspy.ReAct(Grablin, tools=[search_companies])

In [8]:
from dotenv import load_dotenv
from pathlib import Path
import dspy

# Prefer the project .env at its known location. If that file is not present
# (for example when the notebook runs on another machine), let python-dotenv
# look for a .env on its own instead of silently loading nothing.
# override=False keeps variables that are already set in the environment.
dotenv_path = Path("/home/porcorosso/hackathon/.env")
if dotenv_path.is_file():
    load_dotenv(dotenv_path=dotenv_path, override=False)
else:
    load_dotenv(override=False)

dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))  # use a model you have access to

In [None]:
from IPython.display import display
import ipywidgets as widgets

# Free-text request box; its content is handed to the agent as `user_request`.
request_input = widgets.Textarea(
    placeholder="Enter your acquisition criteria (e.g. Renewable Energy companies in Linz with >100 employees)...",
    description="Request:",
    layout=widgets.Layout(width="600px", height="120px")
)

submit_button = widgets.Button(description="Run Agent")
output = widgets.Output()

def on_button_click(b):
    """Run the agent on the current request text and print the result into `output`.

    `b` is the argument ipywidgets passes to the click handler; it is not used.
    """
    with output:
        output.clear_output()
        user_request = request_input.value.strip()
        if not user_request:
            # Nothing to search for: skip the model call entirely.
            print("Please enter a request first.")
            return

        # Block repeat clicks while the agent is running; always re-enable,
        # even when the agent call raises.
        submit_button.disabled = True
        try:
            result = agent(user_request=user_request)
            print(result)
        finally:
            submit_button.disabled = False

submit_button.on_click(on_button_click)

display(request_input, submit_button, output)


Textarea(value='', description='Request:', layout=Layout(height='120px', width='600px'), placeholder='Enter yo…

Button(description='Run Agent', style=ButtonStyle())

Output()