In [1]:
import os
import sys

import logging

logging.getLogger("snowflake").setLevel(logging.WARNING)
logging.getLogger("snowflake.connector").setLevel(logging.WARNING)
logging.getLogger("snowflake.snowpark").setLevel(logging.WARNING)


%pwd
os.chdir("../")

# Add the absolute path to src/ so Python can find automatch
src_path = os.path.abspath("src")
if src_path not in sys.path:
    sys.path.append(src_path)
    
%pwd


'c:\\Users\\fiscarelli\\Desktop\\Progetti\\Manpower IT\\Auto-Match\\Candidates-to-Jobs-Auto-Match-Cortex-AI'

In [2]:
# Sanity check: display the working directory after the chdir done in the first cell.
%pwd


'c:\\Users\\fiscarelli\\Desktop\\Progetti\\Manpower IT\\Auto-Match\\Candidates-to-Jobs-Auto-Match-Cortex-AI'

In [3]:
from autoMatch.utils.snowflake_utils import get_snowpark_session
# Session object used by the later cells (session.sql(...) and session.connection).
# NOTE(review): the recorded output shows a browser-based identity-provider login
# being started here - presumably this call blocks until it completes; confirm.
session = get_snowpark_session()

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
Going to open: https://login.microsoftonline.com/e2ba81b8-03fe-407c-96a1-f4bc0f512e7d/saml2?SAMLRequest=nZJNT%2BMwEIb%2FSuQ9x3HcFFKrLeqH0FaCUtHQXXFzkkmxcOys7TTQX79uSiX2AIe9Rc4z48fzzvjmrZbBAYwVWk1QjAkKQBW6FGo%2FQU%2FZbZiiwDquSi61ggl6B4tupmPLa9mwWete1CP8acG6wDdSlvU%2FJqg1imluhWWK12CZK9h2dn%2FHKCasMdrpQkv0qeT7Cm4tGOcNLyWlFV7vxbmGRVHXdbgbYG32ESWERGQUeeqE%2FLjwb%2F5NX%2FBxRJIT7wmPbz7c5kKdR%2FCdVn6GLPuZZZtw87DNUDC7qC60sm0NZgvmIAp4erw7C1hvMN%2BOEkJT3Pm5hdAa3QDmx9YAtkp3leSvUOi6aZ3vjv1XVEEZSb0XfgCr5QQ1r6I08uG46p7JQY3mu53ix0V3b%2Fa5zbNKpyRd%2F9rH%2FKr6TQdrW6Bgd0mYnhJeWdvCSp1ydf6I0GEYk5DSjIxYkrBkiAd0%2BIyCpfcTiru%2B8iLfe%2BBaFEZbXTmtpFDQWwLNeRrnaUgGFYQJuS7C0RWPwyrJC1INYwrXZXRKm6LzBrFexEz%2Fdy7j6HOXj6Vc%2B5xWy42WongPbrWpufs6xhjH%2FYkow6pHGdRcyFlZGrDWxyml7hYGuPO770wLKJ

In [4]:
from dataclasses import dataclass

@dataclass(frozen=True)
class LLMConfig:
    """Immutable settings consumed by the LLM step.

    The field order below is the effective order of the original declaration
    (where ``columns`` was listed twice) and is kept so positional
    construction keeps working.
    """
    root_dir: str     # directory created by ConfigurationManager.get_llm_config
    database: str     # read from the ``llm`` section of the config YAML
    schema: str       # read from the ``llm`` section of the config YAML
    input_table: str  # read from the ``llm`` section of the config YAML
    columns: dict     # column name -> label used when rendering candidates in the prompt
    llm_name: str     # model name passed as first argument to AI_COMPLETE


In [5]:
from autoMatch.constants import *
from autoMatch.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Reads the config, params and schema YAML files and exposes typed config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: path handed to ``read_yaml`` for the main config.
            params_filepath: path handed to ``read_yaml`` for the parameters.
            schema_filepath: path handed to ``read_yaml`` for the schema.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_llm_config(self) -> LLMConfig:
        """Build the ``LLMConfig`` from the ``llm`` section of each loaded YAML file.

        Creates the LLM root directory as a side effect.
        """
        llm_section = self.config.llm
        llm_schema = self.schema.llm
        llm_params = self.params.llm

        create_directories([llm_section.root_dir])

        return LLMConfig(
            root_dir=llm_section.root_dir,
            database=llm_section.database,
            schema=llm_section.schema,
            input_table=llm_section.input_table,
            llm_name=llm_params.llm_name,
            columns=llm_schema.columns,
        )

In [None]:
from autoMatch import logger
import json
        
class LLM:
    """Builds a candidate-ranking prompt and runs it through Snowflake's AI_COMPLETE."""

    def __init__(self, config: "LLMConfig"):
        """Keep the configuration; ``columns`` and ``llm_name`` are the fields read by this class."""
        self.config = config

    def create_prompt(self, role, skills, df_candidates, limit=5):
        """
        Creates custom prompt based on potential candidates info and recruiter position info.

        Args:
            role: role being recruited for, interpolated into the prompt.
            skills: desired skills; when falsy the skills sentence is left out.
            df_candidates: object whose ``collect()`` returns rows indexable by
                upper-cased column name.
            limit: number of best candidates the model is asked to pick.

        Returns:
            The prompt as a string.
        """
        columns = self.config.columns

        # One line per candidate: "<label>:<value> | <label>:<value> | ...".
        candidate_lines = [
            " | ".join(f"{label}:{row[col.upper()]}" for col, label in columns.items())
            for row in df_candidates.collect()
        ]
        candidate_text = "".join(line + "\n" for line in candidate_lines)

        # The skills sentence is emitted as a whole or not at all. Previously the
        # label was written twice ("Skills richieste: Skills desiderate: ...") and
        # an empty `skills` still produced a dangling "Skills richieste: ;".
        skills_text = (
            f"Skills richieste: {skills}; dai priorita a chi ha le skills piu rilevanti per il ruolo ricercato.\n"
            if skills
            else ""
        )

        prompt = (
            f"Sei un selezionatore di personale.\n"
            f"Ruolo ricercato: {role}; dai priorita candidati che hanno avuto ruoli simili, "
            f"Non tenere in considerazione eta e location\n"
            f"{skills_text}"
            f"Valuta i candidati seguenti e scegli i {limit} migliori. Dai ad ognuno un punteggio.\n"
            f"Candidati:\n{candidate_text}"
            f"Restituisci un JSON con: id, nome (candidateid), età, location, posizioni passate rilevanti, skills, motivazione, punteggio.\n\n"
        )

        logger.info("Prompt successfully created")

        return prompt

    @staticmethod
    def _sql_string_literal(value):
        """Render ``value`` as a single-quoted SQL string constant."""
        # Backslashes must be doubled as well as quotes: Snowflake treats "\"
        # as an escape character inside single-quoted constants, so a prompt
        # containing \' would otherwise terminate the string early.
        escaped = str(value).replace("\\", "\\\\").replace("'", "''")
        return f"'{escaped}'"

    def call_ai(self, session, prompt):
        """
        Calls AI model on custom prompt.

        Args:
            session: object exposing ``connection.cursor()``.
            prompt: text sent to the model.

        Returns:
            The parsed JSON response, or ``[{"errore": <raw response>}]`` when
            the response is missing or is not valid JSON.
        """
        llm_name = self.config.llm_name

        model_literal = self._sql_string_literal(llm_name)
        prompt_literal = self._sql_string_literal(prompt)
        query = f"""
        SELECT AI_COMPLETE(
            {model_literal},
            {prompt_literal}
        ) AS response
        """

        logger.info(f"Running model {llm_name} on candidate data")

        cur = session.connection.cursor()
        try:
            cur.execute(query)
            row = cur.fetchone()
        finally:
            # Release the cursor even when the query fails.
            cur.close()

        response = row[0] if row else None

        logger.info("Run completed")

        try:
            return json.loads(response)
        except (json.JSONDecodeError, TypeError):
            # Only parsing problems are turned into the error payload; anything
            # else (including KeyboardInterrupt) propagates.
            logger.warning("Model response is not valid JSON; returning it as an error payload")
            return [{"errore": response}]




In [7]:
# End-to-end run: load the configuration, build a prompt from a sample of
# candidates and send it to the model. The former `try/except ...: raise e`
# wrapper only re-raised the same exception, so it has been dropped.
config = ConfigurationManager()
llm_config = config.get_llm_config()
llm = LLM(config=llm_config)

# NOTE(review): the table name is hard-coded although LLMConfig carries
# database / schema / input_table - confirm whether it should be built from those.
df = session.sql("SELECT * FROM IT_DISCOVERY.CONSUMER_INT_MODEL.MPG_IT_AUTOMATCH_CANDIDATE_FEATURES LIMIT 100")

prompt = llm.create_prompt("Data Scientist", "Python, Java", df, 5)
response = llm.call_ai(session, prompt)
print(response)




[2025-10-22 11:44:48,466: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-10-22 11:44:48,471: INFO: common: yaml file: params.yaml loaded successfully]
[2025-10-22 11:44:48,484: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-10-22 11:44:48,487: INFO: common: created directory at: artifacts]
[2025-10-22 11:44:48,489: INFO: common: created directory at: artifacts/llm]


NameError: name 'logger' is not defined

In [None]:
import json

# `response` may still be text (json.loads in call_ai can itself return a str);
# decode it once more in that case.
resp = json.loads(response) if isinstance(response, str) else response

# Build lookup from Snowpark DataFrame
lookup = {
    str(row["CANDIDATEID"]): {
        "età": row["AGE"],
        "location": row["LOCATION"]
    }
    for row in df.select("CANDIDATEID", "AGE", "LOCATION").collect()
}

# Enrich JSON
# Entries that are not dicts or lack the id key are counted and skipped instead
# of raising: call_ai returns [{"errore": ...}] when the model output is not
# valid JSON, and that payload has no candidate id.
# NOTE(review): the prompt asks the model for "id" / "nome (candidateid)", while
# this cell reads "caid" - confirm which key the model actually returns.
skipped = 0
for cand in resp:
    if not isinstance(cand, dict) or "caid" not in cand:
        skipped += 1
        continue
    cand.update(lookup.get(str(cand["caid"]), {}))

if skipped:
    print(f"Skipped {skipped} response entries without a 'caid' key")

JSONDecodeError: Unterminated string starting at: line 510 column 5 (char 10640)