In [2]:
import os
import sys

import logging

logging.getLogger("snowflake").setLevel(logging.WARNING)
logging.getLogger("snowflake.connector").setLevel(logging.WARNING)
logging.getLogger("snowflake.snowpark").setLevel(logging.WARNING)


%pwd
os.chdir("../")

# Add the absolute path to src/ so Python can find automatch
src_path = os.path.abspath("src")
if src_path not in sys.path:
    sys.path.append(src_path)
    
%pwd


'c:\\Users\\fiscarelli\\Desktop\\Progetti\\Manpower IT\\Auto-Match\\Candidates-to-Jobs-Auto-Match-Cortex-AI'

In [2]:
%pwd


'c:\\Users\\fiscarelli\\Desktop\\Progetti\\Manpower IT\\Auto-Match\\Candidates-to-Jobs-Auto-Match-Cortex-AI'

In [3]:
from autoMatch.utils.snowflake_utils import get_snowpark_session
session = get_snowpark_session()

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
Going to open: https://login.microsoftonline.com/e2ba81b8-03fe-407c-96a1-f4bc0f512e7d/saml2?SAMLRequest=nZJNb%2BIwEIb%2FSuQ9J7HTFIIFVHyoKl1aUKFVtTfjTMCqY2dth1B%2B%2FZpQpO6hPfQWOc%2BMH887%2FZtDKYM9GCu0GiASYRSA4joXajtAz%2BvbMEOBdUzlTGoFA%2FQOFt0M%2B5aVsqKj2u3UE%2FytwbrAN1KWtj8GqDaKamaFpYqVYKnjdDV6mNMkwrQy2mmuJfpU8n0FsxaM84aXktwKr7dzrqJx3DRN1FxF2mzjBGMc417sqRPy68If%2FJu%2B4EmM0xPvCY8vP9zGQp1H8J3W5gxZerdeL8PlYrVGweiiOtHK1iWYFZi94PD8ND8LWG8wXvVSnGRR4%2BcWQm10BRE71gYiq3RTSPYGXJdV7Xz3yH%2FFBeSx1FvhBzCbDlD1JvIjLF7mh%2FrwWu6zcbfrsv2kV4zUbnGEyfaBk%2Fvfx%2Ft0edzfkS1Hwcsl4eSU8MzaGmbqlKvzRzi5DkkS4qs16dAkoymOOp30Dwqm3k8o5trKi3zrEZWCG2114bSSQkFrCcmGZWST%2BU4FhCnu8rDXYSQs0g3HxTVJoJvHp7QTdN4g2oqY4U%2Fn0o8%2Fd%2FlYykef02y61FLw9%2BBWm5K5r2MkEWlPRB4WLUqhZEKO8tyAtT5OKXUzMcCc33

In [4]:
from dataclasses import dataclass

@dataclass(frozen=True)
class LLMConfig:
    root_dir: str
    database: str
    schema: str
    input_table: str
    columns : dict
    llm_name : str
    columns: dict


In [5]:
from autoMatch.constants import *
from autoMatch.utils.common import read_yaml, create_directories

class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    def get_llm_config(self) -> LLMConfig:
        config = self.config.llm
        schema = self.schema.llm
        params = self.params.llm

        create_directories([config.root_dir])

        llm_config = LLMConfig(
            root_dir=config.root_dir,
            database=config.database,
            schema=config.schema,
            input_table=config.input_table,
            llm_name=params.llm_name,
            columns=schema.columns,

        )

        return llm_config

In [None]:
from autoMatch import logger
import json
        
class LLM:
    def __init__(self, config: LLMConfig):
        self.config = config


    def create_prompt(self, role, skills, df_candidates, limit=5):
        """
        Creates custom prompt based on potential candidates info and recruiter position info
        Returns prompt in string format
        """

        columns = self.config.columns  

        rows = df_candidates.collect()
        candidate_text = ""
        for row in rows:
            parts = [f"{columns[col]}:{row[col.upper()]}" for col in columns.keys()]
            candidate_text += " | ".join(parts) + "\n"

        skills_text = f"Skills desiderate: {skills}.\n" if skills else ""

        prompt = (
            f"Sei un selezionatore di personale.\n"
            f"Ruolo ricercato: {role}; dai priorita candidati che hanno avuto ruoli simili, "
            f"Non tenere in considerazione eta elocation\n"
            f"Skills richieste: {skills_text}; dai priorita a chi ha le skills piu rilevanti per il ruolo ricercato."
            f"Valuta i candidati seguenti e scegli i {limit} migliori. Dai ad ognuno un punteggio.\n"
            f"Candidati:\n{candidate_text}"
            f"Restituisci un JSON con: id, nome (candidateid), età, location, posizioni passate rilevanti, skills, motivazione, punteggio.\n\n"
        )

        logger.info("Prompt successfully created")

        return prompt

    def call_ai(self, session, prompt):
        """
        Calls AI model on custom prompt
        Returns json response
        """
        llm_name = self.config.llm_name

        query = f"""
        SELECT AI_COMPLETE(
            '{llm_name}',
            '{prompt.replace("'", "''")}'
        ) AS response
        """

        logger.info(f"Running model {llm_name} on candidate data")

        cur = session.connection.cursor()
        cur.execute(query)
        row = cur.fetchone()
        response = row[0]

        logger.info(f"Run completed")

        try:
            return json.loads(response)
        except:
            return [{"errore": response}]



        
            




In [None]:
try:
    config = ConfigurationManager()
    llm_config = config.get_llm_config()
    llm = LLM(config=llm_config)
    df = session.sql("SELECT * FROM IT_DISCOVERY.CONSUMER_INT_MODEL.MPG_IT_AUTOMATCH_CANDIDATE_FEATURES LIMIT 10")
    #print(df.columns)
    prompt = llm.create_prompt("Data Scientist", "Python, Java", df, 5)
    response = llm.call_ai(session, prompt)
    print(response)

except Exception as e:
    raise e




[2025-11-26 17:23:58,971: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-11-26 17:23:59,009: INFO: common: yaml file: params.yaml loaded successfully]


[2025-11-26 17:23:59,055: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-11-26 17:23:59,059: INFO: common: created directory at: artifacts]
[2025-11-26 17:23:59,065: INFO: common: created directory at: artifacts/llm]
[2025-11-26 17:24:00,132: INFO: 2420375031: Prompt successfully created]
Sei un selezionatore di personale.
Ruolo ricercato: Data Scientist; dai priorita candidati che hanno avuto ruoli simili, Non tenere in considerazione eta elocation
Skills richieste: Skills desiderate: Python, Java.
; dai priorita a chi ha le skills piu rilevanti per il ruolo ricercato.Valuta i candidati seguenti e scegli i 5 migliori. Dai ad ognuno un punteggio.
Candidati:
ID:5555995 | Eta:None | Location:None | Ultimo lavoro:responsabile zona sud riviera maya | Secondo ultimo lavoro:gerente vendite - tye | Terzo ultimo lavoro:gerente vendite - winjet
ID:5538179 | Eta:42 | Location:None | Ultimo lavoro:senior marketing content creator | Secondo ultimo lavoro:senior digital marketing s

In [35]:
position = "Magazziniere"

df1 = session.sql(f""" 
                 select candidateid, age,  location, province_ext, last_job, second_last_job, third_last_job, skills
                 from IT_DISCOVERY.CONSUMER_INT_MODEL.MPG_IT_AUTOMATCH_CANDIDATE_FEATURES_NEW
                 where latitude is not null and longitude is not null
                 and province_ext = 'Milano'
                 --AND age < 40
                 AND skills ILIKE '%cliente%'
                 AND (
                 LAST_JOB ILIKE '%{position}%'
                 OR SECOND_LAST_JOB ILIKE '%{position}%'
                 OR THIRD_LAST_JOB ILIKE '%{position}%' 
                 )
                 """).collect()
df1

position = "Magazziniere"
df2 = session.sql(f""" 
                 select candidateid, skills, languages, certifications --age,  location, province_ext, last_job, second_last_job, third_last_job, skills
                 from IT_DISCOVERY.CONSUMER_INT_MODEL.MPG_IT_AUTOMATCH_CANDIDATE_FEATURES_NEW
                 where latitude is not null and longitude is not null
                 and province_ext = 'Milano'
                 --AND age < 40
                 AND skills ILIKE '%muletto%'
                --AND certifications ILIKE '%patent%'
                 --AND languages ILIKE '%tedesco%'
                 AND (
                 LAST_JOB ILIKE '%{position}%'
                 OR SECOND_LAST_JOB ILIKE '%{position}%'
                 OR THIRD_LAST_JOB ILIKE '%{position}%' 
                 )
                 """).collect()
df2

[Row(CANDIDATEID=5749450, SKILLS='patente muletto carrello elevatore, traspalle elettrico, microsoft office, word, excel, sistemi mac, graphic design, video editing', LANGUAGES='italiano, inglese', CERTIFICATIONS=None),
 Row(CANDIDATEID=5749146, SKILLS='utilizzo muletto, manutenzione idraulica ed elettrica, manutenzione del verde', LANGUAGES='italiano', CERTIFICATIONS='patentino del muletto, patente di guida b'),
 Row(CANDIDATEID=5751010, SKILLS='patentino muletto, competenze digitali pacchetto microsoft', LANGUAGES='italiano, inglese', CERTIFICATIONS='patentino del muletto, attestato di alta formazione ac monza'),
 Row(CANDIDATEID=5780183, SKILLS='uso macchine di pulizia, muletto', LANGUAGES='cingalese, italiano, inglese', CERTIFICATIONS=None),
 Row(CANDIDATEID=5745294, SKILLS='guida muletto, transpallet elettrici, carrelli, gestione magazzino, controllo qualità, imballaggio', LANGUAGES=None, CERTIFICATIONS='patente b, patentino muletto'),
 Row(CANDIDATEID=5786820, SKILLS='utilizzo mu

In [31]:
column = 'certifications'

df2 = session.sql(f""" 
                 select distinct({column}), count ({column})
                 from IT_DISCOVERY.CONSUMER_INT_MODEL.MPG_IT_AUTOMATCH_CANDIDATE_FEATURES_NEW
                 group by {column}
                 order by count ({column}) desc
                 """).collect()
df2

"mm" == False

False

In [10]:
df2 = session.sql(f""" 
                 select candidateid, description
                 from IT_DISCOVERY.CONSUMER_INT_MODEL.MPG_IT_AUTOMATCH_CANDIDATE_CLEANED_NEW
                 where candidateid = 5766872
                 """).collect()
df2

[Row(CANDIDATEID=5766872, DESCRIPTION='\n\n\n\n\n\nevalyn horemans \xa0\nmilan • 351 896 9554\xa0• evalyn.horemans@gmail.com \xa0\nwork experience \xa0\nmy english school \xa0\nenglish teacher \xa0\n2019-present \xa0\ngenova \xa0\nteach english to students of all ages in both group and individual classes. lessons cover \xa0\n•\ngrammar, \xa0\nconversation, exam preparation, and industry-specific english such as business \xa0\ncommunication. create customized lesson materials and student feedback reports. develop \xa0\noccasional training resources and modules for other teachers. \xa0\nvipkid \xa0\nenglish teacher \xa0\n2020-2021 \xa0\nremote \xa0\ndelivery of thousands of one-on-one english lessons to students aged 4–12 across china. \xa0\nmaintained a high parent satisfaction rating through personalized instruction, engaging \xa0\nsupplementary activities, and consistent student progress in speaking and comprehension. \xa0\n•\nnational bank of canada \xa0\ndatascientist \xa0\n2018-201