In [1]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from dotenv import load_dotenv
import os
from pydantic import BaseModel, Field
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
import pandas as pd

# Load environment variables from .env file
load_dotenv()


True

In [2]:
url ="https://api.theirstack.com/v1/jobs/search"

headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"Bearer {os.getenv('their_stack_api_key')}"}

if os.path.exists("/home/giuseppe/projetos-pessoais/linkedin-bot/data/raw/jobs_1.csv"):
    df_jobs = pd.read_csv("/home/giuseppe/projetos-pessoais/linkedin-bot/data/raw/jobs_1.csv")
else:
    response = requests.post(url, headers=headers, json=data)
    request = response.json()
    df_jobs = pd.DataFrame(request["data"])

In [3]:

llm = ChatOpenAI(
    model_name="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    openai_api_key=os.getenv('TOGETHER_API_KEY'),
    openai_api_base="https://api.together.xyz/v1"
)

In [None]:
class SummarizeDescription(BaseModel):
    resume: str = Field(
        description="Summarize the relevant informations contained in descrption  job position"
    )

    smart_working: bool = Field(
        description="Flag with true or false if this jobs in remote or not"
    )

    required: str = Field(
        description="The required skills and qualifications for the job position"
    )
    nice_to_have: str = Field(
        description="The nice to have skills and qualifications for the job position"
    )
    company: str = Field(description="Summarize the company name and its mission")
    location: str = Field(description="Summarize the location of the job position")

    job_responsibilities: list[str] = Field(
        description="The job responsibilities associated with the position"
    )

    hards_skills: list[str] = Field(
        description="The hard skills required for the job position"
    )
    soft_skills: list[str] = Field(
        description="The soft skills required for the job position"
    )

    def config

In [24]:
llm_summarize = llm.with_structured_output(SummarizeDescription, include_raw=True)
prompt_linkedin = """
    You are a job description summarizer. 
    Your task is to summarize the relevant information contained in the job description.
    The output should be a JSON object with the following fields:
    - resume: A summary of the relevant information contained in the job description.
    - smart_working: A boolean flag indicating whether the job is remote or not.
    - required: The required skills and qualifications for the job position.
    - nice_to_have: The nice-to-have skills and qualifications for the job position.
    - company: A summary of the company name and its mission.
    - location: A summary of the location of the job position.
    - job_responsibilities: The job responsibilities associated with the position.
    - hards_skills: The hard skills required for the job position.
    - soft_skills: The soft skills required for the job position.

    Here is a sample JSON object:
    {{"resume": "A software engineer with experience in Python and JavaScript.",
      "smart_working": true,
      "required": "Experience with Python, JavaScript, and SQL.",
      "nice_to_have": "Experience with React and Docker.",
      "company": "Tech Corp, a leading technology company.",
      "location": "Remote",
      "job_responsibilities": ["Develop software applications", "Collaborate with cross-functional teams"],
      "hards_skills": ["Python", "JavaScript", "SQL"],
      "soft_skills": ["Communication", "Teamwork"]}}

    Here is the job description:
    """

In [25]:
system_message = prompt_linkedin.format(description=df_jobs.loc[0, "description"])

In [26]:
analysts = llm_summarize.invoke([SystemMessage(content=prompt_linkedin)]+[HumanMessage(content=df_jobs.loc[0, "description"])])


In [40]:
analysts["raw"].usage_metadata["total_tokens"]

1558

In [42]:
analysts["parsed"]

SummarizeDescription(resume='A JavaScript/React developer with 3+ years of experience to work on training data for AI models.', smart_working=True, required='3+ years of experience in software engineering, strong proficiency with JavaScript/React, fluency in English, attention to detail, and ability to articulate complex technical concepts.', nice_to_have="Bachelor's or Master's degree in Computer Science, experience with modern JavaScript frameworks, familiarity with frontend testing frameworks, knowledge of state management solutions, and experience with TypeScript.", company='A prominent player in the AI/LLM space, with a mission to create training data for advanced AI models.', location='Remote, with accepted locations in the US, Canada, LATAM, Europe, Africa, and Asia.', job_responsibilities=['Evaluating AI-generated code', 'Building and evaluating React components', 'Solving coding problems', 'Writing test cases', 'Creating instructions for others'], hards_skills=['JavaScript', '

In [10]:
analysts.model_dump()

{'resume': 'A JavaScript/React developer with 3+ years of experience to train large AI language models and create training data for advanced AI models.',
 'smart_working': True,
 'required': '3+ years of experience in software engineering, strong proficiency with JavaScript/React, fluency in English, attention to detail, and ability to articulate complex technical concepts.',
 'nice_to_have': "Bachelor's or Master's degree in Computer Science, experience with modern JavaScript frameworks, familiarity with frontend testing frameworks, knowledge of state management solutions, and experience with TypeScript.",
 'company': 'A prominent player in the AI/LLM space, with a mission to create training data for advanced AI models.',
 'location': '100% remote, with accepted locations in the US, Canada, LATAM, Europe, Africa, and Asia.',
 'job_responsibilities': ['Evaluating AI-generated code',
  'Building and evaluating React components',
  'Solving coding problems',
  'Writing test cases',
  'Cr

In [11]:
df_jobs.loc[0, "description"]

"### **Accepted Locations**\n\n\nWe accept applicants from the US, Canada, and most countries in LATAM and Europe. We are also accepting candidates from some countries in Africa and Asia. For the complete list of accepted locations, click here. This work is 100% remote.\n\n**Loom Video**\n\n\nOur Founder/CEO, Gabe Greenberg, created an in\\-depth Loom video that we highly recommend you watch! Check it out here: Loom Video\n\n**Overview**\n\n\nJoin our expert annotation team to create training data for the world's most advanced AI models. No previous AI experience is necessary. You'll get your foot in the door with one of the most prominent players in the AI/LLM space today. We're primarily seeking JavaScript/React developers with 3\\+ years of experience to train large AI language models, helping cutting\\-edge generative AI models write better frontend code. Projects typically include discrete, highly variable problems that involve engaging with these models as they learn to code. We 

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [16]:
vectorizer = TfidfVectorizer()

In [18]:
df_jobs.loc[0, "description"]

"### **Accepted Locations**\n\n\nWe accept applicants from the US, Canada, and most countries in LATAM and Europe. We are also accepting candidates from some countries in Africa and Asia. For the complete list of accepted locations, click here. This work is 100% remote.\n\n**Loom Video**\n\n\nOur Founder/CEO, Gabe Greenberg, created an in\\-depth Loom video that we highly recommend you watch! Check it out here: Loom Video\n\n**Overview**\n\n\nJoin our expert annotation team to create training data for the world's most advanced AI models. No previous AI experience is necessary. You'll get your foot in the door with one of the most prominent players in the AI/LLM space today. We're primarily seeking JavaScript/React developers with 3\\+ years of experience to train large AI language models, helping cutting\\-edge generative AI models write better frontend code. Projects typically include discrete, highly variable problems that involve engaging with these models as they learn to code. We 

In [52]:
from sentence_transformers import SentenceTransformer

# Load the model
model_name = 'Snowflake/snowflake-arctic-embed-l-v2.0'
model = SentenceTransformer(model_name)

# Define the queries and documents
queries = ['what is snowflake?', 'Where can I get the best tacos?']
documents = ['The Data Cloud!', 'Mexico City of Course!']

# Compute embeddings: use `prompt_name="query"` to encode queries!
query_embeddings = model.encode(queries, prompt_name="query") 
document_embeddings = model.encode(documents)

# Compute cosine similarity scores
scores = model.similarity(query_embeddings, document_embeddings)

# Output the results
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)


  from .autonotebook import tqdm as notebook_tqdm


Query: what is snowflake?
tensor(0.2666) The Data Cloud!
tensor(0.0663) Mexico City of Course!
Query: Where can I get the best tacos?
tensor(0.2716) Mexico City of Course!
tensor(0.1085) The Data Cloud!


In [54]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("/home/giuseppe/Downloads/giuseppe_tinti.pdf")
pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [55]:
print(f"{pages[0].metadata}\n")
print(pages[0].page_content)

{'producer': 'pdfTeX-1.40.26', 'creator': 'LaTeX with RenderCV', 'creationdate': '2025-02-06T18:58:47+00:00', 'author': 'Giuseppe Tinti', 'keywords': '', 'moddate': '2025-02-06T18:58:47+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'subject': '', 'title': 'Giuseppe Tinti CV', 'trapped': '/False', 'source': '/home/giuseppe/Downloads/giuseppe_tinti.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}

Giuseppe Tinti
♂¶ap-¶arker-altSestu/CA /envel⌢pegiuseppe31tinti@gmail.com ♂phone-alt39 3513244561 /githubGiuseppe31-s /linkedin-inGiuseppe Tinti
Chi Sono
Data Scientist con oltre 2 anni di esperienza , specializzato nell’analisi dei dati e nella costruzione di
modelli predittivi. Competente in Python, SQL e tecniche statistiche, con conoscenze in ambito Cloud
(AWS) e nel deployment di modelli in produzione. Particolarmente interessato ai paradigmi di previsione e
regressione, con l’obiettivo di continuare a sviluppare e

In [61]:
query_embeddings = model.encode(pages[0].page_content, prompt_name="query") 
document_embeddings = model.encode([df_jobs.loc[i, "description"] for i in range(len(df_jobs))])

In [62]:
score = model.similarity(query_embeddings, document_embeddings)

In [63]:
print(score)

tensor([[0.3311, 0.3311, 0.3311, 0.3311, 0.5028, 0.3421, 0.4652, 0.3487, 0.4018,
         0.4394]])


In [65]:
import numpy as np

In [79]:
np.array(np.argsort(score)[0])[::-1]

  np.array(np.argsort(score)[0])[::-1]


array([4, 6, 9, 8, 7, 5, 3, 2, 1, 0])

In [80]:
print(df_jobs.loc[6, "description"])

Mavriq, parte di Moltiply Group, è la tech company a cui appartengono alcuni tra i più importanti brand di comparazione ed intermediazione online in Italia (MutuiOnline.it, Segugio.it, SOStariffe.it, Trovaprezzi.it, Switcho e molti altri) e all’estero (LeLynx.fr, Rastreator, Pricewise, Verivox). I servizi offerti dai brand di Mavriq aiutano con trasparenza i consumatori a trovare ciò di cui hanno bisogno, al miglior prezzo. Siamo un team di circa 1\.000 “smart disruptors” distribuiti in Europa, America Latina ed Asia.

  

Il successo di Mavriq è legato al successo dei nostri team. Per questo, siamo oggi alla ricerca di un nuovo o una nuova team member con cui continuare a scrivere la nostra storia nel mondo della comparazione ed intermediazione internazionale.

  

  

**Posizione:**
--------------

**Il ruolo**


Per supportare la crescita dei progetti di Machine Learning in Mavriq per i brand Segugio.it, PrestitiOnline e altri, siamo alla ricerca di un\-a Junior Data Scientist che, 