In [9]:
import dspy
import os
from dotenv import load_dotenv
from typing import Literal,Union
import pandas as pd
import time

In [None]:
load_dotenv()
# lm = dspy.LM("groq/llama-3.3-70b-versatile", api_key= os.getenv("GROQ_API_KEY"))
lm = dspy.LM(model='ollama_chat/gemma:27b-it-qat',api_base='http://127.0.0.1:11435', stop='\n\n', model_type='chat')
dspy.configure(lm=lm)
dspy.context(experimental=True)

<contextlib._GeneratorContextManager at 0x22d8859dd60>

In [11]:
lm('test')

APIConnectionError: litellm.APIConnectionError: Ollama_chatException - {"error":"model \"gemma:27b-it-qat\" not found, try pulling it first"}

In [4]:


class JobDetails(dspy.Signature):
    """Extract detailed job information from a listing."""
    job_text: str = dspy.InputField()
    company: str = dspy.OutputField(desc="Name of the company")
    job_title: str = dspy.OutputField(desc="standardized job format in the sequence seniority(eg junior,senior) , title(eg full-stack,web,ml), role (engineer,intern) ")
    location: str = dspy.OutputField(desc="Job location")
    domain: str = dspy.OutputField(desc="Domain of work AI web,development,full-stack")
    domain_specific_skills : str = dspy.OutputField(desc="Technical skills only relevant to the current domain eg opencv,pandas,numpy,tensorflow for AI")
    work_model: str = dspy.OutputField(desc="Work model (e.g., Hybrid, Full-time)")
    min_experience: int = dspy.OutputField(desc="minimum experience level in years")
    max_experience: int = dspy.OutputField(desc="maximum experience level in years")
    number_of_employeees : str = dspy.OutputField(desc='number of employees working in company')
    weeks_since_posting: int = dspy.OutputField(desc="weeks passed since posting date")
    min_salary: Union[None, int] = dspy.OutputField(desc="minimum pay range")
    max_salary: Union[None, int] = dspy.OutputField(desc="maximum pay range")
    no_of_applicants : int = dspy.OutputField(desc="number of people that have clicked apply")
    company_type: str = dspy.OutputField(desc="Type of company, size, and funding stage")
    key_responsibilities: str = dspy.OutputField(desc="Key responsibilities and tasks for the role")
    technical_requirements: str = dspy.OutputField(desc="Required technical skills and technologies")
    education: Literal['Bachelors','Masters','Phd'] = dspy.OutputField(desc="minimum Required educational background")
    benefits_culture: str = dspy.OutputField(desc="Company culture, benefits, and perks")
    unique_aspects: str = dspy.OutputField(desc="Unique aspects of the role or company")
    application_link: Union[None, int] = dspy.OutputField(desc="How to apply for the job")
    applicant_insights: str = dspy.OutputField(desc="Insights on typical applicants and experience levels")


In [5]:
job_parser = dspy.ChainOfThought(JobDetails)

In [None]:
import pandas as pd
import time

with open('jobs.txt', 'r', encoding='utf-8') as file:
    jobs = file.read().split('-' * 80)
    first_iter = True

    for idx, job_data in enumerate(jobs):
        try:
            data = job_parser(job_text=job_data)
        except Exception as e:
            print(f'failed to parse index {idx}: {e}, skipping....')
            continue  # Skip this iteration if parsing fails

        time.sleep(60)

        row_df = pd.DataFrame([data.toDict()])  # Create 1-row DataFrame

        if first_iter:
            df = row_df
            first_iter = False
        else:
            df = pd.concat([df, row_df], ignore_index=True)  # assign back to df

df.to_csv('structured_jobs_data.csv', index=False)
