In [16]:
import os
from dotenv import load_dotenv

# Pull any variables defined in a local .env file into the process environment.
load_dotenv()

# Read the Groq credential from the environment; this is None when it is unset.
groq_api_key = os.environ.get('groq_api_key')

In [17]:
from langchain_groq import ChatGroq

# Chat model client used by every chain below; a low temperature keeps the
# extraction output close to deterministic.
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    groq_api_key=groq_api_key,
    temperature=0.2,
)

In [18]:
from langchain_community.document_loaders import WebBaseLoader

# Fetch the job posting page and keep the text of the last document returned.
loader = WebBaseLoader("https://jobs.nike.com/job/R-43459?from=job%20search%20funnel")
documents = loader.load()
page_data = documents.pop().page_content
print(page_data)

Apply for Graphic Designer II, Men's Global Football Apparel

Search JobsSkip navigationSearch JobsNIKE, INC. JOBSContract JobsJoin The Talent CommunityLife @ NikeOverviewBenefitsBrandsOverviewJordanConverseTeamsOverviewAdministrative SupportAdvanced InnovationAir Manufacturing InnovationAviationCommunicationsCustomer ServiceDesignDigitalFacilitiesFinance & AccountingGovernment & Public AffairsHuman ResourcesInsights & AnalyticsLegalManufacturing & EngineeringMarketingMerchandisingPlanningPrivacyProcurementProduct Creation, Development & ManagementRetail CorporateRetail StoresSalesSocial & Community ImpactSports MarketingStrategic PlanningSupply Chain, Distribution & LogisticsSustainabilityTechnologyLocationsOverviewNike WHQNike New York HQEHQ: Hilversum, The NetherlandsELC: Laakdal, BelgiumGreater China HQDiversity, Equity & InclusionOverviewMilitary InclusionDisability InclusionIndigenous InclusionInternshipsDesignGraphic Designer II, Men's Global Football ApparelBeaverton, OregonBec

In [19]:
from langchain_core.prompts import PromptTemplate

# Prompt text for the extraction step. `{page_data}` is the only placeholder
# and is filled with the scraped page text when the chain is invoked.
extract_template = """ 
    ### SCRAPED TEXT FROM WEBSITE :
    {page_data}
    ### INSTRUCTIONS:
    The scraped text is from the career's page of a website.
    Your job is to extract the job postings and return them in JSON format containing the 
    following keys: `role`, `experience`, `skills` and `description`. 
    Only return the valid JSON.
    ### VALID JSON (NO PREAMBLE):
    """

prompt_extract = PromptTemplate.from_template(extract_template)

# Pipe the filled-in prompt into the chat model, then run it on the scraped page.
chain_extract = prompt_extract | llm
res = chain_extract.invoke({'page_data': page_data})

# Show the raw reply text and confirm what type it is before parsing it.
print(res.content)
type(res.content)

```json
{
  "role": "Graphic Designer II, Men's Global Football Apparel",
  "experience": "1-4+ years",
  "skills": [
    "Graphic Design",
    "Color Design",
    "Product Design",
    "Sketching",
    "Illustration",
    "Typography",
    "Print and pattern",
    "Color theory",
    "Layout",
    "Adobe CC"
  ],
  "description": "We’re currently looking for a Graphic Designer II to contribute to the vision of Nike Global Football. Our team works on highly visible product across Global Federations, Clubs, and NWSL. We support these teams with bespoke collections comprised of game day, training and lifestyle product."
}
```


str

In [24]:
from langchain_core.output_parsers import JsonOutputParser

# Turn the model's textual reply into native Python objects.
json_parser = JsonOutputParser()
json_res = json_parser.parse(text=res.content)
json_res

{'role': "Graphic Designer II, Men's Global Football Apparel",
 'experience': '1-4+ years',
 'skills': ['Graphic Design',
  'Color Design',
  'Product Design',
  'Sketching',
  'Illustration',
  'Typography',
  'Print and pattern',
  'Color theory',
  'Layout',
  'Adobe CC'],
 'description': 'We’re currently looking for a Graphic Designer II to contribute to the vision of Nike Global Football. Our team works on highly visible product across Global Federations, Clubs, and NWSL. We support these teams with bespoke collections comprised of game day, training and lifestyle product.'}

In [25]:
# Inspect the Python type of the parsed result.
type(json_res)

dict

In [26]:
import pandas as pd

# Portfolio table: the cells below read its "Techstack" and "Links" columns.
df = pd.read_csv(filepath_or_buffer='my_portfolio.csv')
df

Unnamed: 0,Techstack,Links
0,"React, Node.js, MongoDB",https://example.com/react-portfolio
1,"Angular,.NET, SQL Server",https://example.com/angular-portfolio
2,"Vue.js, Ruby on Rails, PostgreSQL",https://example.com/vue-portfolio
3,"Python, Django, MySQL",https://example.com/python-portfolio
4,"Java, Spring Boot, Oracle",https://example.com/java-portfolio
5,"Flutter, Firebase, GraphQL",https://example.com/flutter-portfolio
6,"WordPress, PHP, MySQL",https://example.com/wordpress-portfolio
7,"Magento, PHP, MySQL",https://example.com/magento-portfolio
8,"React Native, Node.js, MongoDB",https://example.com/react-native-portfolio
9,"iOS, Swift, Core Data",https://example.com/ios-portfolio


In [27]:
import uuid
import chromadb

# Open (or create) the on-disk Chroma store under ./vectorstore and the
# "portfolio" collection inside it.
client = chromadb.PersistentClient('vectorstore')
collection = client.get_or_create_collection(name="portfolio")

# Seed the collection only when it is empty, so re-running this cell does not
# insert the whole portfolio again under fresh random ids. The extra
# `df.empty` check keeps an empty portfolio a no-op, as the row loop was.
if not collection.count() and not df.empty:
    # Build the three parallel lists straight from the columns and hand them to
    # Chroma in a single batched add, instead of one add() call per row.
    collection.add(
        documents=df["Techstack"].tolist(),
        metadatas=[{"links": link} for link in df["Links"]],
        ids=[str(uuid.uuid4()) for _ in range(len(df))],
    )

In [28]:
# Look up the two closest portfolio entries for each skill phrase, then keep
# only the metadata (the portfolio links) from the query result.
query_result = collection.query(
    query_texts=["Experience in Python", "Expertise in React"],
    n_results=2,
)
links = query_result.get('metadatas', [])
links

[[{'links': 'https://example.com/ml-python-portfolio'},
  {'links': 'https://example.com/python-portfolio'}],
 [{'links': 'https://example.com/react-portfolio'},
  {'links': 'https://example.com/react-native-portfolio'}]]