### Código para los URL de cada vacante en la bolsa

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Search-results page of the job board, filtered to vacancies in Mexico.
url = "https://jobs.apple.com/en-us/search?location=mexico-MEXC"

# Site root used to turn the (relative) hrefs into absolute URLs.
base_url = "https://jobs.apple.com"

# Initialised up front so that df_url is always defined, even when the
# listing page cannot be downloaded.
vacancy_links = []

try:
    # The timeout prevents the cell from hanging if the server never answers.
    response = requests.get(url, timeout=30)
except requests.RequestException:
    response = None

if response is not None and response.status_code == 200:
    soup = BeautifulSoup(response.text, "html.parser")

    # Every vacancy title in the results table is an <a> with this class.
    vacancy_links = [
        link["href"]
        for link in soup.find_all("a", class_="table--advanced-search__title", href=True)
    ]
else:
    print("No se pudo acceder a la página")

# Build the absolute URLs and expose them as a one-column DataFrame
# (empty when the download failed).
full_vacancy_links = [base_url + link for link in vacancy_links]
df_url = pd.DataFrame({"url": full_vacancy_links})

In [None]:
df_url.head(12)

Unnamed: 0,url
0,https://jobs.apple.com/en-us/details/114438287...
1,https://jobs.apple.com/en-us/details/114438291...
2,https://jobs.apple.com/en-us/details/114438295...
3,https://jobs.apple.com/en-us/details/200125422...
4,https://jobs.apple.com/en-us/details/114438285...
5,https://jobs.apple.com/en-us/details/114438290...
6,https://jobs.apple.com/en-us/details/114438292...
7,https://jobs.apple.com/en-us/details/114438297...
8,https://jobs.apple.com/en-us/details/200502576...
9,https://jobs.apple.com/en-us/details/200509363...


### Función que recorre cada url y extrae la información de cada vacante.

In [None]:
def _text_or_none(element):
    """Return ``element.text``, or None when the lookup found no element."""
    return element.text if element is not None else None


def get_job_details(url):
    """Download one vacancy page and return its fields as a one-row DataFrame.

    Args:
        url: Absolute URL of the vacancy detail page.

    Returns:
        A DataFrame with a single row and the columns "Título", "Ubicación",
        "Compañía", "Fecha", "Resumen", "Requisitos", "Descripción",
        "Otros_requisitos", "Educación_Experiencia" and "URL". Fields whose
        HTML element is not present on the page are None. Returns None (after
        printing the error) when the page cannot be downloaded.
    """
    try:
        # The timeout keeps one unresponsive page from blocking the whole run.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error: {str(e)}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    title = _text_or_none(soup.find(class_="jd__header--title"))

    location_element = soup.select_one('div[id="job-location-name"] span[itemprop="addressCountry"]')
    location = location_element.get_text(strip=True) if location_element is not None else None

    company = _text_or_none(soup.select_one('div[class="job-team-name"]'))

    # The posting date is read from the `datetime` attribute of the first
    # <time> element. `.get` is used so that a <time> tag without that
    # attribute yields None instead of raising and discarding the vacancy.
    time_element = soup.find('time')
    date = time_element.get('datetime') if time_element is not None else None

    summary = _text_or_none(soup.find("div", class_="jd__row--main jd__summary--main"))
    key_qualifications = _text_or_none(soup.find("div", class_="jd__row--main jd__summary"))
    description = _text_or_none(
        soup.find("div", id="jd-description", class_="jd__row--main jd__summary--main")
    )
    additional_req = _text_or_none(
        soup.find("div", id="jd-additional-requirements", class_="jd__row--main jd__summary")
    )
    educationyexperience = _text_or_none(
        soup.find("div", id="jd-education-experience", class_="jd__row--main jd__summary--main")
    )

    datos = {
        "Título": title,
        "Ubicación": location,
        "Compañía": company,
        "Fecha": date,
        "Resumen": summary,
        "Requisitos": key_qualifications,
        "Descripción": description,
        "Otros_requisitos": additional_req,
        "Educación_Experiencia": educationyexperience,
        "URL": url
    }

    return pd.DataFrame([datos])

# Fetch every vacancy page and keep only the ones that could be retrieved
# (get_job_details returns None on failure).
detalles_por_url = (get_job_details(vacancy_url) for vacancy_url in df_url['url'])
resultados = [detalle for detalle in detalles_por_url if detalle is not None]

# Stack the one-row DataFrames into a single table with a fresh 0..n-1 index.
resultados_df = pd.concat(resultados, ignore_index=True)

In [None]:
resultados_df.head()

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,Requisitos,Descripción,Otros_requisitos,Educación_Experiencia,URL
0,MX-Technical Specialist,Mexico,Apple Retail,2023-11-10,"After customers purchase our products, you’re ...",Ability to assess customers’ support needs whe...,"As a Technical Specialist, you help new owners...",• You're passionate about Apple and eager to s...,,https://jobs.apple.com/en-us/details/114438287...
1,MX-Genius,Mexico,Apple Retail,2023-11-10,"At the Apple Store, you maintain customers’ tr...",Strong people skills and a knack for problem s...,"As a Genius, you provide insightful advice and...",• You have an aptitude for acquiring skills in...,,https://jobs.apple.com/en-us/details/114438291...
2,MX-Creative,Mexico,Apple Retail,2023-11-10,You inspire creativity by sharing your knowled...,Passion for education and ability to instruct ...,"As a Creative, your main role at the Apple Sto...",• You’re comfortable selling as well as teachi...,,https://jobs.apple.com/en-us/details/114438295...
3,MX-Business Pro,Mexico,Apple Retail,2023-11-10,"As a Business Pro, you are responsible for dev...",Minimum 3 to 5 years of proven track record of...,"You lead customer engagement, deepen relations...","• You have outstanding communication skills, b...",,https://jobs.apple.com/en-us/details/200125422...
4,MX-Operations Expert,Mexico,Apple Retail,2023-11-10,"As an Operations Expert, you know better than ...",Ability to think quickly and perform problem-s...,"As an Operations Expert, you and your team hav...",• You can manage and meet multiple inventory d...,,https://jobs.apple.com/en-us/details/114438285...


### Modificación de los datos

In [None]:
# Remove the "MX-" / "MX -" country prefix from the vacancy title.
# The pattern is anchored to the start of the title so that only the prefix is
# removed, and the whitespace around the dash is consumed as well so that
# "MX - Foo" becomes "Foo" rather than " Foo".
resultados_df['Título'] = (
    resultados_df['Título']
    .str.replace(r'^\s*MX\s*-\s*', '', regex=True)
    .str.strip()
)

In [None]:
# Every vacancy comes from the same employer, so the scraped team name is
# overwritten with the company name for all rows (including rows where no
# team name was found).
resultados_df['Compañía'] = 'Apple, Inc.'

In [None]:
# Strip bullet characters and line breaks from the free-text columns.

# Columns to clean.
columnas_a_limpiar = ['Requisitos', 'Descripción', 'Otros_requisitos', 'Educación_Experiencia']

# For each column: drop the "•" bullets, then turn newlines into spaces.
for nombre_columna in columnas_a_limpiar:
    resultados_df[nombre_columna] = (
        resultados_df[nombre_columna]
        .str.replace('•', '', regex=False)
        .str.replace('\n', ' ', regex=False)
    )

In [None]:
# Replace missing values in 'Educación_Experiencia' with empty strings.
columna_educacion = 'Educación_Experiencia'
resultados_df[columna_educacion] = resultados_df[columna_educacion].fillna('')

In [None]:
# Replace missing values in 'Otros_requisitos' with empty strings.
columna_otros = 'Otros_requisitos'
resultados_df[columna_otros] = resultados_df[columna_otros].fillna('')

In [None]:
resultados_df.head(1)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,Requisitos,Descripción,Otros_requisitos,Educación_Experiencia,URL
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",Ability to assess customers’ support needs whe...,"As a Technical Specialist, you help new owners...",You're passionate about Apple and eager to sh...,,https://jobs.apple.com/en-us/details/114438287...


### Unión de columnas para análisis de características.

In [None]:
# Columns whose text is merged into a single field.
columnas_a_unir = ['Requisitos', 'Descripción', 'Otros_requisitos', 'Educación_Experiencia']

# Build 'Req_txt': for each row, the non-null values of the selected columns
# converted to text and joined with single spaces.
resultados_df['Req_txt'] = resultados_df[columnas_a_unir].apply(
    lambda fila: ' '.join(str(valor) for valor in fila.dropna()),
    axis=1,
)

In [None]:
resultados_df.head(1)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,Requisitos,Descripción,Otros_requisitos,Educación_Experiencia,URL,Req_txt
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",Ability to assess customers’ support needs whe...,"As a Technical Specialist, you help new owners...",You're passionate about Apple and eager to sh...,,https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...


In [None]:
# The individual text columns are no longer needed once 'Req_txt' exists.
resultados_df = resultados_df.drop(columns=columnas_a_unir)

In [None]:
resultados_df.head(1)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,URL,Req_txt
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...


### PROCESAMIENTO DEL LENGUAJE.

#### Defino las listas de Skills, Tools, Aptitudes y Lenguajes.

In [None]:
# Catalogue of skill names. encontrar_skills lowercases each entry and searches
# for it in the (lowercased) requirement text, so matching is case-insensitive
# and the spelling used here is the one reported in the results.
skills = ['Flowcharts', 'Media Coverage', 'Digital Trafficker', 'Wireframes',
    'Infrastructure', 'Social Listening', 'Audiovisual Production', 'Microsoft SQL', 'Branding', 'Marketing',
     'Social Media', 'Personnel Management', 'Marketing Plan', 'Adobe Programs', 'Javascript Programming',
    'Content Manager', 'C# Programming', 'UX Design', 'Illustration', 'Data Analytics', 'Advertising Sales',
    'Project Management Office (PMO)', 'Adobe Illustrator', 'Jira', 'Windows Server', 'Insight Detection',
    'Sales Closing', 'Account Management', 'Team Leadership', 'Inbound Marketing', 'Freelance Artist', 'Figma', 'FX Makeup',
     'ReactJS', 'Social Ads', 'Dashboards', 'ERP (Enterprise Resource Planning) System', 'Creative Strategy',
    'Video Editing', 'Pressure Management and Goal Achievement', 'Communication Skills', 'Programming', 'Software Support',
     'Content Management', 'Journalism', 'Bioprocesses', 'Excel', 'Client Presentations', 'Storytelling', 'Benchmarks',
    'Node.js', 'Planning', 'Screen Printing', 'Digital Marketing', 'Google Cloud Platform', 'Customer Service', 'PHP',
    'Front-End Design', 'Design Thinking', 'Data Analysis', 'Client Prospecting', 'Copywriting', 'SQL Database', 'Web Development',
    'Analyst', 'Business Intelligence Management', 'Database', 'AMEF', 'Typography Design', 'Human Resources', 'Web Administration',
     'Motion Graphics', 'Content Creation', 'Communication Sciences', 'Web Writing', 'Web Scraping', 'Texture Mapping', 'Digital Specialist',
     'Offline Marketing', 'REST API Development', 'Market Research', 'Process Improvement', 'Digital Design', 'Writing', 'Monitoring',
     'Lean Manufacturing', 'SAP', 'React', 'Performance Analyst', 'Scrum Master', 'Team Coordination', 'Photography', 'Accounting Systems',
    'Style Correction', 'PMP Guidelines', 'Decision-Making Skills', 'Project Manager', 'Call Center Sales', 'Coaching', 'E-commerce',
    'Image Editing', 'Bitcoin', 'Public Relations', 'Python', 'Team Management Skills', 'Content for Social Media', 'Ticket Handling',
    'Digital Content Creation', 'Community Manager', 'Digital Media', 'Cloud Services', 'Windows Forms', 'Communication',
    'CRM (Customer Relationship Management)', 'Digital Advertising Sales', 'Web Design', 'Strategic Planning', 'English',
     'Literary Content Creation', '.NET Programming', 'UX UI Design', 'Keyword Research', 'Negotiation Techniques Management',
     'HTML Programming', 'Requirements Analysis', 'Social Media Marketing', 'ETL (Extract, Transform, Load)', 'Contouring',
    'Advertising', 'Google Ads', 'Editorial Design', 'Debugging', 'Digital Campaigns', 'Apache Spark', 'Software as a Service (SaaS)',
     'Java Programming', 'Responsive Design', 'Business Development', 'IT Solutions Manager', 'Postman', 'Azure DevOps', 'Adobe Photoshop',
     'Canva', 'Administrative Skills', 'DevOps', 'Angular', 'Version Control', 'Front-End Developer', 'Facebook Ads Campaign Implementation',
     'Virtual Assistance', 'Web Roles', 'Administration', 'Marketing Strategies', 'Fullstack Developer', 'API Consumption',
    'Supplier Relations', 'Computer Maintenance', 'Project Management', 'Voiceover', 'Advertising Design', 'Project Coordination',
    'Sales', 'Backend Development', 'User Flow Design', 'Interpersonal Communication Skills', 'Call Center Administration',
     'Email Marketing', 'MySQL', 'Good Spelling', 'Digital Campaign Implementation', 'Brand Design', 'Interior Architecture',
    'UI Design', 'Graphic Design', 'Consulting', 'ETL Process', 'React Hooks']

In [None]:
# Catalogue of tool names. encontrar_tools lowercases each entry and searches
# for it in the (lowercased) requirement text, so matching is case-insensitive.
# NOTE(review): 'MacOS' and 'macOS' differ only by case and are therefore
# equivalent for matching; consider keeping a single spelling.
tools = ['Instagram', 'Zend Framework', 'Actionscript', 'Asana', 'Nats', 'Magento', 'Sprinklr', 'Outlook',
         'Mailchimp', 'HubSpot', 'ClickUp', 'Microsoft SQL', 'Twitter Analytics', 'WordPress', 'Social Media',
 'AWS', 'Monday', 'Adobe Audition', 'Correction', 'PowerPoint', 'HTML', 'Adobe Illustrator', 'Jira', 'Socialbakers',
 'Vue.js', 'Staging', 'Project', 'Scrum Methodology', 'Spring', 'Social Ads', 'Figma', 'Google Tag Manager', 'C#',
'Web Filtering', 'Solidworks', 'Spark', 'Google Cloud', 'TypeScript', 'Express', 'MacOS', 'Adobe', 'MongoDB', 'Microsoft Office',
'Excel', 'Facebook Business Manager', 'Next.js', 'tRPC', 'Metricool', 'Node.js', 'Office Suite', 'Salesforce', 'Marketing Foundations',
'Brandwatch', 'Facebook Analytics', 'Access', 'Sketchup', 'Facebook', 'Google', 'Looker Studio', 'SQL', 'Flutter', 'Django', 'JavaScript',
 'Supermetrics', 'Adwords / Google Ads', 'Adobe InDesign', 'Adobe After Effects', 'Writing', 'Final Cut', 'Adobe Creative Cloud',
'React Native', 'DaVinci Resolve', 'macOS', 'Microsoft Project', 'Linux', 'SAP', 'React', 'Google Docs', 'SEO', 'Java', 'Sysomos',
 'Trello', 'Google Marketing Platform', 'Cashier', 'Linux Commands', 'GraphQL', 'Python', 'Blender', 'Facebook Insights',
'Adobe Lightroom', 'Adobe Experience', 'Xcode', 'G Suite', 'Fanpage Karma', 'Word', 'Miro', 'Shopify', 'Marketplace', 'Trady',
'YouTube', 'Adobe Suite', 'Autocad', 'Advisor', 'TalkWalker', 'Less', 'VSCode', 'PostgreSQL', 'Bootstrap', 'Emplifi', 'Sprout Social',
'Programmatic Advertising', 'Golang', 'Social Media Design', 'Adobe Premiere', 'Git', 'Visual Studio', 'LinkedIn Ads', 'Power BI',
 'Microsoft Azure', 'Material UI', 'Google Data Studio', 'CSS', 'Cinema 4D', 'Windows', 'Postman', 'Jupiter X', 'Adobe Photoshop',
'Tableau', 'Meltwater', 'Canva', 'Visual Basic for Applications', 'Docker', 'Facebook Business', 'Google Workspace', 'Cake PHP', 'Hootsuite',
'Webex', 'Facebook Ads', 'Zoho', 'CRM', 'Odoo', 'Google Analytics', 'Tailwind', 'Cisco', 'GitHub', 'LinkedIn', 'Microsoft Teams',
'Oracle SQL', 'Basecamp', 'Smart Contracts', 'Keynote', 'MySQL', 'Angular.js', 'BMC Remedy']

In [None]:
# Catalogue of soft-skill (aptitude) names searched for in the requirement
# text. Each name appears exactly once: a repeated entry would be reported
# twice for the same vacancy by the matcher, which walks this list in order.
aptitudes = [
    'Service Attitude',
    'Adaptable',
    'Kind',
    'Analytical',
    'Passionate',
    'Passionate About Technology',
    'Attention to Detail',
    'Autodidact',
    'Autonomy in Work',
    'Good Relationship',
    'Good Attitude',
    'Business Management',
    'Search for Excellence (Initiative)',
    'Adaptability',
    'Committed',
    'Commitment',
    'Communication',
    'Assertive Communication',
    'Effective Communication',
    'Efficient Communication',
    'With Initiative',
    'Reliable',
    'Consistent',
    'Content Creation and Editing',
    'Creative',
    'Dedicated',
    'Mental Dexterity',
    'Dynamic',
    'Disciplined',
    'Willing to Teach',
    'Willing to Learn',
    'Efficient',
    'Empathy',
    'Entrepreneurial',
    'Empathetic',
    'Enthusiastic',
    'Excellent Presentation',
    'Word Fluency',
    'Flexible',
    'Team Management',
    'Sales Enthusiast',
    'Honest',
    'Emotionally Intelligent',
    'Leadership',
    'Leadership and Team Management',
    'Time Management',
    'Multidisciplinary',
    'Negotiation',
    'Organized',
    'Goal-Oriented',
    'Results-Oriented',
    'Project Owner',
    'Patient',
    'Creative Thinking',
    'Critical Thinking',
    'Logical Thinking',
    'Strategic Thinking',
    'Perfectionist',
    'Persistent',
    'Pragmatic',
    'Proactive',
    'Productive',
    'Punctual',
    'Resilient',
    'Problem Solving',
    'Responsible',
    'Quick Learner',
    'Sense of Urgency',
    'Sociable',
    'Tenacious',
    'Work Under Pressure',
    'Customer-oriented',
    'Teamwork',
    'Versatile',
]

In [None]:
# Catalogue of language names searched for in the requirement text.
# Each name appears exactly once so that a language is never reported twice
# for the same vacancy.
languages = [
    'Spanish',
    'English',
    'Chinese Mandarin',
    'Hindi',
    'Arabic',
    'Portuguese',
    'Bengali',
    'Russian',
    'French',
    'German',
    'Japanese',
    'Turkish',
    'Chinese Cantonese',
    'Italian',
    'Swedish',
    'Indonesian',
    'Vietnamese',
    'Korean',
    'Polish',
    'Ukrainian',
    'Maltese',
    'Czech',
    'Hungarian',
    'Dutch',
    'Greek',
    'Norwegian',
    'Danish',
    'Finnish',
    'Egyptian Arabic',
    'Hebrew',
    'Thai',
    'Malay',
    'Filipino',
    'Afrikaans',
    'Luganda',
]

#### Función de matcheo.

##### Skills

In [None]:
from transformers import pipeline
import re

def encontrar_skills(Req_txt, catalogo=None):
    """Return the catalogue skills mentioned in a requirement text.

    Matching is case-insensitive and whole-term: a name only matches when it
    is not glued to other word characters, so 'Java' does not match inside
    'JavaScript'.

    Args:
        Req_txt: Requirement text to scan. Non-string values (e.g. NaN) are
            treated as empty text.
        catalogo: Optional iterable of skill names. Defaults to the
            module-level ``skills`` list.

    Returns:
        List of matched names, in catalogue order, spelled as in the
        catalogue. Names that differ only by case are reported once.
    """
    candidatos = skills if catalogo is None else catalogo
    # Lowercase once instead of once per catalogue entry.
    texto = Req_txt.lower() if isinstance(Req_txt, str) else ''

    vistos = set()
    coincidencias = []
    for skill in candidatos:
        clave = skill.lower()
        if clave in vistos:
            continue
        vistos.add(clave)
        # Lookarounds are used instead of \b because \b cannot match next to a
        # non-word character, which made names such as 'C# Programming',
        # '.NET Programming' or '... (PMO)' impossible to find.
        patron = r'(?<!\w){}(?!\w)'.format(re.escape(clave))
        if re.search(patron, texto):
            coincidencias.append(skill)
    return coincidencias

In [None]:
# Match the skill catalogue against each vacancy's merged requirement text.
texto_requisitos = resultados_df['Req_txt']
resultados_df['skills_1'] = texto_requisitos.apply(encontrar_skills)

In [None]:
resultados_df.head(1)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,URL,Req_txt,skills_1
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...,[]


In [None]:
# Función para convertir la lista de skills en un diccionario:

def lista_a_diccionarios(skills_1):
    """Wrap each skill name in a record with a default level and experience.

    Every name becomes ``{'name': <skill>, 'level': 'BASIC', 'experience': 0}``;
    the order of the input is preserved.
    """
    registros = []
    for nombre in skills_1:
        registro = {'name': nombre}
        registro['level'] = 'BASIC'
        registro['experience'] = 0
        registros.append(registro)
    return registros

# Convert each list of skill names in 'skills_1' into a list of records and
# store the result in the new 'skills' column.
nombres_skills = resultados_df['skills_1']
resultados_df['skills'] = nombres_skills.apply(lista_a_diccionarios)

In [None]:
resultados_df.head(2)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,URL,Req_txt,skills_1,skills
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...,[],[]
1,Genius,Mexico,"Apple, Inc.",2023-11-10,"At the Apple Store, you maintain customers’ tr...",https://jobs.apple.com/en-us/details/114438291...,Strong people skills and a knack for problem s...,[],[]


In [None]:
# Drop the intermediate (non-normalised) column of plain skill names.
resultados_df = resultados_df.drop(columns=['skills_1'])

##### Tools

In [None]:
import re

def encontrar_tools(Req_txt, catalogo=None):
    """Return the catalogue tools mentioned in a requirement text.

    Matching is case-insensitive and whole-term: a name only matches when it
    is not glued to other word characters, so 'SQL' does not match inside
    'MySQL'.

    Args:
        Req_txt: Requirement text to scan. Non-string values (e.g. NaN) are
            treated as empty text.
        catalogo: Optional iterable of tool names. Defaults to the
            module-level ``tools`` list.

    Returns:
        List of matched names, in catalogue order, spelled as in the
        catalogue. Names that differ only by case (e.g. 'MacOS' / 'macOS')
        are reported once, using the first spelling.
    """
    candidatos = tools if catalogo is None else catalogo
    # Lowercase once instead of once per catalogue entry.
    texto = Req_txt.lower() if isinstance(Req_txt, str) else ''

    vistos = set()
    coincidencias = []
    for tool in candidatos:
        clave = tool.lower()
        if clave in vistos:
            continue
        vistos.add(clave)
        # Lookarounds are used instead of \b because \b cannot match next to a
        # non-word character, which made names such as 'C#' impossible to find
        # when followed by a space or punctuation.
        patron = r'(?<!\w){}(?!\w)'.format(re.escape(clave))
        if re.search(patron, texto):
            coincidencias.append(tool)
    return coincidencias

In [None]:
# Match the tool catalogue against each vacancy's merged requirement text.
texto_requisitos = resultados_df['Req_txt']
resultados_df['tools_1'] = texto_requisitos.apply(encontrar_tools)

In [None]:
resultados_df.head(1)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,URL,Req_txt,skills,tools_1
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...,[],[]


In [None]:
# Función para convertir la lista de habilidades en un diccionario
def diccionarios_2(tools_1):
    """Wrap each tool name in a record with a default level and experience.

    Every name becomes ``{'name': <tool>, 'level': 'BASIC', 'experience': 0}``;
    the order of the input is preserved.
    """
    return list(
        map(lambda nombre: dict(name=nombre, level='BASIC', experience=0), tools_1)
    )

In [None]:
# Convert each list of tool names in 'tools_1' into a list of records and
# store the result in the new 'tools' column.
nombres_tools = resultados_df['tools_1']
resultados_df['tools'] = nombres_tools.apply(diccionarios_2)

In [None]:
resultados_df.head(1)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,URL,Req_txt,skills,tools_1,tools
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...,[],[],[]


In [None]:
# Drop the intermediate (non-normalised) column of plain tool names.
resultados_df = resultados_df.drop(columns=['tools_1'])

In [None]:
resultados_df.head(11)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,URL,Req_txt,skills,tools
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...,[],[]
1,Genius,Mexico,"Apple, Inc.",2023-11-10,"At the Apple Store, you maintain customers’ tr...",https://jobs.apple.com/en-us/details/114438291...,Strong people skills and a knack for problem s...,[],[]
2,Creative,Mexico,"Apple, Inc.",2023-11-10,You inspire creativity by sharing your knowled...,https://jobs.apple.com/en-us/details/114438295...,Passion for education and ability to instruct ...,[],[]
3,Business Pro,Mexico,"Apple, Inc.",2023-11-10,"As a Business Pro, you are responsible for dev...",https://jobs.apple.com/en-us/details/200125422...,Minimum 3 to 5 years of proven track record of...,"[{'name': 'Account Management', 'level': 'BASI...","[{'name': 'CRM', 'level': 'BASIC', 'experience..."
4,Operations Expert,Mexico,"Apple, Inc.",2023-11-10,"As an Operations Expert, you know better than ...",https://jobs.apple.com/en-us/details/114438285...,Ability to think quickly and perform problem-s...,"[{'name': 'Coaching', 'level': 'BASIC', 'exper...","[{'name': 'Access', 'level': 'BASIC', 'experie..."
5,Expert,Mexico,"Apple, Inc.",2023-11-10,The Apple Store is a retail environment like n...,https://jobs.apple.com/en-us/details/114438290...,"Demonstrated proficiency in technology, partic...","[{'name': 'Sales', 'level': 'BASIC', 'experien...",[]
6,Business Expert,Mexico,"Apple, Inc.",2023-11-10,"At the Apple Store, you connect business profe...",https://jobs.apple.com/en-us/details/114438292...,Knowledge of how businesses use technology and...,"[{'name': 'Communication Skills', 'level': 'BA...",[]
7,"Specialist: Full-Time, Part-Time, and Part-Ti...",Mexico,"Apple, Inc.",2023-11-10,"As a Specialist, you help create the energy an...",https://jobs.apple.com/en-us/details/114438297...,"Strong interest in technology, particularly Ap...","[{'name': 'Communication Skills', 'level': 'BA...",[]
8,"Apple Music Lead, Mexico and Spanish-Speaking ...",Mexico,"Apple, Inc.",2023-10-10,"At Apple Music, we’re profoundly passionate ab...",https://jobs.apple.com/en-us/details/200502576...,Extensive work experience in the music industr...,"[{'name': 'Marketing', 'level': 'BASIC', 'expe...","[{'name': 'MacOS', 'level': 'BASIC', 'experien..."
9,Enterprise Channel Account Executive,Mexico,"Apple, Inc.",2023-10-09,The people here at Apple don’t just build prod...,https://jobs.apple.com/en-us/details/200509363...,10-12 years of proven experience working with ...,"[{'name': 'English', 'level': 'BASIC', 'experi...",[]


##### Aptitudes

In [None]:

def encontrar_aptitudes(Req_txt, catalogo=None):
    """Return the catalogue aptitudes mentioned in a requirement text.

    Matching is case-insensitive and whole-term: a name only matches when it
    is not glued to other word characters.

    Args:
        Req_txt: Requirement text to scan. Non-string values (e.g. NaN) are
            treated as empty text.
        catalogo: Optional iterable of aptitude names. Defaults to the
            module-level ``aptitudes`` list.

    Returns:
        List of matched names, in catalogue order, spelled as in the
        catalogue. A name that is repeated in the catalogue (or differs only
        by case) is reported once.
    """
    candidatos = aptitudes if catalogo is None else catalogo
    # Lowercase once instead of once per catalogue entry.
    texto = Req_txt.lower() if isinstance(Req_txt, str) else ''

    vistos = set()
    coincidencias = []
    for aptitud in candidatos:
        clave = aptitud.lower()
        if clave in vistos:
            continue
        vistos.add(clave)
        # Lookarounds are used instead of \b because \b cannot match next to a
        # non-word character, e.g. after the ')' of
        # 'Search for Excellence (Initiative)'.
        patron = r'(?<!\w){}(?!\w)'.format(re.escape(clave))
        if re.search(patron, texto):
            coincidencias.append(aptitud)
    return coincidencias

In [None]:
# Match the aptitude catalogue against each vacancy's merged requirement text.
texto_requisitos = resultados_df['Req_txt']
resultados_df['aptitudes_1'] = texto_requisitos.apply(encontrar_aptitudes)

In [None]:
def diccionarios_3(aptitudes_1):
    """Wrap each aptitude name in a ``{'name': <aptitude>}`` record.

    The order of the input is preserved.
    """
    registros = []
    for nombre in aptitudes_1:
        registros.append({'name': nombre})
    return registros

In [None]:
# Convert each list of aptitude names in 'aptitudes_1' into a list of records
# and store the result in the new 'aptitudes' column.
nombres_aptitudes = resultados_df['aptitudes_1']
resultados_df['aptitudes'] = nombres_aptitudes.apply(diccionarios_3)

In [None]:
# Drop the intermediate (non-normalised) column of plain aptitude names.
resultados_df = resultados_df.drop(columns=['aptitudes_1'])

In [None]:
resultados_df.head(1)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,URL,Req_txt,skills,tools,aptitudes
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...,[],[],"[{'name': 'Passionate'}, {'name': 'Commitment'..."


##### Languages

In [None]:
def encontrar_languages(Req_txt, catalogo=None):
    """Return the catalogue languages mentioned in a requirement text.

    Matching is case-insensitive and whole-term: a name only matches when it
    is not glued to other word characters.

    Args:
        Req_txt: Requirement text to scan. Non-string values (e.g. NaN) are
            treated as empty text.
        catalogo: Optional iterable of language names. Defaults to the
            module-level ``languages`` list.

    Returns:
        List of matched names, in catalogue order, spelled as in the
        catalogue. A name that is repeated in the catalogue (or differs only
        by case) is reported once.
    """
    candidatos = languages if catalogo is None else catalogo
    # Lowercase once instead of once per catalogue entry.
    texto = Req_txt.lower() if isinstance(Req_txt, str) else ''

    vistos = set()
    coincidencias = []
    for language in candidatos:
        clave = language.lower()
        if clave in vistos:
            continue
        vistos.add(clave)
        # Whole-term match: the name must not be preceded or followed by a
        # word character.
        patron = r'(?<!\w){}(?!\w)'.format(re.escape(clave))
        if re.search(patron, texto):
            coincidencias.append(language)
    return coincidencias

In [None]:
# Match the language catalogue against each vacancy's merged requirement text.
texto_requisitos = resultados_df['Req_txt']
resultados_df['languages_1'] = texto_requisitos.apply(encontrar_languages)

In [None]:
def diccionarios_4(languages_1):
    """Wrap each language name in a record with a default level.

    Every name becomes ``{'name': <language>, 'level': 'BASIC'}``; the order
    of the input is preserved.
    """
    registros = []
    for nombre in languages_1:
        registros.append(dict(name=nombre, level='BASIC'))
    return registros

In [None]:
# Convert each list of language names in 'languages_1' into a list of records
# and store the result in the new 'languages' column.
nombres_languages = resultados_df['languages_1']
resultados_df['languages'] = nombres_languages.apply(diccionarios_4)

In [None]:
# Drop the intermediate (non-normalised) column of plain language names.
resultados_df = resultados_df.drop(columns=['languages_1'])

In [None]:
resultados_df.head(1)

Unnamed: 0,Título,Ubicación,Compañía,Fecha,Resumen,URL,Req_txt,skills,tools,aptitudes,languages
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...,[],[],"[{'name': 'Passionate'}, {'name': 'Commitment'...",[]


In [None]:
# Replace entries without any detected language by NaN.
# The column holds lists of records, so "no data" is an empty list; comparing
# against the string ' ' (as before) never matched anything.

import numpy as np
resultados_df['languages'] = resultados_df['languages'].apply(
    lambda idiomas: idiomas if idiomas else np.nan
)

### NORMALIZACIÓN DE DATAFRAME PARA PLATAFORMA.

In [None]:
import numpy as np

# Names of the columns that are added, initially empty, to the scraped table.
new_columns = [
    'id',
    'companyId',
    'createdAt',
    'availableSlots',
    'benefits',
    'scholarity',
    'workhours',
    'locationConditions',
    'nationalRemote',
    'minSalary',
    'maxSalary',
    'minAge',
    'maxAge',
    'sex',
    'yearsOfExperience',
    'status',
    'updatedAt',
    'driversLicense',
    'degree',
    'validPassport',
    'validVisa',
    'nationalRelocation',
    'internationalRelocation',
    'availabilityToTravel',
    'seniority',
    'showSalaryRange',
    'state',
    'city',
    'postalCode',
    'slug',
    'latitude',
    'longitude',
    'companyImg',
]

In [None]:
# Append every column listed in new_columns, filled with NaN, producing `df`.
columnas_vacias = dict.fromkeys(new_columns, np.nan)
df = resultados_df.assign(**columnas_vacias)

In [None]:
# Rename the scraped (Spanish) columns to the target column names.
nombres_nuevos = {
    'Título': 'name',
    'Resumen': 'description',
    'Compañía': 'companyName',
    'Ubicación': 'country',
    'Fecha': 'originalDate',
    'URL': 'externalLink',
}
df = df.rename(columns=nombres_nuevos)


In [None]:
df.head(1)

Unnamed: 0,name,country,companyName,originalDate,description,externalLink,Req_txt,skills,tools,aptitudes,...,availabilityToTravel,seniority,showSalaryRange,state,city,postalCode,slug,latitude,longitude,companyImg
0,Technical Specialist,Mexico,"Apple, Inc.",2023-11-10,"After customers purchase our products, you’re ...",https://jobs.apple.com/en-us/details/114438287...,Ability to assess customers’ support needs whe...,[],[],"[{'name': 'Passionate'}, {'name': 'Commitment'...",...,,,,,,,,,,


In [None]:
df.info(1)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 44 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   name                     11 non-null     object 
 1   country                  11 non-null     object 
 2   companyName              11 non-null     object 
 3   originalDate             11 non-null     object 
 4   description              11 non-null     object 
 5   externalLink             11 non-null     object 
 6   Req_txt                  11 non-null     object 
 7   skills                   11 non-null     object 
 8   tools                    11 non-null     object 
 9   aptitudes                11 non-null     object 
 10  languages                11 non-null     object 
 11  id                       0 non-null      float64
 12  companyId                0 non-null      float64
 13  createdAt                0 non-null      float64
 14  availableSlots           0 n

In [None]:
# Put the columns in the desired order.
# Only the columns named here are kept, in exactly this sequence.
order_of_columns = [
    'id', 'companyId', 'name', 'description', 'createdAt', 'availableSlots',
    'skills', 'aptitudes', 'tools', 'languages', 'benefits', 'scholarity',
    'workhours', 'locationConditions', 'nationalRemote', 'minSalary',
    'maxSalary', 'minAge', 'maxAge', 'sex', 'yearsOfExperience', 'status',
    'country', 'updatedAt', 'driversLicense', 'degree', 'validPassport',
    'validVisa', 'nationalRelocation', 'internationalRelocation',
    'availabilityToTravel', 'seniority', 'showSalaryRange', 'state', 'city',
    'postalCode', 'slug', 'latitude', 'longitude', 'companyName',
    'companyImg', 'externalLink', 'originalDate',
]

# Select the columns in that order.
df = df.loc[:, order_of_columns]

In [None]:
df.head(11)

Unnamed: 0,id,companyId,name,description,createdAt,availableSlots,skills,aptitudes,tools,languages,...,state,city,postalCode,slug,latitude,longitude,companyName,companyImg,externalLink,originalDate
0,,,Technical Specialist,"After customers purchase our products, you’re ...",,,[],"[{'name': 'Passionate'}, {'name': 'Commitment'...",[],[],...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/114438287...,2023-11-10
1,,,Genius,"At the Apple Store, you maintain customers’ tr...",,,[],"[{'name': 'Commitment'}, {'name': 'Empathy'}, ...",[],[],...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/114438291...,2023-11-10
2,,,Creative,You inspire creativity by sharing your knowled...,,,[],"[{'name': 'Creative'}, {'name': 'Flexible'}, {...",[],[],...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/114438295...,2023-11-10
3,,,Business Pro,"As a Business Pro, you are responsible for dev...",,,"[{'name': 'Account Management', 'level': 'BASI...","[{'name': 'Passionate'}, {'name': 'Communicati...","[{'name': 'CRM', 'level': 'BASIC', 'experience...",[],...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/200125422...,2023-11-10
4,,,Operations Expert,"As an Operations Expert, you know better than ...",,,"[{'name': 'Coaching', 'level': 'BASIC', 'exper...","[{'name': 'Flexible'}, {'name': 'Leadership'},...","[{'name': 'Access', 'level': 'BASIC', 'experie...",[],...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/114438285...,2023-11-10
5,,,Expert,The Apple Store is a retail environment like n...,,,"[{'name': 'Sales', 'level': 'BASIC', 'experien...","[{'name': 'Passionate'}, {'name': 'Empathetic'...",[],[],...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/114438290...,2023-11-10
6,,,Business Expert,"At the Apple Store, you connect business profe...",,,"[{'name': 'Communication Skills', 'level': 'BA...","[{'name': 'Communication'}, {'name': 'Flexible'}]",[],[],...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/114438292...,2023-11-10
7,,,"Specialist: Full-Time, Part-Time, and Part-Ti...","As a Specialist, you help create the energy an...",,,"[{'name': 'Communication Skills', 'level': 'BA...","[{'name': 'Communication'}, {'name': 'Flexible...",[],[],...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/114438297...,2023-11-10
8,,,"Apple Music Lead, Mexico and Spanish-Speaking ...","At Apple Music, we’re profoundly passionate ab...",,,"[{'name': 'Marketing', 'level': 'BASIC', 'expe...","[{'name': 'Attention to Detail'}, {'name': 'Cr...","[{'name': 'MacOS', 'level': 'BASIC', 'experien...","[{'name': 'Spanish', 'level': 'BASIC'}, {'name...",...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/200502576...,2023-10-10
9,,,Enterprise Channel Account Executive,The people here at Apple don’t just build prod...,,,"[{'name': 'English', 'level': 'BASIC', 'experi...",[],[],"[{'name': 'English', 'level': 'BASIC'}]",...,,,,,,,"Apple, Inc.",,https://jobs.apple.com/en-us/details/200509363...,2023-10-09


### FUNCIÓN DE TRADUCCIÓN.

In [None]:

def translate_deep_translator(text, target_language="es"):
    """Translate a value, recursing into lists and dictionaries.

    Args:
        text: A string, a list, a dict, or any other value. Lists are
            translated element by element; for dicts only the values are
            translated (keys are kept as they are).
        target_language: Language code passed to ``GoogleTranslator`` as
            ``target``.

    Returns:
        A value with the same structure as ``text`` in which every non-blank
        string has been replaced by the result of
        ``GoogleTranslator(source="auto", target=target_language).translate``.
        Blank strings and non-string leaves (numbers, None, NaN, ...) are
        returned unchanged.
    """
    if isinstance(text, list):
        return [translate_deep_translator(item, target_language) for item in text]

    if isinstance(text, dict):
        return {
            key: translate_deep_translator(value, target_language)
            for key, value in text.items()
        }

    if isinstance(text, str):
        # Nothing to translate: skip the remote call for empty or
        # whitespace-only strings.
        if not text.strip():
            return text
        return GoogleTranslator(
            source="auto", target=target_language
        ).translate(text)

    return text


def translate_columns_deep_translator(
    df_a_traducir, column_names, target_language="es"
):
    """Translate the given columns of a DataFrame cell by cell.

    Each listed column is overwritten in ``df_a_traducir`` with the result of
    applying ``translate_deep_translator`` to every cell; the same DataFrame
    object is returned.
    """
    def _traducir_celda(celda):
        return translate_deep_translator(celda, target_language)

    for nombre in column_names:
        columna_traducida = df_a_traducir[nombre].apply(_traducir_celda)
        df_a_traducir[nombre] = columna_traducida
    return df_a_traducir

### FUNCIÓN PARA GENERAR SLUG (LINK DENTRO DE TED)




In [None]:
from nanoid import generate
def generate_slug(row):
    """Build the slug for one row from companyId, name and a random suffix.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with the keys
            "companyId" and "name".

    Returns:
        ``slugify("<companyId> <name> <random id>")``, where the random id is
        ``generate(size=10)``, or None when either field is missing
        (None or NaN).
    """
    company_id = row["companyId"]
    name = row["name"]

    # pd.isna covers both None and NaN; the empty columns of this table are
    # filled with NaN, which an `is not None` check lets through.
    if pd.isna(company_id) or pd.isna(name):
        return None

    # str() is required because " ".join only accepts strings and companyId
    # may be numeric.
    partes = [str(company_id), str(name), generate(size=10)]
    return slugify(" ".join(partes))


# Usage: compute the slug for every row.
# (This statement must start at column 0; the stray indentation it had is an
# IndentationError at module level.)
df["slug"] = df.apply(generate_slug, axis=1)

ModuleNotFoundError: No module named 'nanoid'