# jobtimize doc
Main script of the module.  
These functions perform web scraping on job search platforms to collect job details.
____
Requirements:

In [1]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably this lets the Jobtimize import in the next cell
# resolve from a local checkout rather than an installed package -- confirm.
sys.path.append("..")

In [2]:
from Jobtimize import scrapindeed, scrapmonster
import pandas as pd

**`jobscrap()`** is the main function, which collects and standardizes data from all sites.  
For each site, scraping is carried out.  
A classification algorithm will be used to complete the empty cells, particularly *type* and *category*.

In [3]:
def jobscrap(searchList, countryList):
    """
    Extract and normalize data from the search results of every scraped site.

    Country codes are matched case-insensitively and with surrounding
    whitespace ignored; entries that are not strings, are not in the
    supported list, or are duplicates are dropped before scraping.

    :searchList: list of jobs or keywords to search
    :countryList: list of countries in 2-letter code
    :return: pandas dataframe with the columns header, company, city,
             country, posted, description, type, category and url
    """

    # Codes accepted by this function; a set gives constant-time membership.
    # NOTE(review): 'QT' is not the ISO 3166-1 alpha-2 code for Qatar ('QA').
    # It is kept unchanged because the scraper modules may key on it -- confirm.
    countries = {
        'AE', 'AR', 'AT', 'AU', 'BE', 'BH', 'BR', 'CA', 'CH', 'CL', 'CN', 'CO',
        'CZ', 'DE', 'DK', 'ES', 'FI', 'FR', 'GB', 'GR', 'HK', 'HU', 'ID', 'IE',
        'IL', 'IN', 'IT', 'JP', 'KR', 'KW', 'LU', 'MX', 'MY', 'NL', 'NO', 'NZ',
        'OM', 'PE', 'PH', 'PK', 'PL', 'PT', 'QT', 'RO', 'RU', 'SA', 'SE', 'SG',
        'TH', 'TR', 'TW', 'US', 'VE', 'VN', 'ZA'
    }

    # Normalize first so that "fr" or " FR " is not silently discarded.
    normalized = (
        country.strip().upper()
        for country in countryList
        if isinstance(country, str)
    )
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # same country is never scraped twice.
    countryList = [
        country for country in dict.fromkeys(normalized)
        if country in countries
    ]

    indeed = scrapindeed.IndeedScrap(searchList, countryList)
    monster = scrapmonster.MonsterScrap(searchList, countryList)
    # add here other sites in the same format

    jobData = pd.DataFrame(indeed + monster,
                           columns = [
                               "header", "company", "city", "country",
                               "posted", "description", "type", "category",
                               "url"
                           ])
    return jobData

# Example of use

Let's search for data analyst positions in Nantes, France.  
Preview the first 5 rows.

In [4]:
# Scrape Indeed and Monster for the query "Data Analyst nantes", restricted to France.
df = jobscrap(["Data Analyst nantes"], ["FR"])
# Bare last expression so the notebook renders the first five rows.
df.head()

Unnamed: 0,header,company,city,country,posted,description,type,category,url
0,"Responsable Acquisition, Conversion et Data H/F",Groupe Beneteau,Nantes (44),FR,+ 2019-12-27T14:44:06,Date: 11 janv. 2020\n MARKETING\nNantes France...,,,https://www.indeed.com/viewjob?jk=234428ce260d...
1,ANALYSTE DÉVELOPPEUR SOLUTIONS BIG DATA (H/F),Crédit mutuel,Nantes (44),FR,+ 2019-12-27T14:44:06,En accompagnement du développement de projets ...,,,https://www.indeed.com/viewjob?jk=a287fc268e0b...
2,INGENIEUR ETUDES AS400 IBMI H/F NANTES,Umanis,Saint-Herblain (44),FR,+ 2019-12-27T14:44:06,THE FRENCH LEADER OF THE DATA RECRUITS.\nJoini...,,,https://www.indeed.com/viewjob?jk=d6b94f23eb44...
3,ANALYSTE DÉVELOPPEUR SUR LES TECHNOLOGIES DÉCI...,Euro Information,Nantes (44),FR,+ 2019-12-27T14:44:06,"Présentation de la société\nEuro-Information, ...",,,https://www.indeed.com/viewjob?jk=3d2405f0ab3d...
4,Data Scientiste / Data Analyste / Freelance,STAR TEC INFORMATIQUE,Nantes (44),FR,2019-12-28T14:44:06,Nous recherchons pour l’un de nos clients un D...,,,https://www.indeed.com/viewjob?jk=52dbbdc056d2...
