# jobtimize doc
Main script of the module.  
These functions perform web scraping on job search platforms to collect job details.
____
Requirements:

In [1]:
# Add the parent directory to the module search path so the `Jobtimize`
# import below can be resolved when this notebook is run from its own folder
# (presumably the package lives one level up -- confirm).
import sys
sys.path.append("..")

In [2]:
from Jobtimize import scrapindeed, scrapmonster
import pandas as pd
import warnings

**`jobscrap()`** is the main function, which collects and standardizes data from all sites.  
For each site, scraping is carried out.  
A classification algorithm will be used to complete the empty cells, particularly *type* and *category*.

In [3]:
def jobscrap(searchList, countryList, prox = False):
    """
    Extract and normalize data from the search results of every site.

    Each scraper (Indeed, Monster) is called with the same search terms and
    the same filtered country list; their results are concatenated into a
    single dataframe.

    :searchList: list of jobs or keywords to search
    :countryList: list of countries in 2-letter code. Matching ignores case
        and surrounding whitespace; entries that are not in the supported
        list are dropped and reported with a ``UserWarning``
    :prox: forwarded unchanged to each scraper; when truthy, a
        ``UserWarning`` about the longer execution time is emitted
    :return: pandas dataframe with the columns header, company, city,
        country, posted, description, type, category and url
    """

    if prox:
        warnings.warn("Using a proxy extends execution time", UserWarning)

    # NOTE(review): 'QT' is not an ISO 3166-1 alpha-2 code (Qatar is 'QA').
    # It is kept unchanged because the scraper modules may key on this exact
    # value -- confirm against scrapindeed / scrapmonster before renaming.
    countries = (
        'AE', 'AR', 'AT', 'AU', 'BE', 'BH', 'BR', 'CA', 'CH', 'CL', 'CN', 'CO',
        'CZ', 'DE', 'DK', 'ES', 'FI', 'FR', 'GB', 'GR', 'HK', 'HU', 'ID', 'IE',
        'IL', 'IN', 'IT', 'JP', 'KR', 'KW', 'LU', 'MX', 'MY', 'NL', 'NO', 'NZ',
        'OM', 'PE', 'PH', 'PK', 'PL', 'PT', 'QT', 'RO', 'RU', 'SA', 'SE', 'SG',
        'TH', 'TR', 'TW', 'US', 'VE', 'VN', 'ZA'
    )

    # Normalize each requested code before filtering so that "fr" or " FR "
    # is accepted. Input order and duplicates are preserved.
    accepted, rejected = [], []
    for country in countryList:
        # Non-string entries can never match a supported code.
        code = country.strip().upper() if isinstance(country, str) else None
        if code in countries:
            accepted.append(code)
        else:
            rejected.append(country)

    # Report what was dropped instead of discarding it silently; otherwise a
    # typo in a country code just produces a smaller (or empty) result.
    if rejected:
        warnings.warn(
            "Ignoring unsupported country codes: {}".format(
                ", ".join(repr(country) for country in rejected)),
            UserWarning)

    countryList = accepted

    indeed = scrapindeed.IndeedScrap(searchList, countryList, prox)
    monster = scrapmonster.MonsterScrap(searchList, countryList, prox)
    # add here other sites in the same format

    jobData = pd.DataFrame(indeed + monster,
                           columns = [
                               "header", "company", "city", "country",
                               "posted", "description", "type", "category",
                               "url"
                           ])
    return jobData

# Example of use

Let's search for data analyst positions in Nantes, France.  
Preview the first 5 rows.

In [4]:
# Run the search "Data Analyst nantes" on every supported site, restricted to
# France ("FR"). NOTE(review): this triggers the scrapers, so it presumably
# needs network access and its results will vary between runs.
df = jobscrap(["Data Analyst nantes"], ["FR"])
# Display the first five rows of the collected offers.
df.head()

Unnamed: 0,header,company,city,country,posted,description,type,category,url
0,Data Analyst,SIGMA,Developpement BI F/H,FR,2020-01-31T13:37:08,Vous pensez que la BI doit être à la portée de...,,,https://www.indeed.com/viewjob?jk=a874a5d70bd7...
1,Ingénieur Intégration / Data Analyst,EXTERNATIC,Saint-Herblain (44),FR,2020-01-16T13:37:08,"DESCRIPTION DE L'OFFRE\nExternatic, le hub d’o...",,,https://www.indeed.com/viewjob?jk=775875e65f93...
2,Analyste Investissement F/H,Groupe Blot Immobilier,44000 Nantes,FR,2020-01-24T13:37:08,"L'entreprise\n\nFondé il y a plus de 50 ans, l...",,,https://www.indeed.com/viewjob?jk=c7556b561196...
3,Data Analyst,Sigma Informatique,Developpement BI F/H,FR,+ 2020-01-05T13:37:08,Le groupe SIGMA (800 collaborateurs) est un ac...,,,https://www.indeed.com/viewjob?jk=8c17063625f6...
4,DevOps Data Infrastructure Engineer H/F,TripAdvisor,Nantes (44),FR,+ 2020-01-05T13:37:09,"LaFourchette, part of the TripAdvisor group, i...",,,https://www.indeed.com/viewjob?jk=022a1afec7d5...
