In [None]:
from linkedin_api import Linkedin
import pandas as pd 
import time 
from tqdm import tqdm

In [None]:
def data_filter(entries):
    '''
    Pull the company, location, title and id out of each job entry and
    hand the collected columns to dataframe_fromlists().

    The function accepts an iterable of dictionaries as input. Each entry is
    indexed with the keys 'formattedLocation', 'title' and 'entityUrn'; a
    missing one raises KeyError. The company name is read from
    entry['companyDetails']['companyName'] and falls back to the string
    'nan' when either key is absent.

    The function returns whatever dataframe_fromlists() builds from the
    four columns.
    '''

    #  one tuple per entry, ordered (company, location, job_title, job_id)
    rows = []

    for posting in entries:
        place = posting['formattedLocation']
        heading = posting['title']
        urn = posting['entityUrn']

        #  postings without company details get the placeholder string 'nan'
        try:
            employer = posting['companyDetails']['companyName']
        except KeyError:
            employer = 'nan'

        rows.append((employer, place, heading, urn))

    #  transpose the rows into four column lists, keeping the positional
    #  order that dataframe_fromlists() indexes into
    columns = [[row[position] for row in rows] for position in range(4)]

    return dataframe_fromlists(columns)

In [None]:
def job_seeker(user, password, start=1, end=1, *,
               location_name='National Capital Region, Philippines',
               limit=1000) -> pd.DataFrame:
    '''

    This function gets data on job listings from Linkedin through the Linkedin API.

    The function accepts the username and password of the Linkedin account,
    which are used to construct the Linkedin client.

    One search is issued for every day in the inclusive range start..end.
    The search for a given day is called with listed_at = 86400 * (day + 1),
    i.e. a window of (day + 1) days expressed in seconds.
    NOTE(review): listed_at is presumably a maximum posting age, so the
    windows of successive days would be nested and the same posting could
    show up in several searches; no de-duplication is done here - confirm.

    location_name (keyword only) is forwarded to the search as the location
    filter; the default keeps the previously hard-coded region.

    limit (keyword only) is forwarded to the search as the maximum number of
    entries to return per search; the default keeps the previous value, 1000.

    The function returns a dataframe: the per-day results are each passed
    through data_filter() and then combined with concat_dataframelist().

    '''

    api = Linkedin(user, password)

    seconds_per_day = 86400

    extracted_df_list = []

    for day in tqdm(range(start, end+1)):

        #  extracting job listings from Linkedin for this day's window
        jobs = api.search_jobs(limit=limit,
                               listed_at=seconds_per_day * (day + 1),
                               location_name=location_name)

        extracted_df_list.append(data_filter(jobs))

    return concat_dataframelist(extracted_df_list)

In [None]:
def dataframe_fromlists(features_list) -> pd.DataFrame:
    '''

    This function builds the job-listing dataframe from positional feature lists.

    The function accepts a sequence that is indexed by position:
    0 = company, 1 = location, 2 = job_title, 3 = job_id.

    The function returns a dataframe with the columns job_title, location,
    company and job_id, in that order.

    '''
    #  (column name, position in features_list), listed in output column order
    layout = (('job_title', 2), ('location', 1), ('company', 0), ('job_id', 3))

    columns = {}
    for label, position in layout:
        columns[label] = features_list[position]

    return pd.DataFrame(columns)

In [None]:
def concat_dataframelist(extracted_df_list) -> pd.DataFrame:
    '''

    This function concatenates a list of dataframes.

    The function accepts a list (or other iterable) of dataframes to be
    concatenated, in order.

    The function returns a dataframe:
      - an empty dataframe when no dataframes are given (instead of failing
        with an IndexError);
      - the single dataframe itself, unchanged and not copied, when exactly
        one is given;
      - otherwise the row-wise concatenation of all of them, with the index
        renumbered from 0.

    '''

    frames = list(extracted_df_list)

    #  nothing to combine, e.g. when the caller looped over an empty range
    if not frames:
        return pd.DataFrame()

    #  a lone dataframe is handed back as-is, index untouched
    if len(frames) == 1:
        return frames[0]

    #  one concat over the whole list avoids re-copying the accumulated
    #  rows on every step, which pairwise concatenation in a loop would do
    return pd.concat(frames, ignore_index=True)