In [2]:
import csv
from datetime import datetime, date
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from random import random
import time

import warnings # current version of seaborn generates a bunch of warnings that we'll ignore
warnings.filterwarnings("ignore")

In [3]:
# Use this to test for captcha block or IP ban
def get_URL(position,location):
    #from torrequest import TorRequest
    """[Build a template url for a dummy call to verify the site isn't returning a captcha]
    Args:
        position ([string]): [job for query]
        location ([string]): [location for query]
    Returns:
        [string]: [formatted url]
    """
    template = 'https://www.indeed.com/jobs?q={}&l={}&fromage=2&sort=date'
                
    position = position.replace(' ', '%20')
    location = location.replace(' ', '+')
    url = template.format(position,location)
    return url


# from torrequest import TorRequest
# tr=TorRequest(password='your_super_secure_password')
position = 'data scientist'
location = 'iowa'
# tr.reset_identity()
response = requests.get(get_URL(position,location))
# This will either return an HTML block for a captcha or of a search result
#response.text

In [4]:
def get_desc_features(job_url):
    response_job_desc = requests.get(job_url)
    soup = BeautifulSoup(response_job_desc.text, 'html.parser')

    
    try:
        salary_and_jType = soup.find('div', id='salaryInfoAndJobType').text.strip()
    except:
        salary_and_jType = None
    if salary_and_jType == None:
        try:
            salary_and_jType = soup.find('div',id="icl-u-xs-block jobsearch-ReqAndQualSection-item--title").text.replace("\n", "").strip()
        except:
            salary_and_jType = None
    #TODO get benefits from its designated section
    
    
    try:
        sal_guide_items = []
        items = soup.find('ul',class_='css-1lyr5hv eu4oa1w0')
        for i in items:
            sal_guide_items.append(i.text)
    except:
        sal_guide_items = None
        
        
    try:
        salfromsection = soup.find('span',class_='icl-u-xs-mr--xs').text
    except:
        salfromsection = None
        
        
    try:
        job_type_items = []
        job_type_from_section = soup.find('div',class_='jobsearch-JobDescriptionSection-sectionItem').next_sibling.children
        for i in job_type_from_section:
            if i.text == 'Job Type':
                continue
            else:
                job_type_items.append(i.text)
    except:
        job_type_items = None
        
    
    try:
        requirements = soup.find(class_="icl-u-xs-block jobsearch-ReqAndQualSection-item--title").text.replace("\n", "").strip()      

    except:
        requirements = None
        
        
    try:
        description = soup.find(id="jobDescriptionText").text.replace('\n', '')
    except:
        description = None
        
        
    # A nifty little workaround for evading detection.
    time.sleep(.3+random()*3)
    #TODO assess h2 tags commonalities to determine if these section descriptions are from Indeed or are at least of only a few variations.
        #you could then distinguish the description into sections and conduct NLP etc each.
    raw_desc_soup = soup
    return salary_and_jType, sal_guide_items, salfromsection, job_type_items, requirements, description, raw_desc_soup
    





In [5]:
#TODO condense these with lists, particularly fields that have .text.strip()
def get_features(post):
    datapoint_dict = {}

    title = post.find('h2',
              attrs={'class': lambda e: e.startswith('jobTitle') if e else False}).text.replace('new', '')

    company = post.find('span', 'companyName').text.strip()
    try:
        rating = post.find('span', 'ratingNumber').text
    except:
        rating = None

    location = post.find('div', 'companyLocation').text.strip()

    postDate = post.find('span', 'date').text

    extractDate = datetime.today().strftime('%Y-%m-%d')

    summary = post.find('div', 'job-snippet').text.strip().replace('\n', ' ')

    url = 'https://www.indeed.com'+post.get('href')

    try:
        estimated_salary = post.find('span','estimated-salary').text.strip()
    except:
        estimated_salary = None
    try:
        salary = post.find('div','metadata salary-snippet-container').text.strip()
    except:
        salary = None


        
            
    salary_and_jType, sal_guide_items, salfromsection, job_type_items, requirements, description, raw_desc_soup = get_desc_features(url)
    datapoint_dict = {
                        'title':title,
                        'company':company,
                        'rating':rating,
                        'location':location,
                        'salary':salary,
                        'estimated_salary':estimated_salary,
                        'postDate':postDate,
                        'extractDate':extractDate,
                        'summary':summary,
                        'url':url,
                        'salary_and_jType':salary_and_jType,
                        'sal_guide_items':sal_guide_items,
                        'salfromsection':salfromsection,
                        'job_type_items':job_type_items,
                        'requirements':requirements,
                        'description':description,
                        'raw_desc_soup':raw_desc_soup}
    if len(datapoint_dict) > 0:
        return datapoint_dict
    else:
        pass

In [6]:
def main(position, location):
    """[Conducts the web scraping process]
    Args:
        position ([string]): [job position for indeed.com query]
        position ([string]): [job location for indeed.com query]
        
        Returns:
        [csv]: [scraped data]
    """
    data = pd.DataFrame()
    
    # extract the job data
    while True:
        response = requests.get(get_URL(position, location))
        soup = BeautifulSoup(response.text, 'html.parser')
        searchResults = soup.find('div', id='mosaic-provider-jobcards')
        refinedsearchResults = searchResults.find_all('a', attrs={'class': lambda e: e.startswith('tapItem') if e else False})
        

        raw_posts = []
        for post in refinedsearchResults:
            raw_posts.append(post)
        
        n = 0
        for post in raw_posts:
            datapoint = get_features(post)
            data = data.append(datapoint, ignore_index=True)
        # Again, a nifty little workaround for evading detection.
            n+=1
            print(n)
            
        try:
            url = 'https://www.indeed.com' + soup.find('a', {'aria-label': 'Next'}).get('href')
        except:
            break

    name = position.replace(' ','_')
    loc = location.replace(' ','_')
    day = date.today()
    # save the job data
    data.to_csv(f'../app/data/scraped_{name}_{loc}_{day}.csv', index=False)
    return data

In [7]:
position = 'data scientist'
location = 'remote'
data = main(position,location )

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
1

In [8]:
data

Unnamed: 0,company,description,estimated_salary,extractDate,job_type_items,location,postDate,rating,raw_desc_soup,requirements,sal_guide_items,salary,salary_and_jType,salfromsection,summary,title,url
0,Comcentric,We are currently seeking a Senior Data Scienti...,,2022-04-17,[Full-time],+21 locationsRemote,PostedJust posted,4.2,"[html, \n, [\n, [\n, <script crossorigin=""anon...",,,"$139,000 - $276,000 a year","$139,000 - $276,000 a year - Full-time","$139,000 - $276,000 a year","Mentor and grow other software engineers, data...",Data Scientist,https://www.indeed.com/rc/clk?jk=4c2d6184ae3a9...
1,Revel,About RevelRevel's mission is to accelerate EV...,Estimated $118K - $150K a year,2022-04-17,,"Remote in Brooklyn, NY",PostedJust posted,2.6,"[html, \n, [\n, [\n, <script crossorigin=""anon...",,"[, Not provided by employer, $118K to $150K pe...",,Full-time,,"Lead the development of our Ride Assignment, R...",Senior Data Scientist - Routing Algorithm,https://www.indeed.com/company/Revel/jobs/Seni...
2,nThrive,nThrive® provides leading healthcare revenue c...,,2022-04-17,[Full-time],Remote,PostedToday,2.9,"[html, \n, [\n, [\n, <script crossorigin=""anon...",Bachelor's (Preferred)US work authorization (P...,,"$130,000 - $160,000 a year","$130,000 - $160,000 a year - Full-time","$130,000 - $160,000 a year",Serve as a subject matter expert on the team f...,Lead machine learning engineer,https://www.indeed.com/company/nThrive/jobs/Le...
3,Xen.ai,About the positionXen.AI is looking for Direct...,Estimated $95.3K - $121K a year,2022-04-17,,"Remote in Detroit, MI+40 locations",Hiring ongoing,,"[html, \n, [\n, [\n, <script crossorigin=""anon...",,"[, Not provided by employer, $95.3K - $121K a ...",,"Full-time, Part-time, Contract",,Nice to have - familiarity of latest IT trends...,"Director of Marketing - AI, Data Science Techn...",https://www.indeed.com/company/Xen.ai/jobs/Dir...
4,SparkCognition,"Voted Best Places to Work in Austin, Best Payi...",Estimated $106K - $135K a year,2022-04-17,,"Remote in Austin, TX",PostedToday,4.4,"[html, \n, [\n, [\n, <script crossorigin=""anon...",,"[, Not provided by employer, $106K - $135K a y...",,,,Working closely with data scientists to unders...,Machine Learning Engineer,https://www.indeed.com/rc/clk?jk=40b5d79652e96...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,DIRECTV,"At DIRECTV, we’re connecting the world through...",,2022-04-17,[Full-time],"Remote in El Segundo, CA 90245",Posted1 day ago,3.7,"[html, \n, [\n, [\n, <script crossorigin=""anon...",,,"$169,300 - $338,500 a year","$169,300 - $338,500 a year - Full-time","$169,300 - $338,500 a year",Do you have a passion for digging for answers ...,Senior Director of Data Science,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
581,CyberCoders,Principal Data Scientist If you are a P...,,2022-04-17,[Full-time],"Remote in Seattle, WA 98164+2 locations",Posted1 day ago,3.7,"[html, \n, [\n, [\n, <script crossorigin=""anon...",,,"$150,000 - $200,000 a year","$150,000 - $200,000 a year - Full-time","$150,000 - $200,000 a year",In order to provide the world-class customer e...,Principal Data Scientist,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
582,Integration Developer Network LLC,Machine Learning Engineer24+ MonthsClient 7-El...,,2022-04-17,[Full-time],Remote,Posted1 day ago,,"[html, \n, [\n, [\n, <script crossorigin=""anon...",Master's (Preferred)Python: 3 years (Preferred...,,$70 - $75 an hour,$70 - $75 an hour - Full-time,$70 - $75 an hour,Machine learning: 3 years (Preferred). 3+ year...,Data Scientist,https://www.indeed.com/company/Integration-Dev...
583,CyberCoders,Remote Lead Machine Learning Engineer I...,,2022-04-17,[Full-time],"Remote in Pittsburgh, PA 15219+5 locations",Posted1 day ago,3.7,"[html, \n, [\n, [\n, <script crossorigin=""anon...",,,"$150,000 - $175,000 a year","$150,000 - $175,000 a year - Full-time","$150,000 - $175,000 a year",Job Type: Direct-hire / Permanent. Work with m...,Remote Lead Machine Learning Engineer,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...


In [9]:
name = position.replace(' ','_')
loc = location.replace(' ','_')
day = date.today()
data.to_csv(f'../app/data/scraped_{name}_{loc}_{day}.csv', index=False)

## Tor as needed

In [None]:
# Use this to use tor after activating it in terminal when needed.
import socks
import socket
socks.setdefaultproxy(proxy_type=socks.PROXY_TYPE_SOCKS5, addr="127.0.0.7", port=9050)
#print(tr.get("http://icanhazip.com").text)

### Concatenating Old Data With New

In [29]:
a = pd.read_csv(f'../app/data/scraped_data_scientist_remote_2022-04-17.csv')
total = pd.read_csv(f'../app/data/total.csv')


In [30]:
#total = pd.read_csv(f'../app/data/total.csv')

z = pd.concat([total,a])
z.to_csv('../app/data/total.csv', index= False)

In [32]:
z

Unnamed: 0,company,description,estimated_salary,extractDate,job_type_items,location,postDate,rating,raw_desc_soup,requirements,sal_guide_items,salary,salary_and_jType,salfromsection,summary,title,url
0,Online Technical Services,"Job descriptionData Scientist, MarketingSAN DI...",,2022-04-13,['Full-time'],Remote,PostedJust posted,3.7,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",Master's (Preferred)Python: 1 year (Preferred)...,,"$145,000 - $150,000 a year","$145,000 - $150,000 a year - Full-time","$145,000 - $150,000 a year",Identify relevant data sources and data sets t...,Data Scientist - Marketing,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
1,West CAP,HUMAN was founded in 2012 in a Brooklyn sci-fi...,Estimated $114K – $144K a year,2022-04-13,,"Remote in New York, NY+2 locations",PostedJust posted,3.5,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,"['', 'Not provided by employer', ""$114K to $14...",,Full-time,,You’ve worked as a data scientist solving larg...,"Data Scientist, BotGuard",https://www.indeed.com/rc/clk?jk=58cdde046f643...
2,Maya Ai inc.,Our Maya team is expanding and we are looking ...,,2022-04-13,"['Full-time', 'Part-time']",Remote,PostedJust posted,,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",Python: 1 year (Preferred)SQL: 1 year (Preferred),,"$77,766 - $183,411 a year","$77,766 - $183,411 a year - Full-time, Part-time","$77,766 - $183,411 a year",Our Analyst will be dealing with data coming i...,Data Scientist,https://www.indeed.com/company/Maya-Ai-inc./jo...
3,"EMERGETECH, INC",Description:Job CategoryData ScienceAbout Emer...,Estimated $94.7K – $120K a year,2022-04-13,,Remote,PostedJust posted,,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,"['', 'Not provided by employer', ""$94.7K to $1...",,,,Design and create the data sources that ”citiz...,Data Scientist,https://www.indeed.com/rc/clk?jk=95fb128bb025f...
4,Recurrent,What's the opportunity?Recurrent is on a missi...,Estimated $119K – $151K a year,2022-04-13,,"Remote in Seattle, WA",PostedJust posted,,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,"['', 'Not provided by employer', ""$119K to $15...",,,,Experienced - you have 2+ years of experience ...,Data Scientist,https://www.indeed.com/rc/clk?jk=e9ce610b72deb...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,DIRECTV,"At DIRECTV, we’re connecting the world through...",,2022-04-17,['Full-time'],"Remote in El Segundo, CA 90245",Posted1 day ago,3.7,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,,"$169,300 - $338,500 a year","$169,300 - $338,500 a year - Full-time","$169,300 - $338,500 a year",Do you have a passion for digging for answers ...,Senior Director of Data Science,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
581,CyberCoders,Principal Data Scientist If you are a P...,,2022-04-17,['Full-time'],"Remote in Seattle, WA 98164+2 locations",Posted1 day ago,3.7,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,,"$150,000 - $200,000 a year","$150,000 - $200,000 a year - Full-time","$150,000 - $200,000 a year",In order to provide the world-class customer e...,Principal Data Scientist,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
582,Integration Developer Network LLC,Machine Learning Engineer24+ MonthsClient 7-El...,,2022-04-17,['Full-time'],Remote,Posted1 day ago,,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",Master's (Preferred)Python: 3 years (Preferred...,,$70 - $75 an hour,$70 - $75 an hour - Full-time,$70 - $75 an hour,Machine learning: 3 years (Preferred). 3+ year...,Data Scientist,https://www.indeed.com/company/Integration-Dev...
583,CyberCoders,Remote Lead Machine Learning Engineer I...,,2022-04-17,['Full-time'],"Remote in Pittsburgh, PA 15219+5 locations",Posted1 day ago,3.7,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,,"$150,000 - $175,000 a year","$150,000 - $175,000 a year - Full-time","$150,000 - $175,000 a year",Job Type: Direct-hire / Permanent. Work with m...,Remote Lead Machine Learning Engineer,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...


In [23]:
a.postDate

0      PostedJust posted
1      PostedJust posted
2            PostedToday
3         Hiring ongoing
4            PostedToday
             ...        
580      Posted1 day ago
581      Posted1 day ago
582      Posted1 day ago
583      Posted1 day ago
584      Posted1 day ago
Name: postDate, Length: 585, dtype: object

In [24]:
a.extractDate

0      2022-04-17
1      2022-04-17
2      2022-04-17
3      2022-04-17
4      2022-04-17
          ...    
580    2022-04-17
581    2022-04-17
582    2022-04-17
583    2022-04-17
584    2022-04-17
Name: extractDate, Length: 585, dtype: object

In [25]:
a.postDate.value_counts()

Posted1 day ago      381
PostedToday          111
PostedJust posted     78
Hiring ongoing        15
Name: postDate, dtype: int64

In [26]:
data = a

In [27]:
#fix old imports

data['extractDate']= pd.to_datetime(data['extractDate'])

def pDate(row):
    from datetime import datetime, date, timedelta

    #days_ago = row['dateposted']
    delta = timedelta(0)
    try:
        return row['extractDate'] - delta
    except:
        return row

data['extractDate'] = data.apply( lambda row : pDate(row), axis = 1)
data['extractDate'] = data['extractDate'].astype(str)
data

Unnamed: 0,company,description,estimated_salary,extractDate,job_type_items,location,postDate,rating,raw_desc_soup,requirements,sal_guide_items,salary,salary_and_jType,salfromsection,summary,title,url
0,Comcentric,We are currently seeking a Senior Data Scienti...,,2022-04-17,['Full-time'],+21 locationsRemote,PostedJust posted,4.2,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,,"$139,000 - $276,000 a year","$139,000 - $276,000 a year - Full-time","$139,000 - $276,000 a year","Mentor and grow other software engineers, data...",Data Scientist,https://www.indeed.com/rc/clk?jk=4c2d6184ae3a9...
1,Revel,About RevelRevel's mission is to accelerate EV...,Estimated $118K - $150K a year,2022-04-17,,"Remote in Brooklyn, NY",PostedJust posted,2.6,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,"['', 'Not provided by employer', ""$118K to $15...",,Full-time,,"Lead the development of our Ride Assignment, R...",Senior Data Scientist - Routing Algorithm,https://www.indeed.com/company/Revel/jobs/Seni...
2,nThrive,nThrive® provides leading healthcare revenue c...,,2022-04-17,['Full-time'],Remote,PostedToday,2.9,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",Bachelor's (Preferred)US work authorization (P...,,"$130,000 - $160,000 a year","$130,000 - $160,000 a year - Full-time","$130,000 - $160,000 a year",Serve as a subject matter expert on the team f...,Lead machine learning engineer,https://www.indeed.com/company/nThrive/jobs/Le...
3,Xen.ai,About the positionXen.AI is looking for Direct...,Estimated $95.3K - $121K a year,2022-04-17,,"Remote in Detroit, MI+40 locations",Hiring ongoing,,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,"['', 'Not provided by employer', ""$95.3K - $12...",,"Full-time, Part-time, Contract",,Nice to have - familiarity of latest IT trends...,"Director of Marketing - AI, Data Science Techn...",https://www.indeed.com/company/Xen.ai/jobs/Dir...
4,SparkCognition,"Voted Best Places to Work in Austin, Best Payi...",Estimated $106K - $135K a year,2022-04-17,,"Remote in Austin, TX",PostedToday,4.4,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,"['', 'Not provided by employer', ""$106K - $135...",,,,Working closely with data scientists to unders...,Machine Learning Engineer,https://www.indeed.com/rc/clk?jk=40b5d79652e96...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,DIRECTV,"At DIRECTV, we’re connecting the world through...",,2022-04-17,['Full-time'],"Remote in El Segundo, CA 90245",Posted1 day ago,3.7,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,,"$169,300 - $338,500 a year","$169,300 - $338,500 a year - Full-time","$169,300 - $338,500 a year",Do you have a passion for digging for answers ...,Senior Director of Data Science,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
581,CyberCoders,Principal Data Scientist If you are a P...,,2022-04-17,['Full-time'],"Remote in Seattle, WA 98164+2 locations",Posted1 day ago,3.7,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,,"$150,000 - $200,000 a year","$150,000 - $200,000 a year - Full-time","$150,000 - $200,000 a year",In order to provide the world-class customer e...,Principal Data Scientist,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...
582,Integration Developer Network LLC,Machine Learning Engineer24+ MonthsClient 7-El...,,2022-04-17,['Full-time'],Remote,Posted1 day ago,,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",Master's (Preferred)Python: 3 years (Preferred...,,$70 - $75 an hour,$70 - $75 an hour - Full-time,$70 - $75 an hour,Machine learning: 3 years (Preferred). 3+ year...,Data Scientist,https://www.indeed.com/company/Integration-Dev...
583,CyberCoders,Remote Lead Machine Learning Engineer I...,,2022-04-17,['Full-time'],"Remote in Pittsburgh, PA 15219+5 locations",Posted1 day ago,3.7,"<!DOCTYPE html>\n\n<html dir=""ltr"" lang=""en"">\...",,,"$150,000 - $175,000 a year","$150,000 - $175,000 a year - Full-time","$150,000 - $175,000 a year",Job Type: Direct-hire / Permanent. Work with m...,Remote Lead Machine Learning Engineer,https://www.indeed.com/pagead/clk?mo=r&ad=-6NY...


In [21]:
a.extractDate

0      2022-04-18
1      2022-04-18
2      2022-04-18
3      2022-04-18
4      2022-04-18
          ...    
580    2022-04-18
581    2022-04-18
582    2022-04-18
583    2022-04-18
584    2022-04-18
Name: extractDate, Length: 585, dtype: object

In [51]:
data.to_csv('../app/data/scraped_data_scientist_remote_2022-04-14.csv', index= False)