In [1]:
#import relevant libraries

import pandas as pd
salary_dataset = pd.read_csv(r"C:\Users\nickc\Downloads\Uncleaned_DS_jobs.csv")



In [2]:
#Check the dataframe
salary_dataset.head(5)

Unnamed: 0,index,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors
0,0,Sr Data Scientist,$137K-$171K (Glassdoor est.),Description\n\nThe Senior Data Scientist is re...,3.1,Healthfirst\n3.1,"New York, NY","New York, NY",1001 to 5000 employees,1993,Nonprofit Organization,Insurance Carriers,Insurance,Unknown / Non-Applicable,"EmblemHealth, UnitedHealth Group, Aetna"
1,1,Data Scientist,$137K-$171K (Glassdoor est.),"Secure our Nation, Ignite your Future\n\nJoin ...",4.2,ManTech\n4.2,"Chantilly, VA","Herndon, VA",5001 to 10000 employees,1968,Company - Public,Research & Development,Business Services,$1 to $2 billion (USD),-1
2,2,Data Scientist,$137K-$171K (Glassdoor est.),Overview\n\n\nAnalysis Group is one of the lar...,3.8,Analysis Group\n3.8,"Boston, MA","Boston, MA",1001 to 5000 employees,1981,Private Practice / Firm,Consulting,Business Services,$100 to $500 million (USD),-1
3,3,Data Scientist,$137K-$171K (Glassdoor est.),JOB DESCRIPTION:\n\nDo you have a passion for ...,3.5,INFICON\n3.5,"Newton, MA","Bad Ragaz, Switzerland",501 to 1000 employees,2000,Company - Public,Electrical & Electronic Manufacturing,Manufacturing,$100 to $500 million (USD),"MKS Instruments, Pfeiffer Vacuum, Agilent Tech..."
4,4,Data Scientist,$137K-$171K (Glassdoor est.),Data Scientist\nAffinity Solutions / Marketing...,2.9,Affinity Solutions\n2.9,"New York, NY","New York, NY",51 to 200 employees,1998,Company - Private,Advertising & Marketing,Business Services,Unknown / Non-Applicable,"Commerce Signals, Cardlytics, Yodlee"


In [3]:
#check data types
print(salary_dataset.dtypes)


index                  int64
Job Title             object
Salary Estimate       object
Job Description       object
Rating               float64
Company Name          object
Location              object
Headquarters          object
Size                  object
Founded                int64
Type of ownership     object
Industry              object
Sector                object
Revenue               object
Competitors           object
dtype: object


# Cleaning the data

##### 1) "Salary Estimate" needs to be converted to an integer
##### 2) We should extract State and City Separately from Location Column
##### 3) Company name should have number removed at the end
##### 4)  Replace "\n" with empty space to make Job Description Readable

# 1) Cleaning "Salary Estimate" Column Data

In [4]:
#Remove  (Glassdoor est.) from the end of Salary Estimate
salary_dataset['Salary Estimate'] = salary_dataset['Salary Estimate'].str.replace('\(Glassdoor est.\)', '', regex=True)

# Remove dollar sign
salary_dataset['Salary Estimate'] = salary_dataset['Salary Estimate'].str.replace('$', '')

# Split the range into two separate columns
temp_df = salary_dataset['Salary Estimate'].str.split('-', expand=True)

# Remove 'K' (case insensitive) from both columns, convert to numbers, then multiply by 1000 to reflect 'K' as thousands
# We used Case=False because we do not care if data has K or k, as both represent same value if its found in the column
# We used errors = 'coerce' because this will replace values that can't be converted to a number with NaN.
#This can be useful if we want to keep the rest of our transformations running even if some values can't be converted
salary_dataset['min_salary'] = pd.to_numeric(temp_df[0].str.replace('k', '', case=False).str.replace(' ', ''), errors='coerce') * 1000
salary_dataset['max_salary'] = pd.to_numeric(temp_df[1].str.replace('k', '', case=False).str.replace(' ', ''), errors='coerce') * 1000



In [5]:
salary_dataset.head()

Unnamed: 0,index,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,min_salary,max_salary
0,0,Sr Data Scientist,137K-171K,Description\n\nThe Senior Data Scientist is re...,3.1,Healthfirst\n3.1,"New York, NY","New York, NY",1001 to 5000 employees,1993,Nonprofit Organization,Insurance Carriers,Insurance,Unknown / Non-Applicable,"EmblemHealth, UnitedHealth Group, Aetna",137000,171000.0
1,1,Data Scientist,137K-171K,"Secure our Nation, Ignite your Future\n\nJoin ...",4.2,ManTech\n4.2,"Chantilly, VA","Herndon, VA",5001 to 10000 employees,1968,Company - Public,Research & Development,Business Services,$1 to $2 billion (USD),-1,137000,171000.0
2,2,Data Scientist,137K-171K,Overview\n\n\nAnalysis Group is one of the lar...,3.8,Analysis Group\n3.8,"Boston, MA","Boston, MA",1001 to 5000 employees,1981,Private Practice / Firm,Consulting,Business Services,$100 to $500 million (USD),-1,137000,171000.0
3,3,Data Scientist,137K-171K,JOB DESCRIPTION:\n\nDo you have a passion for ...,3.5,INFICON\n3.5,"Newton, MA","Bad Ragaz, Switzerland",501 to 1000 employees,2000,Company - Public,Electrical & Electronic Manufacturing,Manufacturing,$100 to $500 million (USD),"MKS Instruments, Pfeiffer Vacuum, Agilent Tech...",137000,171000.0
4,4,Data Scientist,137K-171K,Data Scientist\nAffinity Solutions / Marketing...,2.9,Affinity Solutions\n2.9,"New York, NY","New York, NY",51 to 200 employees,1998,Company - Private,Advertising & Marketing,Business Services,Unknown / Non-Applicable,"Commerce Signals, Cardlytics, Yodlee",137000,171000.0


In [6]:
# Let's count how many values are in our new columns

min_salary_count = salary_dataset['min_salary'].count()
max_salary_count = salary_dataset['max_salary'].count()

print(f"The 'min_salary' column has {min_salary_count} non-null values.")
print(f"The 'max_salary' column has {max_salary_count} non-null values.")

# Let's count how many NaN or Null values are in our new columns

min_salary_nan_count = salary_dataset['min_salary'].isnull().sum()
max_salary_nan_count = salary_dataset['max_salary'].isnull().sum()

print(f"The 'min_salary' column has {min_salary_nan_count} null values.")
print(f"The 'max_salary' column has {max_salary_nan_count} null values.")



The 'min_salary' column has 672 non-null values.
The 'max_salary' column has 652 non-null values.
The 'min_salary' column has 0 null values.
The 'max_salary' column has 20 null values.


# Let's explore why we have 20 null values in max_salary


In [7]:
# let's get rows where for max_salary we have Nulls
null_max_salary_rows = salary_dataset[salary_dataset['max_salary'].isnull()]
print(null_max_salary_rows)



     index                                          Job Title  \
303    303                                     Data Scientist   
304    304                                     Data Scientist   
305    305                                    Chief Scientist   
306    306                                     Data Scientist   
307    307                                     Data Scientist   
308    308                                     Data Scientist   
309    309                                     Data Scientist   
310    310                                     Data Scientist   
311    311                       Health Plan Data Analyst, Sr   
312    312  Principal Scientist/Associate Director, Qualit...   
313    313                                     Data Scientist   
314    314                              Senior Data Scientist   
315    315                                     Data Scientist   
316    316                                     Data Scientist   
317    317               

In [8]:
# As you can see we also tend to have (Employer est.) as well as (Glassdoor est.) from previous operations.
#Now we need to fix this as well.

#Let's re-run whole process

# Remove "(Glassdoor est.)" and "(Employer est.)"
salary_dataset['Salary Estimate'] = salary_dataset['Salary Estimate'].str.replace('\(Glassdoor est.\)', '', regex=False)
salary_dataset['Salary Estimate'] = salary_dataset['Salary Estimate'].str.replace('\(Employer est.\)', '', regex=False)

# Remove dollar sign
salary_dataset['Salary Estimate'] = salary_dataset['Salary Estimate'].str.replace('$', '')


# Split the range into two separate columns
temp_df = salary_dataset['Salary Estimate'].str.split('-', expand=True)

# Remove 'K' (case insensitive) from both columns, convert to numbers, then multiply by 1000 to reflect 'K' as thousands
# We used Case=False because we do not care if data has K or k, as both represent same value if its found in the column
# We used errors = 'coerce' because this will replace values that can't be converted to a number with NaN.
#This can be useful if we want to keep the rest of our transformations running even if some values can't be converted
salary_dataset['min_salary'] = pd.to_numeric(temp_df[0].str.replace('k', '', case=False).str.replace(' ', ''), errors='coerce') * 1000
salary_dataset['max_salary'] = pd.to_numeric(temp_df[1].str.replace('k', '', case=False).str.replace(' ', ''), errors='coerce') * 1000



In [9]:
# Let's count how many values are in our new columns

min_salary_count = salary_dataset['min_salary'].count()
max_salary_count = salary_dataset['max_salary'].count()

print(f"The 'min_salary' column has {min_salary_count} non-null values.")
print(f"The 'max_salary' column has {max_salary_count} non-null values.")

# Let's count how many NaN or Null values are in our new columns

min_salary_nan_count = salary_dataset['min_salary'].isnull().sum()
max_salary_nan_count = salary_dataset['max_salary'].isnull().sum()

print(f"The 'min_salary' column has {min_salary_nan_count} null values.")
print(f"The 'max_salary' column has {max_salary_nan_count} null values.")



The 'min_salary' column has 672 non-null values.
The 'max_salary' column has 652 non-null values.
The 'min_salary' column has 0 null values.
The 'max_salary' column has 20 null values.


# As seen below, our temporary dataframe did not have null values
# So, It appears that the issue is arising during the conversion of the temp_df

In [10]:
min_salary_nan2_count2 = temp_df.isnull().count()
max_salary_nan2_count2 = temp_df.isnull().count()

print(min_salary_nan2_count2)

# It appears that the issue is arising during the conversion of the temp_df

0    672
1    672
dtype: int64


In [11]:
# Convert 'min_salary' column to numeric
salary_dataset['min_salary'] = pd.to_numeric(temp_df[0].str.extract(r'(\d+)', expand=False), errors='coerce')

# Convert 'max_salary' column to numeric
salary_dataset['max_salary'] = pd.to_numeric(temp_df[1].str.extract(r'(\d+)', expand=False), errors='coerce')

# Multiply by 1000 to reflect 'K' as thousands
salary_dataset['min_salary'] *= 1000
salary_dataset['max_salary'] *= 1000


In [12]:
salary_dataset

Unnamed: 0,index,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,min_salary,max_salary
0,0,Sr Data Scientist,137K-171K,Description\n\nThe Senior Data Scientist is re...,3.1,Healthfirst\n3.1,"New York, NY","New York, NY",1001 to 5000 employees,1993,Nonprofit Organization,Insurance Carriers,Insurance,Unknown / Non-Applicable,"EmblemHealth, UnitedHealth Group, Aetna",137000,171000
1,1,Data Scientist,137K-171K,"Secure our Nation, Ignite your Future\n\nJoin ...",4.2,ManTech\n4.2,"Chantilly, VA","Herndon, VA",5001 to 10000 employees,1968,Company - Public,Research & Development,Business Services,$1 to $2 billion (USD),-1,137000,171000
2,2,Data Scientist,137K-171K,Overview\n\n\nAnalysis Group is one of the lar...,3.8,Analysis Group\n3.8,"Boston, MA","Boston, MA",1001 to 5000 employees,1981,Private Practice / Firm,Consulting,Business Services,$100 to $500 million (USD),-1,137000,171000
3,3,Data Scientist,137K-171K,JOB DESCRIPTION:\n\nDo you have a passion for ...,3.5,INFICON\n3.5,"Newton, MA","Bad Ragaz, Switzerland",501 to 1000 employees,2000,Company - Public,Electrical & Electronic Manufacturing,Manufacturing,$100 to $500 million (USD),"MKS Instruments, Pfeiffer Vacuum, Agilent Tech...",137000,171000
4,4,Data Scientist,137K-171K,Data Scientist\nAffinity Solutions / Marketing...,2.9,Affinity Solutions\n2.9,"New York, NY","New York, NY",51 to 200 employees,1998,Company - Private,Advertising & Marketing,Business Services,Unknown / Non-Applicable,"Commerce Signals, Cardlytics, Yodlee",137000,171000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,667,Data Scientist,105K-167K,Summary\n\nWe’re looking for a data scientist ...,3.6,TRANZACT\n3.6,"Fort Lee, NJ","Fort Lee, NJ",1001 to 5000 employees,1989,Company - Private,Advertising & Marketing,Business Services,Unknown / Non-Applicable,-1,105000,167000
668,668,Data Scientist,105K-167K,Job Description\nBecome a thought leader withi...,-1.0,JKGT,"San Francisco, CA",-1,-1,-1,-1,-1,-1,-1,-1,105000,167000
669,669,Data Scientist,105K-167K,Join a thriving company that is changing the w...,-1.0,AccessHope,"Irwindale, CA",-1,-1,-1,-1,-1,-1,-1,-1,105000,167000
670,670,Data Scientist,105K-167K,100 Remote Opportunity As an AINLP Data Scient...,5.0,ChaTeck Incorporated\n5.0,"San Francisco, CA","Santa Clara, CA",1 to 50 employees,-1,Company - Private,Advertising & Marketing,Business Services,$1 to $5 million (USD),-1,105000,167000


In [13]:
# Let's count how many values are in our new columns

min_salary_count = salary_dataset['min_salary'].count()
max_salary_count = salary_dataset['max_salary'].count()

print(f"The 'min_salary' column has {min_salary_count} non-null values.")
print(f"The 'max_salary' column has {max_salary_count} non-null values.")

# Let's count how many NaN or Null values are in our new columns

min_salary_nan_count = salary_dataset['min_salary'].isnull().sum()
max_salary_nan_count = salary_dataset['max_salary'].isnull().sum()

print(f"The 'min_salary' column has {min_salary_nan_count} null values.")
print(f"The 'max_salary' column has {max_salary_nan_count} null values.")

The 'min_salary' column has 672 non-null values.
The 'max_salary' column has 672 non-null values.
The 'min_salary' column has 0 null values.
The 'max_salary' column has 0 null values.


In [14]:
# This line will be adding average salary of minimum and maximum salary range
salary_dataset["Average Salary"] = (salary_dataset["min_salary"] + salary_dataset["max_salary"])/2
salary_dataset["Average Salary"] = salary_dataset["Average Salary"].astype(int)



# We have fixed our issue, now our Salary data is clean and correct
## Feature Engineering: We have finally derived 2 new columns (minimum and maximum salary) from our Salary Estimate range in correct format. Now Let's create Average of these 2 as a separate column


In [15]:
salary_dataset.head()

Unnamed: 0,index,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,min_salary,max_salary,Average Salary
0,0,Sr Data Scientist,137K-171K,Description\n\nThe Senior Data Scientist is re...,3.1,Healthfirst\n3.1,"New York, NY","New York, NY",1001 to 5000 employees,1993,Nonprofit Organization,Insurance Carriers,Insurance,Unknown / Non-Applicable,"EmblemHealth, UnitedHealth Group, Aetna",137000,171000,154000
1,1,Data Scientist,137K-171K,"Secure our Nation, Ignite your Future\n\nJoin ...",4.2,ManTech\n4.2,"Chantilly, VA","Herndon, VA",5001 to 10000 employees,1968,Company - Public,Research & Development,Business Services,$1 to $2 billion (USD),-1,137000,171000,154000
2,2,Data Scientist,137K-171K,Overview\n\n\nAnalysis Group is one of the lar...,3.8,Analysis Group\n3.8,"Boston, MA","Boston, MA",1001 to 5000 employees,1981,Private Practice / Firm,Consulting,Business Services,$100 to $500 million (USD),-1,137000,171000,154000
3,3,Data Scientist,137K-171K,JOB DESCRIPTION:\n\nDo you have a passion for ...,3.5,INFICON\n3.5,"Newton, MA","Bad Ragaz, Switzerland",501 to 1000 employees,2000,Company - Public,Electrical & Electronic Manufacturing,Manufacturing,$100 to $500 million (USD),"MKS Instruments, Pfeiffer Vacuum, Agilent Tech...",137000,171000,154000
4,4,Data Scientist,137K-171K,Data Scientist\nAffinity Solutions / Marketing...,2.9,Affinity Solutions\n2.9,"New York, NY","New York, NY",51 to 200 employees,1998,Company - Private,Advertising & Marketing,Business Services,Unknown / Non-Applicable,"Commerce Signals, Cardlytics, Yodlee",137000,171000,154000


# 2) Splitting City and State for Job Location 


In [16]:
# Split the column into two separate columns using the comma as the delimiter
new_cols = salary_dataset['Location'].str.split(', ', expand=True)

# Assign the new columns to the DataFrame
salary_dataset['Location_City'] = new_cols[0]
salary_dataset['Location_State'] = new_cols[1]


In [17]:
salary_dataset.head(25)

Unnamed: 0,index,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,min_salary,max_salary,Average Salary,Location_City,Location_State
0,0,Sr Data Scientist,137K-171K,Description\n\nThe Senior Data Scientist is re...,3.1,Healthfirst\n3.1,"New York, NY","New York, NY",1001 to 5000 employees,1993,Nonprofit Organization,Insurance Carriers,Insurance,Unknown / Non-Applicable,"EmblemHealth, UnitedHealth Group, Aetna",137000,171000,154000,New York,NY
1,1,Data Scientist,137K-171K,"Secure our Nation, Ignite your Future\n\nJoin ...",4.2,ManTech\n4.2,"Chantilly, VA","Herndon, VA",5001 to 10000 employees,1968,Company - Public,Research & Development,Business Services,$1 to $2 billion (USD),-1,137000,171000,154000,Chantilly,VA
2,2,Data Scientist,137K-171K,Overview\n\n\nAnalysis Group is one of the lar...,3.8,Analysis Group\n3.8,"Boston, MA","Boston, MA",1001 to 5000 employees,1981,Private Practice / Firm,Consulting,Business Services,$100 to $500 million (USD),-1,137000,171000,154000,Boston,MA
3,3,Data Scientist,137K-171K,JOB DESCRIPTION:\n\nDo you have a passion for ...,3.5,INFICON\n3.5,"Newton, MA","Bad Ragaz, Switzerland",501 to 1000 employees,2000,Company - Public,Electrical & Electronic Manufacturing,Manufacturing,$100 to $500 million (USD),"MKS Instruments, Pfeiffer Vacuum, Agilent Tech...",137000,171000,154000,Newton,MA
4,4,Data Scientist,137K-171K,Data Scientist\nAffinity Solutions / Marketing...,2.9,Affinity Solutions\n2.9,"New York, NY","New York, NY",51 to 200 employees,1998,Company - Private,Advertising & Marketing,Business Services,Unknown / Non-Applicable,"Commerce Signals, Cardlytics, Yodlee",137000,171000,154000,New York,NY
5,5,Data Scientist,137K-171K,About Us:\n\nHeadquartered in beautiful Santa ...,4.2,HG Insights\n4.2,"Santa Barbara, CA","Santa Barbara, CA",51 to 200 employees,2010,Company - Private,Computer Hardware & Software,Information Technology,Unknown / Non-Applicable,-1,137000,171000,154000,Santa Barbara,CA
6,6,Data Scientist / Machine Learning Expert,137K-171K,Posting Title\nData Scientist / Machine Learni...,3.9,Novartis\n3.9,"Cambridge, MA","Basel, Switzerland",10000+ employees,1996,Company - Public,Biotech & Pharmaceuticals,Biotech & Pharmaceuticals,$10+ billion (USD),-1,137000,171000,154000,Cambridge,MA
7,7,Data Scientist,137K-171K,Introduction\n\nHave you always wanted to run ...,3.5,iRobot\n3.5,"Bedford, MA","Bedford, MA",1001 to 5000 employees,1990,Company - Public,Consumer Electronics & Appliances Stores,Retail,$1 to $2 billion (USD),-1,137000,171000,154000,Bedford,MA
8,8,Staff Data Scientist - Analytics,137K-171K,Intuit is seeking a Staff Data Scientist to co...,4.4,Intuit - Data\n4.4,"San Diego, CA","Mountain View, CA",5001 to 10000 employees,1983,Company - Public,Computer Hardware & Software,Information Technology,$2 to $5 billion (USD),"Square, PayPal, H&R Block",137000,171000,154000,San Diego,CA
9,9,Data Scientist,137K-171K,Ready to write the best chapter of your career...,3.6,XSELL Technologies\n3.6,"Chicago, IL","Chicago, IL",51 to 200 employees,2014,Company - Private,Enterprise Software & Network Solutions,Information Technology,Unknown / Non-Applicable,-1,137000,171000,154000,Chicago,IL


# 3) Remove review number from Company Name


In [18]:
salary_dataset['Company Name'] = salary_dataset['Company Name'].str.replace(r'\s*\d+(\.\d+)?\s*$', '', regex=True)


In [19]:
salary_dataset.head()

Unnamed: 0,index,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,min_salary,max_salary,Average Salary,Location_City,Location_State
0,0,Sr Data Scientist,137K-171K,Description\n\nThe Senior Data Scientist is re...,3.1,Healthfirst,"New York, NY","New York, NY",1001 to 5000 employees,1993,Nonprofit Organization,Insurance Carriers,Insurance,Unknown / Non-Applicable,"EmblemHealth, UnitedHealth Group, Aetna",137000,171000,154000,New York,NY
1,1,Data Scientist,137K-171K,"Secure our Nation, Ignite your Future\n\nJoin ...",4.2,ManTech,"Chantilly, VA","Herndon, VA",5001 to 10000 employees,1968,Company - Public,Research & Development,Business Services,$1 to $2 billion (USD),-1,137000,171000,154000,Chantilly,VA
2,2,Data Scientist,137K-171K,Overview\n\n\nAnalysis Group is one of the lar...,3.8,Analysis Group,"Boston, MA","Boston, MA",1001 to 5000 employees,1981,Private Practice / Firm,Consulting,Business Services,$100 to $500 million (USD),-1,137000,171000,154000,Boston,MA
3,3,Data Scientist,137K-171K,JOB DESCRIPTION:\n\nDo you have a passion for ...,3.5,INFICON,"Newton, MA","Bad Ragaz, Switzerland",501 to 1000 employees,2000,Company - Public,Electrical & Electronic Manufacturing,Manufacturing,$100 to $500 million (USD),"MKS Instruments, Pfeiffer Vacuum, Agilent Tech...",137000,171000,154000,Newton,MA
4,4,Data Scientist,137K-171K,Data Scientist\nAffinity Solutions / Marketing...,2.9,Affinity Solutions,"New York, NY","New York, NY",51 to 200 employees,1998,Company - Private,Advertising & Marketing,Business Services,Unknown / Non-Applicable,"Commerce Signals, Cardlytics, Yodlee",137000,171000,154000,New York,NY


# 4) Replace "\n" with empty space to make Job Description Readable

In [20]:
salary_dataset['Job Description'] = salary_dataset['Job Description'].str.replace('\n', ' ')


In [21]:
salary_dataset

Unnamed: 0,index,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,min_salary,max_salary,Average Salary,Location_City,Location_State
0,0,Sr Data Scientist,137K-171K,Description The Senior Data Scientist is resp...,3.1,Healthfirst,"New York, NY","New York, NY",1001 to 5000 employees,1993,Nonprofit Organization,Insurance Carriers,Insurance,Unknown / Non-Applicable,"EmblemHealth, UnitedHealth Group, Aetna",137000,171000,154000,New York,NY
1,1,Data Scientist,137K-171K,"Secure our Nation, Ignite your Future Join th...",4.2,ManTech,"Chantilly, VA","Herndon, VA",5001 to 10000 employees,1968,Company - Public,Research & Development,Business Services,$1 to $2 billion (USD),-1,137000,171000,154000,Chantilly,VA
2,2,Data Scientist,137K-171K,Overview Analysis Group is one of the larges...,3.8,Analysis Group,"Boston, MA","Boston, MA",1001 to 5000 employees,1981,Private Practice / Firm,Consulting,Business Services,$100 to $500 million (USD),-1,137000,171000,154000,Boston,MA
3,3,Data Scientist,137K-171K,JOB DESCRIPTION: Do you have a passion for Da...,3.5,INFICON,"Newton, MA","Bad Ragaz, Switzerland",501 to 1000 employees,2000,Company - Public,Electrical & Electronic Manufacturing,Manufacturing,$100 to $500 million (USD),"MKS Instruments, Pfeiffer Vacuum, Agilent Tech...",137000,171000,154000,Newton,MA
4,4,Data Scientist,137K-171K,Data Scientist Affinity Solutions / Marketing ...,2.9,Affinity Solutions,"New York, NY","New York, NY",51 to 200 employees,1998,Company - Private,Advertising & Marketing,Business Services,Unknown / Non-Applicable,"Commerce Signals, Cardlytics, Yodlee",137000,171000,154000,New York,NY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,667,Data Scientist,105K-167K,Summary We’re looking for a data scientist to...,3.6,TRANZACT,"Fort Lee, NJ","Fort Lee, NJ",1001 to 5000 employees,1989,Company - Private,Advertising & Marketing,Business Services,Unknown / Non-Applicable,-1,105000,167000,136000,Fort Lee,NJ
668,668,Data Scientist,105K-167K,Job Description Become a thought leader within...,-1.0,JKGT,"San Francisco, CA",-1,-1,-1,-1,-1,-1,-1,-1,105000,167000,136000,San Francisco,CA
669,669,Data Scientist,105K-167K,Join a thriving company that is changing the w...,-1.0,AccessHope,"Irwindale, CA",-1,-1,-1,-1,-1,-1,-1,-1,105000,167000,136000,Irwindale,CA
670,670,Data Scientist,105K-167K,100 Remote Opportunity As an AINLP Data Scient...,5.0,ChaTeck Incorporated,"San Francisco, CA","Santa Clara, CA",1 to 50 employees,-1,Company - Private,Advertising & Marketing,Business Services,$1 to $5 million (USD),-1,105000,167000,136000,San Francisco,CA
