## Data analysis on various university majors and their salaries at different points in their career

### source:  we will begin with the 2008 dataset of the university majors collected from the website:
### https://www.payscale.com/college-salary-report/majors-that-pay-you-back/bachelors

### Later we will webscrape the 2023 data from this website:

In [2]:
import pandas as pd
import numpy as np
df =  pd.read_csv("salaries_by_college_major.csv")

In [3]:
df

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
0,Accounting,46000.0,77100.0,42200.0,152000.0,Business
1,Aerospace Engineering,57700.0,101000.0,64300.0,161000.0,STEM
2,Agriculture,42600.0,71900.0,36300.0,150000.0,Business
3,Anthropology,36800.0,61500.0,33800.0,138000.0,HASS
4,Architecture,41600.0,76800.0,50600.0,136000.0,Business
5,Art History,35800.0,64900.0,28800.0,125000.0,HASS
6,Biology,38800.0,64800.0,36900.0,135000.0,STEM
7,Business Management,43000.0,72100.0,38800.0,147000.0,Business
8,Chemical Engineering,63200.0,107000.0,71900.0,194000.0,STEM
9,Chemistry,42600.0,79900.0,45300.0,148000.0,STEM


In [5]:
#Lets look at the head:
df.head()

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
0,Accounting,46000.0,77100.0,42200.0,152000.0,Business
1,Aerospace Engineering,57700.0,101000.0,64300.0,161000.0,STEM
2,Agriculture,42600.0,71900.0,36300.0,150000.0,Business
3,Anthropology,36800.0,61500.0,33800.0,138000.0,HASS
4,Architecture,41600.0,76800.0,50600.0,136000.0,Business


In [6]:
#Basic info:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 6 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Undergraduate Major                51 non-null     object 
 1   Starting Median Salary             50 non-null     float64
 2   Mid-Career Median Salary           50 non-null     float64
 3   Mid-Career 10th Percentile Salary  50 non-null     float64
 4   Mid-Career 90th Percentile Salary  50 non-null     float64
 5   Group                              50 non-null     object 
dtypes: float64(4), object(2)
memory usage: 2.5+ KB


In [7]:
#Columns:
df.columns

Index(['Undergraduate Major', 'Starting Median Salary',
       'Mid-Career Median Salary', 'Mid-Career 10th Percentile Salary',
       'Mid-Career 90th Percentile Salary', 'Group'],
      dtype='object')

In [10]:
df.shape

(51, 6)

In [8]:
#Check if there are any null values:
df.isnull()

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,False,False,False,False
5,False,False,False,False,False,False
6,False,False,False,False,False,False
7,False,False,False,False,False,False
8,False,False,False,False,False,False
9,False,False,False,False,False,False


In [9]:
#50th row has null value:
df.iloc[50]

Undergraduate Major                  Source: PayScale Inc.
Starting Median Salary                                 NaN
Mid-Career Median Salary                               NaN
Mid-Career 10th Percentile Salary                      NaN
Mid-Career 90th Percentile Salary                      NaN
Group                                                  NaN
Name: 50, dtype: object

In [13]:
#Lets drop the 50th row for now:
df.dropna(axis=0, inplace=True)

In [14]:
#check if the nan's are removed:
df.tail()

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
45,Political Science,40800.0,78200.0,41200.0,168000.0,HASS
46,Psychology,35900.0,60400.0,31600.0,127000.0,HASS
47,Religion,34100.0,52000.0,29700.0,96400.0,HASS
48,Sociology,36500.0,58200.0,30700.0,118000.0,HASS
49,Spanish,34000.0,53100.0,31000.0,96400.0,HASS


In [17]:
#college major with highest median salary:
df.loc[df.iloc[:,1].idxmax(), 'Undergraduate Major']

'Physician Assistant'

In [19]:
df.iloc[df.iloc[:,1].idxmax()]

Undergraduate Major                  Physician Assistant
Starting Median Salary                           74300.0
Mid-Career Median Salary                         91700.0
Mid-Career 10th Percentile Salary                66400.0
Mid-Career 90th Percentile Salary               124000.0
Group                                               STEM
Name: 43, dtype: object

In [23]:
df.columns

Index(['Undergraduate Major', 'Starting Median Salary',
       'Mid-Career Median Salary', 'Mid-Career 10th Percentile Salary',
       'Mid-Career 90th Percentile Salary', 'Group'],
      dtype='object')

In [18]:
#colllege major with highest-mid career (10+ years) salary:

df.loc[df.loc[:,'Mid-Career Median Salary'].idxmax(), 'Undergraduate Major']

'Chemical Engineering'

In [21]:
df.iloc[df.loc[:,'Mid-Career Median Salary'].idxmax()]

Undergraduate Major                  Chemical Engineering
Starting Median Salary                            63200.0
Mid-Career Median Salary                         107000.0
Mid-Career 10th Percentile Salary                 71900.0
Mid-Career 90th Percentile Salary                194000.0
Group                                                STEM
Name: 8, dtype: object

In [24]:
#college major with lowest median salary:
df.loc[df.iloc[:,1].idxmin(),:]



Undergraduate Major                  Spanish
Starting Median Salary               34000.0
Mid-Career Median Salary             53100.0
Mid-Career 10th Percentile Salary    31000.0
Mid-Career 90th Percentile Salary    96400.0
Group                                   HASS
Name: 49, dtype: object

In [29]:
#how much can a major in Spanish can expect to earn:
df.iloc[49,1]

34000.0

In [25]:
#lowest mid career salary:
df.iloc[df.iloc[:,2].idxmin(),:]

Undergraduate Major                  Education
Starting Median Salary                 34900.0
Mid-Career Median Salary               52000.0
Mid-Career 10th Percentile Salary      29300.0
Mid-Career 90th Percentile Salary     102000.0
Group                                     HASS
Name: 18, dtype: object

In [26]:
#how much can a major in education expect to earn:
df.iloc[18,1]

34900.0

In [31]:
# major with least risk(least difference between the salary of 10th percentile and 90th percentile):

In [46]:
#add a column called risk which calculates this difference:
df.loc[:,'risk']=df.iloc[:,4]-df.iloc[:,3]

In [34]:
df

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group,risk
0,Accounting,46000.0,77100.0,42200.0,152000.0,Business,109800.0
1,Aerospace Engineering,57700.0,101000.0,64300.0,161000.0,STEM,96700.0
2,Agriculture,42600.0,71900.0,36300.0,150000.0,Business,113700.0
3,Anthropology,36800.0,61500.0,33800.0,138000.0,HASS,104200.0
4,Architecture,41600.0,76800.0,50600.0,136000.0,Business,85400.0
5,Art History,35800.0,64900.0,28800.0,125000.0,HASS,96200.0
6,Biology,38800.0,64800.0,36900.0,135000.0,STEM,98100.0
7,Business Management,43000.0,72100.0,38800.0,147000.0,Business,108200.0
8,Chemical Engineering,63200.0,107000.0,71900.0,194000.0,STEM,122100.0
9,Chemistry,42600.0,79900.0,45300.0,148000.0,STEM,102700.0


In [35]:
low_risk = df.sort_values(by=["risk"], ascending=True)

In [38]:
#Low risk majors:
low_risk.iloc[:,[0,6]].head(10)

Unnamed: 0,Undergraduate Major,risk
40,Nursing,50700.0
43,Physician Assistant,57600.0
41,Nutrition,65300.0
49,Spanish,65400.0
27,Health Care Administration,66400.0
47,Religion,66700.0
23,Forestry,70000.0
32,Interior Design,71300.0
18,Education,72700.0
15,Criminal Justice,74800.0


In [40]:
df.columns


Index(['Undergraduate Major', 'Starting Median Salary',
       'Mid-Career Median Salary', 'Mid-Career 10th Percentile Salary',
       'Mid-Career 90th Percentile Salary', 'Group', 'risk'],
      dtype='object')

In [47]:
# Major with highest potential in their pay:

highest_potential = df.sort_values(by=['Mid-Career 90th Percentile Salary'], ascending=False)

In [48]:
highest_potential.head(5)

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group,risk
17,Economics,50100.0,98600.0,50600.0,210000.0,Business,159400.0
22,Finance,47900.0,88300.0,47200.0,195000.0,Business,147800.0
8,Chemical Engineering,63200.0,107000.0,71900.0,194000.0,STEM,122100.0
37,Math,45400.0,92400.0,45200.0,183000.0,STEM,137800.0
44,Physics,50300.0,97300.0,56000.0,178000.0,STEM,122000.0


In [49]:
#greatest spread in the pay (highest risk):
greatest_spread = df.sort_values(by='risk', ascending=False)

In [50]:
greatest_spread.head(5)

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group,risk
17,Economics,50100.0,98600.0,50600.0,210000.0,Business,159400.0
22,Finance,47900.0,88300.0,47200.0,195000.0,Business,147800.0
37,Math,45400.0,92400.0,45200.0,183000.0,STEM,137800.0
36,Marketing,40800.0,79600.0,42100.0,175000.0,Business,132900.0
42,Philosophy,39900.0,81200.0,35500.0,168000.0,HASS,132500.0


In [52]:
#The 3 majors among the top: economics, math, finance are present -
# in both the tables. Indicating there are graduates from these majors who -
# are high earners but there are also some who are not
# earning much!.


In [54]:
#Highest mid-career salary majors:
mid_highest=df.sort_values(by="Mid-Career Median Salary", ascending=False)
mid_highest.iloc[:,[0,2]].head()

Unnamed: 0,Undergraduate Major,Mid-Career Median Salary
8,Chemical Engineering,107000.0
12,Computer Engineering,105000.0
19,Electrical Engineering,103000.0
1,Aerospace Engineering,101000.0
17,Economics,98600.0


In [55]:
df.columns

Index(['Undergraduate Major', 'Starting Median Salary',
       'Mid-Career Median Salary', 'Mid-Career 10th Percentile Salary',
       'Mid-Career 90th Percentile Salary', 'Group', 'risk'],
      dtype='object')

In [79]:
pd.options.display.float_format = '{:,.2f}'.format

In [85]:
#stats on various groups: Business, STEM and HASS(humanities, arts and social sciences)
df.groupby('Group').apply(lambda x:x.iloc[:,[2,3,4,6]].mean())

Unnamed: 0_level_0,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,risk
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Business,75083.33,43566.67,147525.0,103958.33
HASS,62968.18,34145.45,129363.64,95218.18
STEM,90812.5,56025.0,157625.0,101600.0


## Using payscale website to webscrape the 2023 data on university majors and their median pay at different points of their careers:

In [117]:
#webscrape
import requests
import json
from bs4 import BeautifulSoup 
l=[]
for i in range(1,33):
  payscale= requests.get("https://www.payscale.com/college-salary-report/majors-that-pay-you-back/bachelors/page/" + str(i))
  soup = BeautifulSoup(payscale.text, 'html.parser')
  for e in enumerate((soup.find(type="application/json").getText().split("]")[0].split("}"))):
      if e[1] != "":
        if e[0] == 0:
          l.append(json.loads(e[1].split("[")[1] + "}"))
        else:
          l.append(json.loads("{" + e[1].split(",{")[1] + "}"))




In [118]:
l

[{'id': 5088,
  'degreeType': 'Bachelors',
  'major': 'Petroleum Engineering',
  'earlyCareerMedianPay': 97500,
  'midCareerMedianPay': 212500,
  'fiveToTenYearMedianPay': 151800,
  'tenToTwentyYearMedianPay': 197600,
  'twentyPlusYearMedianPay': 226100,
  'percentHighMeaning': 0.61,
  'year': 2023,
  'url': '/research/US/Degree=Bachelor%27s_Degree%2C_Petroleum_Engineering/Salary',
  'rank': 1,
  'tie': False},
 {'id': 5614,
  'degreeType': 'Bachelors',
  'major': 'Operations Research & Industrial Engineering',
  'earlyCareerMedianPay': 98300,
  'midCareerMedianPay': 191800,
  'fiveToTenYearMedianPay': 153700,
  'tenToTwentyYearMedianPay': 188500,
  'twentyPlusYearMedianPay': None,
  'percentHighMeaning': 0.21,
  'year': 2023,
  'url': '/research/US/Degree=Bachelor%27s_Degree%2C_Operations_Research_%26_Industrial_Engineering/Salary',
  'rank': 2,
  'tie': False},
 {'id': 5615,
  'degreeType': 'Bachelors',
  'major': 'Interaction Design',
  'earlyCareerMedianPay': 74700,
  'midCareerMed

In [119]:
len(l)

799

In [123]:
df_current = pd.DataFrame(l)

In [124]:
#note there are some columns with nan values:
df_current.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 799 entries, 0 to 798
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   id                        799 non-null    int64  
 1   degreeType                799 non-null    object 
 2   major                     799 non-null    object 
 3   earlyCareerMedianPay      799 non-null    int64  
 4   midCareerMedianPay        799 non-null    int64  
 5   fiveToTenYearMedianPay    752 non-null    float64
 6   tenToTwentyYearMedianPay  712 non-null    float64
 7   twentyPlusYearMedianPay   510 non-null    float64
 8   percentHighMeaning        739 non-null    float64
 9   year                      799 non-null    int64  
 10  url                       799 non-null    object 
 11  rank                      799 non-null    int64  
 12  tie                       799 non-null    bool   
dtypes: bool(1), float64(4), int64(5), object(3)
memory usage: 75.8+ K

### Note that 5-10 year median salary is null for many entries, so further analysis with that column is not accurate but for the sake of getting familiar with pandas we will continue the analysis with that column.

In [125]:
df_current.columns

Index(['id', 'degreeType', 'major', 'earlyCareerMedianPay',
       'midCareerMedianPay', 'fiveToTenYearMedianPay',
       'tenToTwentyYearMedianPay', 'twentyPlusYearMedianPay',
       'percentHighMeaning', 'year', 'url', 'rank', 'tie'],
      dtype='object')

In [126]:
df_current.shape

(799, 13)

In [142]:
#The data set has no repeated entries:
df_current[df_current.loc[:,'major'].duplicated()]

Unnamed: 0,id,degreeType,major,earlyCareerMedianPay,midCareerMedianPay,fiveToTenYearMedianPay,tenToTwentyYearMedianPay,twentyPlusYearMedianPay,percentHighMeaning,year,url,rank,tie,spread


In [148]:
#Lets look at all the majors available in the data:
for i in df_current["major"]:
    print(i)

Petroleum Engineering
Operations Research & Industrial Engineering
Interaction Design
Applied Economics and Management
Building Science
Actuarial Mathematics
Operations Research
Systems Engineering
Optical Science & Engineering
Information & Computer Science
Aeronautics & Astronautics
Aerospace Studies
Pharmacy
Cognitive Science
Managerial Economics
Quantitative Economics
Econometrics
Foreign Affairs
Chemical Engineering
Public Accounting
Political Economy
Corporate Accounting & Finance
Actuarial Science
Computer Engineering (CE)
Electrical Power Engineering
Computer Science (CS) & Engineering
Marine Transportation Management
Marine Engineering
Operations & Information Systems Management
Electrical & Computer Engineering (ECE)
Entrepreneurship & Marketing
Astronomy
Mechatronics
Computational & Applied Mathematics
Aeronautical Engineering
Welding Engineering
Electrical Engineering (EE)
Electrical & Electronics Engineering (EEE)
Naval Architecture & Marine Engineering
Instrumentation & C

In [154]:
#major with highest mid-career salary:

#if thyere were duplicates then try: df_current.groupby('major').apply(lambda x:x.loc[:,"midCareerMedianPay"]).idxmax()

df_current.sort_values(by="midCareerMedianPay",ascending=False).head()

##petroleum engineering is the major with highest midCareerMedium Pay

Unnamed: 0,id,degreeType,major,earlyCareerMedianPay,midCareerMedianPay,fiveToTenYearMedianPay,tenToTwentyYearMedianPay,twentyPlusYearMedianPay,percentHighMeaning,year,url,rank,tie,spread
0,5088,Bachelors,Petroleum Engineering,97500,212500,151800.0,197600.0,226100.0,0.61,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Pet...,1,False,54300.0
1,5614,Bachelors,Operations Research & Industrial Engineering,98300,191800,153700.0,188500.0,,0.21,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Ope...,2,False,55400.0
2,5615,Bachelors,Interaction Design,74700,173600,118400.0,173600.0,,0.54,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Int...,3,False,43700.0
3,5616,Bachelors,Applied Economics and Management,76500,164400,112800.0,164400.0,,0.54,2023,/research/US/Degree=Bachelor%27s_Degree%2C_App...,4,False,36300.0
4,5617,Bachelors,Building Science,69000,163100,99100.0,152600.0,179000.0,0.47,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Bui...,5,False,30100.0


In [157]:
#major with lowest mid-career salary:

# This is another way to do it if duplicates available: df_current.groupby('major').apply(lambda x:x.loc[:,"midCareerMedianPay"]).idxmin()

df_current.sort_values(by="midCareerMedianPay",ascending=True).head()

## Metalsmithing is the major with lowest midCareerMedium Pay

Unnamed: 0,id,degreeType,major,earlyCareerMedianPay,midCareerMedianPay,fiveToTenYearMedianPay,tenToTwentyYearMedianPay,twentyPlusYearMedianPay,percentHighMeaning,year,url,rank,tie,spread
798,5886,Bachelors,Metalsmithing,44700,44900,57400.0,,,0.36,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Met...,799,False,12700.0
797,5486,Bachelors,Early Childhood Education,41700,50600,44300.0,48800.0,54200.0,0.77,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Ear...,798,False,2600.0
796,5359,Bachelors,Nonprofit Administration,42800,52100,54500.0,,,0.72,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Non...,797,False,11700.0
795,5358,Bachelors,Addictions Counseling,45900,52200,,50300.0,,0.78,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Add...,796,False,
794,5357,Bachelors,Equine Studies,40300,52500,,51900.0,,0.5,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Equ...,795,False,


In [159]:
#Major with lowest starting salary:

## another way if duplicates available: df_current.groupby('major').apply(lambda x:x.loc[:,"earlyCareerMedianPay"]).idxmin()

df_current.sort_values(by="earlyCareerMedianPay", ascending=True).head()

#lowest starting salary major is Voice $ Opera

Unnamed: 0,id,degreeType,major,earlyCareerMedianPay,midCareerMedianPay,fiveToTenYearMedianPay,tenToTwentyYearMedianPay,twentyPlusYearMedianPay,percentHighMeaning,year,url,rank,tie,spread
767,5380,Bachelors,Voice & Opera,38700,63200,56600.0,71500.0,,0.53,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Voi...,768,False,17900.0
576,5139,Bachelors,Painting & Printmaking,39300,82600,45200.0,82600.0,,0.46,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Pai...,575,True,5900.0
794,5357,Bachelors,Equine Studies,40300,52500,,51900.0,,0.5,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Equ...,795,False,
785,5348,Bachelors,Mental Health Counseling,40600,58500,54800.0,,,,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Men...,786,True,14200.0
745,5308,Bachelors,Rehabilitation Services,41300,67600,48600.0,57600.0,71200.0,0.74,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Reh...,746,True,7300.0


In [160]:
#major with the lowest spread in the differnce in the pay between the early career median pay and the five to ten year median pay:

In [161]:
df_current['spread']=df_current.loc[:,'fiveToTenYearMedianPay'] - df_current.loc[:,'earlyCareerMedianPay']

In [173]:
df_current.groupby(by='major').apply(lambda x:x.loc[:,'spread']).idxmax()
df_current.sort_values(by='spread', ascending=False).head()

#Pharmacy has the highest differnce in the pay between early career median pay and the five to ten year median pay

Unnamed: 0,id,degreeType,major,earlyCareerMedianPay,midCareerMedianPay,fiveToTenYearMedianPay,tenToTwentyYearMedianPay,twentyPlusYearMedianPay,percentHighMeaning,year,url,rank,tie,spread
12,5625,Bachelors,Pharmacy,69400,154000,130100.0,153800.0,154300.0,0.79,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Pha...,13,False,60700.0
243,5806,Bachelors,Cell Biology,56800,108100,114800.0,,,0.55,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Cel...,244,True,58000.0
9,5622,Bachelors,Information & Computer Science,76000,157800,131700.0,146700.0,171200.0,0.63,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Inf...,10,False,55700.0
1,5614,Bachelors,Operations Research & Industrial Engineering,98300,191800,153700.0,188500.0,,0.21,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Ope...,2,False,55400.0
0,5088,Bachelors,Petroleum Engineering,97500,212500,151800.0,197600.0,226100.0,0.61,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Pet...,1,False,54300.0


In [164]:
#Pharmacy has the highest spread in the median salary pay after five to ten years.
df_current.loc[12,'spread'] 

60700.0

In [174]:
df_current.groupby(by='major').apply(lambda x:x.loc[:,'spread']).idxmin()
df_current.sort_values(by='spread', ascending=True).head()

#Personnel management has the highest differnce in the pay between early career median pay and after five to ten year median pay

Unnamed: 0,id,degreeType,major,earlyCareerMedianPay,midCareerMedianPay,fiveToTenYearMedianPay,tenToTwentyYearMedianPay,twentyPlusYearMedianPay,percentHighMeaning,year,url,rank,tie,spread
394,5757,Bachelors,Personnel Management,57900,94800,58100.0,80900.0,104500.0,0.68,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Per...,393,True,200.0
719,5332,Bachelors,Recreation Administration,54200,70600,54500.0,69900.0,72800.0,0.66,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Rec...,720,True,300.0
766,5379,Bachelors,Music Therapy,48500,63500,49500.0,61900.0,64100.0,0.95,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Mus...,767,False,1000.0
741,5304,Bachelors,Elementary Special Education,51200,68300,53100.0,61900.0,,0.81,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Ele...,742,False,1900.0
663,5476,Bachelors,Ornamental Horticulture,51600,76000,53700.0,69100.0,86600.0,0.67,2023,/research/US/Degree=Bachelor%27s_Degree%2C_Orn...,663,True,2100.0


In [168]:
df_current.loc[19,'spread']   # Personnel accounting has the minimum spread of median salary pay after 5 to 10 years

7900.0