# Summarizing US Employment Data
Data sets:
- all-ages.csv : Employment data in US by College major for all ages
- recent-grads.csv : Employment data in US by College major for recent college graduates only

The Goal is to Analyse these data.

In [6]:
import pandas as pd

In [7]:
all_ages = pd.read_csv("./Data/all-ages.csv")
all_ages.head(5)

Unnamed: 0,Major_code,Major,Major_category,Total,Employed,Employed_full_time_year_round,Unemployed,Unemployment_rate,Median,P25th,P75th
0,1100,GENERAL AGRICULTURE,Agriculture & Natural Resources,128148,90245,74078,2423,0.026147,50000,34000,80000.0
1,1101,AGRICULTURE PRODUCTION AND MANAGEMENT,Agriculture & Natural Resources,95326,76865,64240,2266,0.028636,54000,36000,80000.0
2,1102,AGRICULTURAL ECONOMICS,Agriculture & Natural Resources,33955,26321,22810,821,0.030248,63000,40000,98000.0
3,1103,ANIMAL SCIENCES,Agriculture & Natural Resources,103549,81177,64937,3619,0.042679,46000,30000,72000.0
4,1104,FOOD SCIENCE,Agriculture & Natural Resources,24280,17281,12722,894,0.049188,62000,38500,90000.0


In [18]:
recent_grads = pd.read_csv("./Data/recent-grads.csv")
recent_grads[["ShareWomen","Median","P25th"]].head(20)

Unnamed: 0,ShareWomen,Median,P25th
0,0.120564,110000,95000
1,0.101852,75000,55000
2,0.153037,73000,50000
3,0.107313,70000,43000
4,0.341631,65000,50000
5,0.144967,65000,50000
6,0.535714,62000,53000
7,0.441356,62000,31500
8,0.139793,60000,48000
9,0.437847,60000,45000


# Number of people by Major_Category

In [58]:
%config IPCompleter.greedy=True

print(all_ages['Major_category'].value_counts().index)

#all_ages_major_categories = dict()
recent_grads_major_categories = dict()

def calculate_major_cat_totals(df):
    cats = df['Major_category'].value_counts().index
    counts_dictionary = dict()

    for c in cats:
        major_df = df[df["Major_category"] == c]
        total = major_df["Total"].sum(axis=0)
        counts_dictionary[c] = total
    return counts_dictionary

all_ages_major_categories = calculate_major_cat_totals(all_ages)
recent_grads_major_categories = calculate_major_cat_totals(recent_grads)
print(all_ages_major_categories)

Index([u'Engineering', u'Education', u'Humanities & Liberal Arts',
       u'Biology & Life Science', u'Business', u'Health',
       u'Computers & Mathematics', u'Physical Sciences',
       u'Agriculture & Natural Resources', u'Psychology & Social Work',
       u'Social Science', u'Arts', u'Industrial Arts & Consumer Services',
       u'Law & Public Policy', u'Communications & Journalism',
       u'Interdisciplinary'],
      dtype='object')
{'Arts': 357130, 'Psychology & Social Work': 481007, 'Business': 1302376, 'Industrial Arts & Consumer Services': 229792, 'Computers & Mathematics': 299008, 'Agriculture & Natural Resources': 79981, 'Interdisciplinary': 12296, 'Humanities & Liberal Arts': 713468, 'Engineering': 537583, 'Biology & Life Science': 453862, 'Health': 463230, 'Law & Public Policy': 179107, 'Physical Sciences': 185479, 'Education': 559129, 'Communications & Journalism': 392601, 'Social Science': 529966}


# Low Wage Jobs Rates

In [79]:
%config IPCompleter.greedy=True

count_low_rage = recent_grads["Low_wage_jobs"].sum()
count_total = recent_grads["Total"].sum()
low_wage_percent = float(count_low_rage) / float(count_total)
print(low_wage_percent)

0.0985254607612


9.85% of graduates took on a low wage job after finishing college.

# Count Number of Majors in a Major Category

In [81]:
print(all_ages['Major_category'].value_counts())

Engineering                            29
Education                              16
Humanities & Liberal Arts              15
Biology & Life Science                 14
Business                               13
Health                                 12
Computers & Mathematics                11
Physical Sciences                      10
Agriculture & Natural Resources        10
Psychology & Social Work                9
Social Science                          9
Arts                                    8
Industrial Arts & Consumer Services     7
Law & Public Policy                     5
Communications & Journalism             4
Interdisciplinary                       1
Name: Major_category, dtype: int64


# Unemployment Rate

In [147]:
majors = recent_grads['Major'].value_counts().index

recent_grads_lower_unemp_count = 0
all_ages_lower_unemp_count = 0


for maj in majors:
    recent_df = recent_grads[recent_grads['Major'] == maj]
    all_ages_df = all_ages[all_ages['Major'] == maj]
    
    recent_grads_unemp_rate = recent_df['Unemployment_rate'].values[0]
    all_ages_unemp_rate = all_ages_df['Unemployment_rate'].values[0]
    
    if recent_grads_unemp_rate < all_ages_unemp_rate:
        
        recent_grads_lower_unemp_count += 1
    elif recent_grads_unemp_rate > all_ages_unemp_rate:
        all_ages_lower_unemp_count += 1
    
    
print(recent_grads_lower_unemp_count)
print(all_ages_lower_unemp_count)
    

43
128


Recent graduates who studied 43 of the 173 majors ended up having lower unemployment rates than the general population.