## Introduction to the data

In [1]:
import pandas as pd

# Read the two CSV files (paths are relative to the working directory) into
# DataFrames, in the same order as the target names on the left.
all_ages, recent_grads = (
    pd.read_csv(csv_name) for csv_name in ("all-ages.csv", "recent-grads.csv")
)

In [2]:
# Show the first row of all_ages as a Series to inspect its columns and sample values.
all_ages.iloc[0]

Major_code                                                  1100
Major                                        GENERAL AGRICULTURE
Major_category                   Agriculture & Natural Resources
Total                                                     128148
Employed                                                   90245
Employed_full_time_year_round                              74078
Unemployed                                                  2423
Unemployment_rate                                      0.0261471
Median                                                     50000
P25th                                                      34000
P75th                                                      80000
Name: 0, dtype: object

In [3]:
# Show the first row of recent_grads as a Series to inspect its columns and sample values.
recent_grads.iloc[0]

Rank                                        1
Major_code                               2419
Major                   PETROLEUM ENGINEERING
Total                                    2339
Men                                      2057
Women                                     282
Major_category                    Engineering
ShareWomen                           0.120564
Sample_size                                36
Employed                                 1976
Full_time                                1849
Part_time                                 270
Full_time_year_round                     1207
Unemployed                                 37
Unemployment_rate                   0.0183805
Median                                 110000
P25th                                   95000
P75th                                  125000
College_jobs                             1534
Non_college_jobs                          364
Low_wage_jobs                             193
Name: 0, dtype: object

## Summarizing major categories

In [4]:
# Unique values in the Major_category column of all_ages, in order of first
# appearance. Despite the variable name, these are categories, not majors.
majors = all_ages["Major_category"].unique()

# Sum the Total column per category with one groupby pass per DataFrame rather
# than re-filtering both frames once for every category.
#   * sort=False keeps the categories in first-appearance order.
#   * reindex(majors, fill_value=0) restricts the result to the categories
#     found in all_ages and gives 0 to any category without matching rows,
#     which is what summing an empty selection yields.
#   * to_dict() produces a {category: total} dict.
aa_cat_counts = (
    all_ages.groupby("Major_category", sort=False)["Total"]
    .sum()
    .reindex(majors, fill_value=0)
    .to_dict()
)
rg_cat_counts = (
    recent_grads.groupby("Major_category", sort=False)["Total"]
    .sum()
    .reindex(majors, fill_value=0)
    .to_dict()
)

In [5]:
# Summed Total per major category in all_ages.
aa_cat_counts

{'Agriculture & Natural Resources': 632437,
 'Arts': 1805865,
 'Biology & Life Science': 1338186,
 'Business': 9858741,
 'Communications & Journalism': 1803822,
 'Computers & Mathematics': 1781378,
 'Education': 4700118,
 'Engineering': 3576013,
 'Health': 2950859,
 'Humanities & Liberal Arts': 3738335,
 'Industrial Arts & Consumer Services': 1033798,
 'Interdisciplinary': 45199,
 'Law & Public Policy': 902926,
 'Physical Sciences': 1025318,
 'Psychology & Social Work': 1987278,
 'Social Science': 2654125}

In [6]:
# Summed Total per major category in recent_grads.
rg_cat_counts

{'Agriculture & Natural Resources': 75620.0,
 'Arts': 357130.0,
 'Biology & Life Science': 453862.0,
 'Business': 1302376.0,
 'Communications & Journalism': 392601.0,
 'Computers & Mathematics': 299008.0,
 'Education': 559129.0,
 'Engineering': 537583.0,
 'Health': 463230.0,
 'Humanities & Liberal Arts': 713468.0,
 'Industrial Arts & Consumer Services': 229792.0,
 'Interdisciplinary': 12296.0,
 'Law & Public Policy': 179107.0,
 'Physical Sciences': 185479.0,
 'Psychology & Social Work': 481007.0,
 'Social Science': 529966.0}

## Low-wage job rates

In [7]:
# Low-wage jobs as a share of all recent graduates. The result is scaled by
# 100, so despite the name it is a percentage rather than a 0-1 proportion.
# NOTE(review): each .sum() skips missing values in its own column only, so a
# row with a missing Total would still add its Low_wage_jobs to the numerator
# -- confirm whether such rows exist and whether that is intended.
low_wage_jobs_total = recent_grads["Low_wage_jobs"].sum()
graduates_total = recent_grads["Total"].sum()
low_wage_proportion = low_wage_jobs_total / graduates_total * 100

In [8]:
# Sum of Low_wage_jobs as a percentage of the sum of Total in recent_grads.
low_wage_proportion

9.858891195563151

## Comparing data sets

In [9]:
# Every distinct major listed in recent_grads. A major with no matching row in
# all_ages is still evaluated below, with its all-ages rate treated as 0.
majors = recent_grads['Major'].unique()

# Aggregate Unemployment_rate per major once for each DataFrame instead of
# re-filtering both frames for every major. Reindexing both results on the
# same `majors` labels aligns them position by position and fills majors that
# have no usable rate with 0, matching the sum of an empty selection.
ages_rate = (
    all_ages.groupby("Major")["Unemployment_rate"]
    .sum()
    .reindex(majors, fill_value=0)
)
grads_rate = (
    recent_grads.groupby("Major")["Unemployment_rate"]
    .sum()
    .reindex(majors, fill_value=0)
)

# Number of majors where the all-ages unemployment rate is strictly higher
# than the recent-graduate rate, i.e. recent graduates fared better.
rg_lower_count = int((ages_rate > grads_rate).sum())

In [10]:
# Recent graduates had a lower unemployment rate than the all-ages population
# in only 44 of the 173 majors.
rg_lower_count

44