In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.__version__

'1.3.5'

# 147: Introducing (Five?) New Datasets

In [3]:
# DATA URL SOURCES ###############################
eng_url     = 'https://andybek.com/pandas-eng'
state_url   = 'https://andybek.com/pandas-state'
party_url   = 'https://andybek.com/pandas-party'
liberal_url = 'https://andybek.com/pandas-liberal'
ivies_url   = 'https://andybek.com/pandas-ivies'
##################################################

In [4]:
eng = pd.read_csv(eng_url)

In [5]:
state = pd.read_csv(state_url)

In [6]:
state.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00"
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00"
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00"
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00"
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00"


In [7]:
party = pd.read_csv(party_url)

In [8]:
liberal = pd.read_csv(liberal_url)

In [9]:
ivies = pd.read_csv(ivies_url)

# 148: Concatenating DataFrames

In [10]:
dfs = [state, eng, liberal, ivies, party]

In [11]:
for df in dfs:
  print(df.shape)

(175, 4)
(19, 4)
(47, 4)
(8, 4)
(20, 4)


In [12]:
pd.concat([ivies, eng]).shape

(27, 4)

In [13]:
ivies.shape[0] + eng.shape[0]

27

In [14]:
set(party['School Name']).difference(state['School Name'])

{'Randolph-Macon College'}

In [15]:
'Randolph-Macon College' in liberal['School Name'].values

True

In [16]:
pd.concat(dfs)[pd.concat(dfs).duplicated(subset=['School Name'], keep='first')]

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,University of Illinois at Urbana-Champaign (UIUC),Party,"$52,900.00","$96,100.00"
1,"University of Maryland, College Park",Party,"$52,000.00","$95,000.00"
2,"University of California, Santa Barbara (UCSB)",Party,"$50,500.00","$95,000.00"
3,University of Texas (UT) - Austin,Party,"$49,700.00","$93,900.00"
4,State University of New York (SUNY) at Albany,Party,"$44,500.00","$92,200.00"
5,University of Florida (UF),Party,"$47,100.00","$87,900.00"
6,Louisiana State University (LSU),Party,"$46,900.00","$87,800.00"
7,University of Georgia (UGA),Party,"$44,100.00","$86,000.00"
8,Pennsylvania State University (PSU),Party,"$49,900.00","$85,700.00"
9,Arizona State University (ASU),Party,"$47,400.00","$84,100.00"


In [17]:
schools = pd.concat(dfs).drop_duplicates(subset='School Name')

# 149:The Duplicated Index Issue

In [18]:
schools.loc[0]

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00"
0,Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00"
0,Bucknell University,Liberal Arts,"$54,100.00","$110,000.00"
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00"


In [19]:
# pd.concat() -> does not discard the original index of the dataframes being concatenated!

In [20]:
schools.index.duplicated()

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [21]:
# in pandas, we could have duplicated indices!

In [22]:
# schools.loc[0:2] # does not work anymore

In [23]:
# the first fix

In [24]:
schools.reset_index(drop=True, inplace=True)

In [25]:
schools.index.duplicated().sum()

0

In [26]:
# the alternative approach: concat()

In [27]:
pd.concat(dfs, ignore_index=True).drop_duplicates(subset=['School Name']).index.duplicated().sum()

0

# 150:Enforcing Unique Indices

In [28]:
# Previously: df.reset_index(drop=True) OR pd.concat(ignore_index=True)

# GOAL: What if we wanted to preserve the index AND force uniqueness

In [29]:
ivies

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
1,Princeton University,Ivy League,"$66,500.00","$131,000.00"
2,Yale University,Ivy League,"$59,100.00","$126,000.00"
3,Harvard University,Ivy League,"$63,400.00","$124,000.00"
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"
5,Cornell University,Ivy League,"$60,300.00","$110,000.00"
6,Brown University,Ivy League,"$56,200.00","$109,000.00"
7,Columbia University,Ivy League,"$59,400.00","$107,000.00"


In [30]:
ivies2 = ivies.set_index('School Name')

In [31]:
eng2 = eng.set_index('School Name')

In [32]:
eng2.head()

Unnamed: 0_level_0,School Type,Starting Median Salary,Mid-Career Median Salary
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00"
California Institute of Technology (CIT),Engineering,"$75,500.00","$123,000.00"
Harvey Mudd College,Engineering,"$71,800.00","$122,000.00"
"Polytechnic University of New York, Brooklyn",Engineering,"$62,400.00","$114,000.00"
Cooper Union,Engineering,"$62,200.00","$114,000.00"


In [33]:
pd.concat([ivies2, eng2], ignore_index=True)

Unnamed: 0,School Type,Starting Median Salary,Mid-Career Median Salary
0,Ivy League,"$58,000.00","$134,000.00"
1,Ivy League,"$66,500.00","$131,000.00"
2,Ivy League,"$59,100.00","$126,000.00"
3,Ivy League,"$63,400.00","$124,000.00"
4,Ivy League,"$60,900.00","$120,000.00"
5,Ivy League,"$60,300.00","$110,000.00"
6,Ivy League,"$56,200.00","$109,000.00"
7,Ivy League,"$59,400.00","$107,000.00"
8,Engineering,"$72,200.00","$126,000.00"
9,Engineering,"$75,500.00","$123,000.00"


In [34]:
pd.concat([ivies2, eng2], verify_integrity=True)

Unnamed: 0_level_0,School Type,Starting Median Salary,Mid-Career Median Salary
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
Princeton University,Ivy League,"$66,500.00","$131,000.00"
Yale University,Ivy League,"$59,100.00","$126,000.00"
Harvard University,Ivy League,"$63,400.00","$124,000.00"
University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"
Cornell University,Ivy League,"$60,300.00","$110,000.00"
Brown University,Ivy League,"$56,200.00","$109,000.00"
Columbia University,Ivy League,"$59,400.00","$107,000.00"
Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00"
California Institute of Technology (CIT),Engineering,"$75,500.00","$123,000.00"


In [35]:
random_eng_school = eng2.sample()

In [36]:
random_eng_school

Unnamed: 0_level_0,School Type,Starting Median Salary,Mid-Career Median Salary
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00"


In [37]:
ivies2 = ivies2.append(random_eng_school)

In [38]:
# pd.concat([ivies2, eng2], verify_integrity=True)

# 151:BONUS - Creating Multiple Indicates With concat()

In [39]:
# Previously: pd.concat(ignore_index=True)

In [40]:
# How about a MultiIndex?

In [41]:
new_df = pd.concat([ivies, eng], keys=['ivyleague_schools', 'engineering_schools'])

In [42]:
type(new_df.index)

pandas.core.indexes.multi.MultiIndex

In [43]:
new_df.head()

Unnamed: 0,Unnamed: 1,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
ivyleague_schools,0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
ivyleague_schools,1,Princeton University,Ivy League,"$66,500.00","$131,000.00"
ivyleague_schools,2,Yale University,Ivy League,"$59,100.00","$126,000.00"
ivyleague_schools,3,Harvard University,Ivy League,"$63,400.00","$124,000.00"
ivyleague_schools,4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"


In [44]:
new_df.loc[('ivyleague_schools', 3)]

School Name                 Harvard University
School Type                         Ivy League
Starting Median Salary             $63,400.00 
Mid-Career Median Salary          $124,000.00 
Name: (ivyleague_schools, 3), dtype: object

In [45]:
new_df.iloc[3]

School Name                 Harvard University
School Type                         Ivy League
Starting Median Salary             $63,400.00 
Mid-Career Median Salary          $124,000.00 
Name: (ivyleague_schools, 3), dtype: object

## 152: Column Axis Concatenation

In [46]:
#### previously:
# dfs = [state, eng, liberal, ivies, party]
# schools = pd.concat(dfs)

In [47]:
schools

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00"
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00"
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00"
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00"
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00"
...,...,...,...,...
244,Harvard University,Ivy League,"$63,400.00","$124,000.00"
245,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"
246,Cornell University,Ivy League,"$60,300.00","$110,000.00"
247,Brown University,Ivy League,"$56,200.00","$109,000.00"


In [48]:
ivies3 = ivies.sort_values(by=['Starting Median Salary'], ascending=False)[:5].reset_index(drop=True)

In [49]:
eng3 = eng.sort_values(by=['Starting Median Salary'], ascending=False)[:5].reset_index(drop=True)

In [50]:
pd.concat([ivies3, eng3], axis=1)

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,School Name.1,School Type.1,Starting Median Salary.1,Mid-Career Median Salary.1
0,Princeton University,Ivy League,"$66,500.00","$131,000.00",California Institute of Technology (CIT),Engineering,"$75,500.00","$123,000.00"
1,Harvard University,Ivy League,"$63,400.00","$124,000.00",Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00"
2,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Harvey Mudd College,Engineering,"$71,800.00","$122,000.00"
3,Cornell University,Ivy League,"$60,300.00","$110,000.00","Polytechnic University of New York, Brooklyn",Engineering,"$62,400.00","$114,000.00"
4,Columbia University,Ivy League,"$59,400.00","$107,000.00",Cooper Union,Engineering,"$62,200.00","$114,000.00"


# 153:The append() Method: A Special Case Of concat()

In [51]:
liberal.append(party)

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Bucknell University,Liberal Arts,"$54,100.00","$110,000.00"
1,Colgate University,Liberal Arts,"$52,800.00","$108,000.00"
2,Amherst College,Liberal Arts,"$54,500.00","$107,000.00"
3,Lafayette College,Liberal Arts,"$53,900.00","$107,000.00"
4,Bowdoin College,Liberal Arts,"$48,100.00","$107,000.00"
...,...,...,...,...
15,University of New Hampshire (UNH),Party,"$41,800.00","$78,300.00"
16,West Virginia University (WVU),Party,"$43,100.00","$78,100.00"
17,University of Tennessee,Party,"$43,800.00","$74,600.00"
18,Ohio University,Party,"$42,200.00","$73,400.00"


In [52]:
pd.concat([liberal, party])

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Bucknell University,Liberal Arts,"$54,100.00","$110,000.00"
1,Colgate University,Liberal Arts,"$52,800.00","$108,000.00"
2,Amherst College,Liberal Arts,"$54,500.00","$107,000.00"
3,Lafayette College,Liberal Arts,"$53,900.00","$107,000.00"
4,Bowdoin College,Liberal Arts,"$48,100.00","$107,000.00"
...,...,...,...,...
15,University of New Hampshire (UNH),Party,"$41,800.00","$78,300.00"
16,West Virginia University (WVU),Party,"$43,100.00","$78,100.00"
17,University of Tennessee,Party,"$43,800.00","$74,600.00"
18,Ohio University,Party,"$42,200.00","$73,400.00"


# 154: Concat On Different Columns

In [53]:
ivies.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
1,Princeton University,Ivy League,"$66,500.00","$131,000.00"
2,Yale University,Ivy League,"$59,100.00","$126,000.00"
3,Harvard University,Ivy League,"$63,400.00","$124,000.00"
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"


In [54]:
eng.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00"
1,California Institute of Technology (CIT),Engineering,"$75,500.00","$123,000.00"
2,Harvey Mudd College,Engineering,"$71,800.00","$122,000.00"
3,"Polytechnic University of New York, Brooklyn",Engineering,"$62,400.00","$114,000.00"
4,Cooper Union,Engineering,"$62,200.00","$114,000.00"


In [55]:
# add STEM column

In [56]:
eng4 = eng.copy()

In [57]:
eng4['STEM'] = True

In [58]:
eng4.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,STEM
0,Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00",True
1,California Institute of Technology (CIT),Engineering,"$75,500.00","$123,000.00",True
2,Harvey Mudd College,Engineering,"$71,800.00","$122,000.00",True
3,"Polytechnic University of New York, Brooklyn",Engineering,"$62,400.00","$114,000.00",True
4,Cooper Union,Engineering,"$62,200.00","$114,000.00",True


In [59]:
pd.concat([ivies, eng4], join='outer')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,STEM
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",
1,Princeton University,Ivy League,"$66,500.00","$131,000.00",
2,Yale University,Ivy League,"$59,100.00","$126,000.00",
3,Harvard University,Ivy League,"$63,400.00","$124,000.00",
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",
5,Cornell University,Ivy League,"$60,300.00","$110,000.00",
6,Brown University,Ivy League,"$56,200.00","$109,000.00",
7,Columbia University,Ivy League,"$59,400.00","$107,000.00",
0,Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00",True
1,California Institute of Technology (CIT),Engineering,"$75,500.00","$123,000.00",True


In [60]:
pd.concat([ivies, eng4], join='inner')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
1,Princeton University,Ivy League,"$66,500.00","$131,000.00"
2,Yale University,Ivy League,"$59,100.00","$126,000.00"
3,Harvard University,Ivy League,"$63,400.00","$124,000.00"
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"
5,Cornell University,Ivy League,"$60,300.00","$110,000.00"
6,Brown University,Ivy League,"$56,200.00","$109,000.00"
7,Columbia University,Ivy League,"$59,400.00","$107,000.00"
0,Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00"
1,California Institute of Technology (CIT),Engineering,"$75,500.00","$123,000.00"


# 155:Skill Challenge

###### **1.**

Concatenate the *liberal* and *state* schools into a new dataframe. How many unique school names are there?

###### **2.**

What is the average median starting salary in the dataframe created above?

###### **3.**

Create a short dataframe that shows the top 3 *liberal* arts and *state* schools that produce the highest (mid-career) earning graduates. 

Show the *School Name* and *Mid-Career Median Salary* columns from each dataset, side by side, i.e. <ins>horizontally</ins>.

**BONUS**: nest the column labels within 'Liberal Arts' and 'State' labels.

# 156: Solution

In [61]:
# 1

In [62]:
dfc = pd.concat([liberal, state])

In [63]:
dfc.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Bucknell University,Liberal Arts,"$54,100.00","$110,000.00"
1,Colgate University,Liberal Arts,"$52,800.00","$108,000.00"
2,Amherst College,Liberal Arts,"$54,500.00","$107,000.00"
3,Lafayette College,Liberal Arts,"$53,900.00","$107,000.00"
4,Bowdoin College,Liberal Arts,"$48,100.00","$107,000.00"


In [64]:
dfc.nunique()

School Name                 222
School Type                   2
Starting Median Salary      122
Mid-Career Median Salary    155
dtype: int64

In [65]:
dfc['School Name'].nunique()

222

In [66]:
liberal.shape[0] + state.shape[0]

222

In [67]:
# 2

In [68]:
dfc.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Bucknell University,Liberal Arts,"$54,100.00","$110,000.00"
1,Colgate University,Liberal Arts,"$52,800.00","$108,000.00"
2,Amherst College,Liberal Arts,"$54,500.00","$107,000.00"
3,Lafayette College,Liberal Arts,"$53,900.00","$107,000.00"
4,Bowdoin College,Liberal Arts,"$48,100.00","$107,000.00"


In [69]:
dfc.iloc[:, 3].replace(r'\$|,', '', regex=True).astype(float).mean()

80856.3063063063

In [70]:
# 3

In [71]:
lib2 = liberal.sort_values(by=['Mid-Career Median Salary'], ascending=False)\
       .iloc[:3, [0, 3]].reset_index(drop=True)

In [72]:
state2 = state.sort_values(by=['Mid-Career Median Salary'], ascending=False)\
       .iloc[:3, [0, 3]].reset_index(drop=True)

In [73]:
pd.concat([lib2, state2], axis=1, keys=['Liberal Arts', 'State'])

Unnamed: 0_level_0,Liberal Arts,Liberal Arts,State,State
Unnamed: 0_level_1,School Name,Mid-Career Median Salary,School Name,Mid-Career Median Salary
0,"Wesleyan University (Middletown, Connecticut)","$97,900.00","University of California, Davis","$99,600.00"
1,Bates College,"$96,500.00",University of Colorado - Boulder (UCB),"$97,600.00"
2,Union College,"$95,800.00","University of California, Irvine (UCI)","$96,700.00"


# 157: The merge() Method

In [74]:
# merge() is similar to SQL!

In [75]:
# regional information on the schools

In [76]:
regions_url = 'https://andybek.com/pandas-regions'

In [77]:
regions = pd.read_csv(regions_url)

In [78]:
regions.head()

Unnamed: 0,School Name,Region
0,Massachusetts Institute of Technology (MIT),Northeastern
1,California Institute of Technology (CIT),California
2,Harvey Mudd College,California
3,"Polytechnic University of New York, Brooklyn",Northeastern
4,Cooper Union,Northeastern


In [79]:
regions.shape

(269, 2)

In [80]:
schools.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00"
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00"
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00"
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00"
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00"


In [81]:
pd.merge(schools, regions)

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00",California
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00",Southern
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00",California
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00",California
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00",California
...,...,...,...,...,...
264,Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
265,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
266,Cornell University,Ivy League,"$60,300.00","$110,000.00",Northeastern
267,Brown University,Ivy League,"$56,200.00","$109,000.00",Northeastern


In [82]:
pd.merge(schools, regions, on='School Name')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00",California
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00",Southern
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00",California
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00",California
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00",California
...,...,...,...,...,...
264,Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
265,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
266,Cornell University,Ivy League,"$60,300.00","$110,000.00",Northeastern
267,Brown University,Ivy League,"$56,200.00","$109,000.00",Northeastern


# 158: The left_on And right_on Params

In [83]:
income_url = 'https://andybek.com/pandas-mid'

In [84]:
mid_career = pd.read_csv(income_url)

In [85]:
mid_career.head()

Unnamed: 0,school_name,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary
0,Massachusetts Institute of Technology (MIT),"$76,800.00","$99,200.00","$168,000.00","$220,000.00"
1,California Institute of Technology (CIT),,"$104,000.00","$161,000.00",
2,Harvey Mudd College,,"$96,000.00","$180,000.00",
3,"Polytechnic University of New York, Brooklyn","$66,800.00","$94,300.00","$143,000.00","$190,000.00"
4,Cooper Union,,"$80,200.00","$142,000.00",


In [86]:
schools.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00"
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00"
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00"
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00"
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00"


In [87]:
# pd.merge(schools, mid_career)

In [88]:
pd.merge(schools, mid_career, left_on='School Name', right_on='school_name')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,school_name,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00","University of California, Berkeley","$59,500.00","$81,000.00","$149,000.00","$201,000.00"
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00",University of Virginia (UVA),"$52,200.00","$71,800.00","$146,000.00","$215,000.00"
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00",Cal Poly San Luis Obispo,"$55,000.00","$74,700.00","$133,000.00","$178,000.00"
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00",University of California at Los Angeles (UCLA),"$51,300.00","$72,500.00","$139,000.00","$193,000.00"
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00","University of California, San Diego (UCSD)","$51,700.00","$75,400.00","$131,000.00","$177,000.00"
...,...,...,...,...,...,...,...,...,...
264,Harvard University,Ivy League,"$63,400.00","$124,000.00",Harvard University,"$54,800.00","$86,200.00","$179,000.00","$288,000.00"
265,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",University of Pennsylvania,"$55,900.00","$79,200.00","$192,000.00","$282,000.00"
266,Cornell University,Ivy League,"$60,300.00","$110,000.00",Cornell University,"$56,800.00","$79,800.00","$160,000.00","$210,000.00"
267,Brown University,Ivy League,"$56,200.00","$109,000.00",Brown University,"$55,400.00","$74,400.00","$159,000.00","$228,000.00"


In [89]:
pd.merge(schools, mid_career, left_on='School Name', right_on='school_name').drop('school_name', axis=1)

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00","$59,500.00","$81,000.00","$149,000.00","$201,000.00"
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00","$52,200.00","$71,800.00","$146,000.00","$215,000.00"
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00","$55,000.00","$74,700.00","$133,000.00","$178,000.00"
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00","$51,300.00","$72,500.00","$139,000.00","$193,000.00"
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00","$51,700.00","$75,400.00","$131,000.00","$177,000.00"
...,...,...,...,...,...,...,...,...
264,Harvard University,Ivy League,"$63,400.00","$124,000.00","$54,800.00","$86,200.00","$179,000.00","$288,000.00"
265,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00","$55,900.00","$79,200.00","$192,000.00","$282,000.00"
266,Cornell University,Ivy League,"$60,300.00","$110,000.00","$56,800.00","$79,800.00","$160,000.00","$210,000.00"
267,Brown University,Ivy League,"$56,200.00","$109,000.00","$55,400.00","$74,400.00","$159,000.00","$228,000.00"


# 159: Inner vs Outer Joins

In [90]:
# the how param

In [91]:
pd.merge(ivies, regions, how='inner')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Northeastern
1,Princeton University,Ivy League,"$66,500.00","$131,000.00",Northeastern
2,Yale University,Ivy League,"$59,100.00","$126,000.00",Northeastern
3,Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
5,Cornell University,Ivy League,"$60,300.00","$110,000.00",Northeastern
6,Brown University,Ivy League,"$56,200.00","$109,000.00",Northeastern
7,Columbia University,Ivy League,"$59,400.00","$107,000.00",Northeastern


In [92]:
regions.shape

(269, 2)

In [93]:
pd.merge(ivies, regions, how='outer')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Northeastern
1,Princeton University,Ivy League,"$66,500.00","$131,000.00",Northeastern
2,Yale University,Ivy League,"$59,100.00","$126,000.00",Northeastern
3,Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
...,...,...,...,...,...
264,Austin Peay State University,,,,Southern
265,Pittsburg State University,,,,Midwestern
266,Southern Utah University,,,,Western
267,Montana State University - Billings,,,,Western


In [94]:
# ===ASIDE

In [95]:
a = {1,2,3}
b = {4,9,2}

In [96]:
a.union(b)

{1, 2, 3, 4, 9}

# 160: Left vs Right Joins

In [97]:
pd.merge(ivies, regions, how='left')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Northeastern
1,Princeton University,Ivy League,"$66,500.00","$131,000.00",Northeastern
2,Yale University,Ivy League,"$59,100.00","$126,000.00",Northeastern
3,Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
5,Cornell University,Ivy League,"$60,300.00","$110,000.00",Northeastern
6,Brown University,Ivy League,"$56,200.00","$109,000.00",Northeastern
7,Columbia University,Ivy League,"$59,400.00","$107,000.00",Northeastern


In [98]:
pd.merge(ivies, regions, how='right')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,Massachusetts Institute of Technology (MIT),,,,Northeastern
1,California Institute of Technology (CIT),,,,California
2,Harvey Mudd College,,,,California
3,"Polytechnic University of New York, Brooklyn",,,,Northeastern
4,Cooper Union,,,,Northeastern
...,...,...,...,...,...
264,Austin Peay State University,,,,Southern
265,Pittsburg State University,,,,Midwestern
266,Southern Utah University,,,,Western
267,Montana State University - Billings,,,,Western


In [99]:
# these are equivalent, or the same:

In [100]:
# pd.merge(ivies, regions, how='left')

In [101]:
# pd.merge(regions, ivies, how='right')

# 161: One-to-One and One-to-Many Joins

In [102]:
# 1-1

In [103]:
ivies

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
1,Princeton University,Ivy League,"$66,500.00","$131,000.00"
2,Yale University,Ivy League,"$59,100.00","$126,000.00"
3,Harvard University,Ivy League,"$63,400.00","$124,000.00"
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"
5,Cornell University,Ivy League,"$60,300.00","$110,000.00"
6,Brown University,Ivy League,"$56,200.00","$109,000.00"
7,Columbia University,Ivy League,"$59,400.00","$107,000.00"


In [104]:
regions.head()

Unnamed: 0,School Name,Region
0,Massachusetts Institute of Technology (MIT),Northeastern
1,California Institute of Technology (CIT),California
2,Harvey Mudd College,California
3,"Polytechnic University of New York, Brooklyn",Northeastern
4,Cooper Union,Northeastern


In [105]:
regions[regions['School Name'].isin(ivies['School Name'])]

Unnamed: 0,School Name,Region
86,Dartmouth College,Northeastern
87,Princeton University,Northeastern
88,Yale University,Northeastern
89,Harvard University,Northeastern
90,University of Pennsylvania,Northeastern
91,Cornell University,Northeastern
92,Brown University,Northeastern
93,Columbia University,Northeastern


In [106]:
pd.merge(ivies, regions, how='inner', on='School Name')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Northeastern
1,Princeton University,Ivy League,"$66,500.00","$131,000.00",Northeastern
2,Yale University,Ivy League,"$59,100.00","$126,000.00",Northeastern
3,Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
5,Cornell University,Ivy League,"$60,300.00","$110,000.00",Northeastern
6,Brown University,Ivy League,"$56,200.00","$109,000.00",Northeastern
7,Columbia University,Ivy League,"$59,400.00","$107,000.00",Northeastern


In [107]:
# 1-M

In [108]:
state

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00"
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00"
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00"
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00"
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00"
...,...,...,...,...
170,Austin Peay State University,State,"$37,700.00","$59,200.00"
171,Pittsburg State University,State,"$40,400.00","$58,200.00"
172,Southern Utah University,State,"$41,900.00","$56,500.00"
173,Montana State University - Billings,State,"$37,900.00","$50,600.00"


In [109]:
state['School Name'].is_unique

True

In [110]:
regions[regions['School Name'].isin(state['School Name'])]

Unnamed: 0,School Name,Region
19,University of Illinois at Urbana-Champaign (UIUC),Midwestern
20,"University of Maryland, College Park",Southern
21,"University of California, Santa Barbara (UCSB)",California
22,University of Texas (UT) - Austin,Southern
23,State University of New York (SUNY) at Albany,Northeastern
...,...,...
264,Austin Peay State University,Southern
265,Pittsburg State University,Midwestern
266,Southern Utah University,Western
267,Montana State University - Billings,Western


In [111]:
regions[regions['School Name'].isin(state['School Name'])].loc[:, 'School Name'].value_counts()

University of Illinois at Urbana-Champaign (UIUC)    2
Indiana University (IU), Bloomington                 2
University of Maryland, College Park                 2
Ohio University                                      2
University of Tennessee                              2
                                                    ..
University of Illinois at Chicago                    1
State University of New York (SUNY) at Buffalo       1
University of Kansas                                 1
University of New Mexico (UNM)                       1
Black Hills State University                         1
Name: School Name, Length: 175, dtype: int64

In [112]:
pd.merge(state, regions, how='inner', on='School Name').sort_values(by='School Name')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
169,Appalachian State University,State,"$40,400.00","$69,100.00",Southern
58,Arizona State University (ASU),State,"$47,400.00","$84,100.00",Western
57,Arizona State University (ASU),State,"$47,400.00","$84,100.00",Western
184,Arkansas State University (ASU),State,"$38,700.00","$63,300.00",Southern
48,Auburn University,State,"$45,400.00","$84,700.00",Southern
...,...,...,...,...,...
107,West Virginia University (WVU),State,"$43,100.00","$78,100.00",Southern
108,West Virginia University (WVU),State,"$43,100.00","$78,100.00",Southern
175,Western Carolina University,State,"$36,900.00","$66,600.00",Southern
131,Western Michigan University (WMU),State,"$42,300.00","$73,800.00",Midwestern


In [113]:
pd.merge(state, regions, how='inner', on='School Name').sort_values(by='School Name').drop_duplicates()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
169,Appalachian State University,State,"$40,400.00","$69,100.00",Southern
58,Arizona State University (ASU),State,"$47,400.00","$84,100.00",Western
184,Arkansas State University (ASU),State,"$38,700.00","$63,300.00",Southern
48,Auburn University,State,"$45,400.00","$84,700.00",Southern
189,Austin Peay State University,State,"$37,700.00","$59,200.00",Southern
...,...,...,...,...,...
117,Wayne State University,State,"$42,800.00","$76,100.00",Midwestern
107,West Virginia University (WVU),State,"$43,100.00","$78,100.00",Southern
175,Western Carolina University,State,"$36,900.00","$66,600.00",Southern
131,Western Michigan University (WMU),State,"$42,300.00","$73,800.00",Midwestern


In [114]:
pd.merge(state, regions.drop_duplicates())

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,"University of California, Berkeley",State,"$59,900.00","$112,000.00",California
1,University of Virginia (UVA),State,"$52,700.00","$103,000.00",Southern
2,Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00",California
3,University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00",California
4,"University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00",California
...,...,...,...,...,...
170,Austin Peay State University,State,"$37,700.00","$59,200.00",Southern
171,Pittsburg State University,State,"$40,400.00","$58,200.00",Midwestern
172,Southern Utah University,State,"$41,900.00","$56,500.00",Western
173,Montana State University - Billings,State,"$37,900.00","$50,600.00",Western


162: Many-to-Many Joins

In [115]:
# survey data -> what is the prestige of an ivy or eng degree

In [116]:
survey = pd.DataFrame({
    'School Type': ['Ivy League','Ivy League', 'Engineering', 'Engineering'],
    'Prestige': ['High', 'Good', 'Good', 'Okay'],
    'Respondent': [1,2,3,4]
})

In [117]:
survey = survey.append(pd.Series({'School Type': 'Ivy League', 'Prestige': 'Very High', 'Respondent': 5}, name=4))

In [118]:
survey

Unnamed: 0,School Type,Prestige,Respondent
0,Ivy League,High,1
1,Ivy League,Good,2
2,Engineering,Good,3
3,Engineering,Okay,4
4,Ivy League,Very High,5


In [119]:
ivies

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
1,Princeton University,Ivy League,"$66,500.00","$131,000.00"
2,Yale University,Ivy League,"$59,100.00","$126,000.00"
3,Harvard University,Ivy League,"$63,400.00","$124,000.00"
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"
5,Cornell University,Ivy League,"$60,300.00","$110,000.00"
6,Brown University,Ivy League,"$56,200.00","$109,000.00"
7,Columbia University,Ivy League,"$59,400.00","$107,000.00"


In [120]:
pd.merge(ivies, survey)

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Prestige,Respondent
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",High,1
1,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Good,2
2,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Very High,5
3,Princeton University,Ivy League,"$66,500.00","$131,000.00",High,1
4,Princeton University,Ivy League,"$66,500.00","$131,000.00",Good,2
5,Princeton University,Ivy League,"$66,500.00","$131,000.00",Very High,5
6,Yale University,Ivy League,"$59,100.00","$126,000.00",High,1
7,Yale University,Ivy League,"$59,100.00","$126,000.00",Good,2
8,Yale University,Ivy League,"$59,100.00","$126,000.00",Very High,5
9,Harvard University,Ivy League,"$63,400.00","$124,000.00",High,1


In [121]:
survey

Unnamed: 0,School Type,Prestige,Respondent
0,Ivy League,High,1
1,Ivy League,Good,2
2,Engineering,Good,3
3,Engineering,Okay,4
4,Ivy League,Very High,5


In [122]:
pd.merge(ivies, survey)

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Prestige,Respondent
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",High,1
1,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Good,2
2,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Very High,5
3,Princeton University,Ivy League,"$66,500.00","$131,000.00",High,1
4,Princeton University,Ivy League,"$66,500.00","$131,000.00",Good,2
5,Princeton University,Ivy League,"$66,500.00","$131,000.00",Very High,5
6,Yale University,Ivy League,"$59,100.00","$126,000.00",High,1
7,Yale University,Ivy League,"$59,100.00","$126,000.00",Good,2
8,Yale University,Ivy League,"$59,100.00","$126,000.00",Very High,5
9,Harvard University,Ivy League,"$63,400.00","$124,000.00",High,1


# 163: Merging By Index

Column joins options:
 - automatic 
 - the *on* parameter
 - *left_on*, *right_on* params




How about merging **by index**?

In [123]:
ivies4 = ivies.set_index('School Name')

In [124]:
regions2 = regions.set_index('School Name')

In [125]:
ivies4.head()

Unnamed: 0_level_0,School Type,Starting Median Salary,Mid-Career Median Salary
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
Princeton University,Ivy League,"$66,500.00","$131,000.00"
Yale University,Ivy League,"$59,100.00","$126,000.00"
Harvard University,Ivy League,"$63,400.00","$124,000.00"
University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"


In [126]:
regions2.head()

Unnamed: 0_level_0,Region
School Name,Unnamed: 1_level_1
Massachusetts Institute of Technology (MIT),Northeastern
California Institute of Technology (CIT),California
Harvey Mudd College,California
"Polytechnic University of New York, Brooklyn",Northeastern
Cooper Union,Northeastern


In [127]:
pd.merge(ivies4, regions2, left_index=True, right_index=True)

Unnamed: 0_level_0,School Type,Starting Median Salary,Mid-Career Median Salary,Region
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brown University,Ivy League,"$56,200.00","$109,000.00",Northeastern
Columbia University,Ivy League,"$59,400.00","$107,000.00",Northeastern
Cornell University,Ivy League,"$60,300.00","$110,000.00",Northeastern
Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Northeastern
Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
Princeton University,Ivy League,"$66,500.00","$131,000.00",Northeastern
University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
Yale University,Ivy League,"$59,100.00","$126,000.00",Northeastern


In [128]:
# How about merging by index vs column?

In [129]:
regions.head()

Unnamed: 0,School Name,Region
0,Massachusetts Institute of Technology (MIT),Northeastern
1,California Institute of Technology (CIT),California
2,Harvey Mudd College,California
3,"Polytechnic University of New York, Brooklyn",Northeastern
4,Cooper Union,Northeastern


In [130]:
ivies4

Unnamed: 0_level_0,School Type,Starting Median Salary,Mid-Career Median Salary
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
Princeton University,Ivy League,"$66,500.00","$131,000.00"
Yale University,Ivy League,"$59,100.00","$126,000.00"
Harvard University,Ivy League,"$63,400.00","$124,000.00"
University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"
Cornell University,Ivy League,"$60,300.00","$110,000.00"
Brown University,Ivy League,"$56,200.00","$109,000.00"
Columbia University,Ivy League,"$59,400.00","$107,000.00"


In [131]:
pd.merge(ivies4, regions, left_index=True, right_on='School Name')

Unnamed: 0,School Type,Starting Median Salary,Mid-Career Median Salary,School Name,Region
86,Ivy League,"$58,000.00","$134,000.00",Dartmouth College,Northeastern
87,Ivy League,"$66,500.00","$131,000.00",Princeton University,Northeastern
88,Ivy League,"$59,100.00","$126,000.00",Yale University,Northeastern
89,Ivy League,"$63,400.00","$124,000.00",Harvard University,Northeastern
90,Ivy League,"$60,900.00","$120,000.00",University of Pennsylvania,Northeastern
91,Ivy League,"$60,300.00","$110,000.00",Cornell University,Northeastern
92,Ivy League,"$56,200.00","$109,000.00",Brown University,Northeastern
93,Ivy League,"$59,400.00","$107,000.00",Columbia University,Northeastern


# 164: The join() Method

In [132]:
ivies4

Unnamed: 0_level_0,School Type,Starting Median Salary,Mid-Career Median Salary
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
Princeton University,Ivy League,"$66,500.00","$131,000.00"
Yale University,Ivy League,"$59,100.00","$126,000.00"
Harvard University,Ivy League,"$63,400.00","$124,000.00"
University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"
Cornell University,Ivy League,"$60,300.00","$110,000.00"
Brown University,Ivy League,"$56,200.00","$109,000.00"
Columbia University,Ivy League,"$59,400.00","$107,000.00"


In [133]:
regions2.head()

Unnamed: 0_level_0,Region
School Name,Unnamed: 1_level_1
Massachusetts Institute of Technology (MIT),Northeastern
California Institute of Technology (CIT),California
Harvey Mudd College,California
"Polytechnic University of New York, Brooklyn",Northeastern
Cooper Union,Northeastern


In [134]:
ivies4.join(regions2)

Unnamed: 0_level_0,School Type,Starting Median Salary,Mid-Career Median Salary,Region
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brown University,Ivy League,"$56,200.00","$109,000.00",Northeastern
Columbia University,Ivy League,"$59,400.00","$107,000.00",Northeastern
Cornell University,Ivy League,"$60,300.00","$110,000.00",Northeastern
Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Northeastern
Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
Princeton University,Ivy League,"$66,500.00","$131,000.00",Northeastern
University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
Yale University,Ivy League,"$59,100.00","$126,000.00",Northeastern


In [135]:
pd.merge(ivies4, regions2, right_index=True, left_index=True)

Unnamed: 0_level_0,School Type,Starting Median Salary,Mid-Career Median Salary,Region
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brown University,Ivy League,"$56,200.00","$109,000.00",Northeastern
Columbia University,Ivy League,"$59,400.00","$107,000.00",Northeastern
Cornell University,Ivy League,"$60,300.00","$110,000.00",Northeastern
Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Northeastern
Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
Princeton University,Ivy League,"$66,500.00","$131,000.00",Northeastern
University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
Yale University,Ivy League,"$59,100.00","$126,000.00",Northeastern


In [136]:
ivies

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00"
1,Princeton University,Ivy League,"$66,500.00","$131,000.00"
2,Yale University,Ivy League,"$59,100.00","$126,000.00"
3,Harvard University,Ivy League,"$63,400.00","$124,000.00"
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00"
5,Cornell University,Ivy League,"$60,300.00","$110,000.00"
6,Brown University,Ivy League,"$56,200.00","$109,000.00"
7,Columbia University,Ivy League,"$59,400.00","$107,000.00"


In [137]:
regions2.head()

Unnamed: 0_level_0,Region
School Name,Unnamed: 1_level_1
Massachusetts Institute of Technology (MIT),Northeastern
California Institute of Technology (CIT),California
Harvey Mudd College,California
"Polytechnic University of New York, Brooklyn",Northeastern
Cooper Union,Northeastern


In [138]:
ivies.join(regions2, on='School Name')

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,Dartmouth College,Ivy League,"$58,000.00","$134,000.00",Northeastern
1,Princeton University,Ivy League,"$66,500.00","$131,000.00",Northeastern
2,Yale University,Ivy League,"$59,100.00","$126,000.00",Northeastern
3,Harvard University,Ivy League,"$63,400.00","$124,000.00",Northeastern
4,University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00",Northeastern
5,Cornell University,Ivy League,"$60,300.00","$110,000.00",Northeastern
6,Brown University,Ivy League,"$56,200.00","$109,000.00",Northeastern
7,Columbia University,Ivy League,"$59,400.00","$107,000.00",Northeastern


#### Skill Challenge

###### **1.**

Merge *liberal* arts schools with *regions* and assign the resulting dataframe to *dfm*. What region has the highest number of liberal arts schools?

###### **2.**

Set *school_name* as the index of the *mid_career* dataframe. Do the operation inplace.

###### **3.**

Merge the *dfm* and *mid_career* dataframes. Is the join operation one-to-one?

#### Solution

In [139]:
# 1

In [140]:
dfm = pd.merge(liberal, regions)

In [141]:
dfm.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,Bucknell University,Liberal Arts,"$54,100.00","$110,000.00",Northeastern
1,Colgate University,Liberal Arts,"$52,800.00","$108,000.00",Northeastern
2,Amherst College,Liberal Arts,"$54,500.00","$107,000.00",Northeastern
3,Lafayette College,Liberal Arts,"$53,900.00","$107,000.00",Northeastern
4,Bowdoin College,Liberal Arts,"$48,100.00","$107,000.00",Northeastern


In [142]:
dfm['School Type'].nunique()

1

In [143]:
dfm.Region.value_counts()

Northeastern    25
Midwestern       8
Western          7
Southern         5
California       3
Name: Region, dtype: int64

In [144]:
# 2

In [145]:
mid_career.head()

Unnamed: 0,school_name,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary
0,Massachusetts Institute of Technology (MIT),"$76,800.00","$99,200.00","$168,000.00","$220,000.00"
1,California Institute of Technology (CIT),,"$104,000.00","$161,000.00",
2,Harvey Mudd College,,"$96,000.00","$180,000.00",
3,"Polytechnic University of New York, Brooklyn","$66,800.00","$94,300.00","$143,000.00","$190,000.00"
4,Cooper Union,,"$80,200.00","$142,000.00",


In [146]:
mid_career.set_index('school_name', inplace=True)

In [147]:
mid_career.head()

Unnamed: 0_level_0,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Massachusetts Institute of Technology (MIT),"$76,800.00","$99,200.00","$168,000.00","$220,000.00"
California Institute of Technology (CIT),,"$104,000.00","$161,000.00",
Harvey Mudd College,,"$96,000.00","$180,000.00",
"Polytechnic University of New York, Brooklyn","$66,800.00","$94,300.00","$143,000.00","$190,000.00"
Cooper Union,,"$80,200.00","$142,000.00",


In [148]:
# 3

In [149]:
dfm.head()

Unnamed: 0,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
0,Bucknell University,Liberal Arts,"$54,100.00","$110,000.00",Northeastern
1,Colgate University,Liberal Arts,"$52,800.00","$108,000.00",Northeastern
2,Amherst College,Liberal Arts,"$54,500.00","$107,000.00",Northeastern
3,Lafayette College,Liberal Arts,"$53,900.00","$107,000.00",Northeastern
4,Bowdoin College,Liberal Arts,"$48,100.00","$107,000.00",Northeastern


In [150]:
pd.merge(mid_career, dfm, left_index=True, right_on='School Name')

Unnamed: 0,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary,School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Region
28,,"$54,100.00","$123,000.00",,Randolph-Macon College,Liberal Arts,"$42,600.00","$83,600.00",Southern
29,,"$54,100.00","$123,000.00",,Randolph-Macon College,Liberal Arts,"$42,600.00","$83,600.00",Southern
28,,"$54,100.00","$123,000.00",,Randolph-Macon College,Liberal Arts,"$42,600.00","$83,600.00",Southern
29,,"$54,100.00","$123,000.00",,Randolph-Macon College,Liberal Arts,"$42,600.00","$83,600.00",Southern
0,"$62,800.00","$80,600.00","$156,000.00","$251,000.00",Bucknell University,Liberal Arts,"$54,100.00","$110,000.00",Northeastern
1,"$60,000.00","$76,700.00","$167,000.00","$265,000.00",Colgate University,Liberal Arts,"$52,800.00","$108,000.00",Northeastern
2,,"$84,900.00","$162,000.00",,Amherst College,Liberal Arts,"$54,500.00","$107,000.00",Northeastern
3,"$70,600.00","$79,300.00","$144,000.00","$204,000.00",Lafayette College,Liberal Arts,"$53,900.00","$107,000.00",Northeastern
4,,"$74,600.00","$146,000.00",,Bowdoin College,Liberal Arts,"$48,100.00","$107,000.00",Northeastern
5,,"$65,600.00","$143,000.00",,College of the Holy Cross,Liberal Arts,"$50,200.00","$106,000.00",Northeastern


In [151]:
# Is it 1-1

In [152]:
left_key = mid_career.index

In [153]:
right_key = dfm['School Name'] # liberal arts inner join regions

In [154]:
left_key[left_key.isin(right_key)].value_counts()

Randolph-Macon College                           2
Gustavus Adolphus College                        1
Siena College                                    1
Smith College                                    1
Hamilton College                                 1
Wellesley College                                1
Denison University                               1
Oberlin College                                  1
University of Puget Sound                        1
Colorado College (CC)                            1
Reed College                                     1
Whitman College                                  1
Colby College                                    1
Ursinus College                                  1
Juniata College                                  1
Wittenberg University                            1
Grinnell College                                 1
Skidmore College                                 1
Moravian College                                 1
Lewis & Clark College          

# python fancy codes:

#Python let's you play the system bell sound. 🔔🐍

This can be a useful way to get the user's attention in your program! ⚠️

# play the system bell sound.

In [161]:
>>> print("\a")  #OR   
print("\a")



