In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Apply seaborn's default plot styling to any figures drawn later in the notebook.
sns.set()

### Merging DataFrames

In [3]:
# Load the Zipcode -> 2010 census population table and print every row.
population = pd.read_csv('pa_zipcode_population.csv')
print(population)

   Zipcode  2010 Census Population
0    16855                     282
1    15681                    5241
2    18657                   11985
3    17307                    5899
4    15635                     220


In [4]:
# Load the Zipcode -> City/State lookup table and print every row.
cities = pd.read_csv('pa_zipcode_city.csv')
print(cities)

    Zipcode             City State
0     17545          MANHEIM    PA
1     18455     PRESTON PARK    PA
2     17307      BIGLERVILLE    PA
3     15705          INDIANA    PA
4     16833     CURWENSVILLE    PA
5     16220            CROWN    PA
6     18618     HARVEYS LAKE    PA
7     16855  MINERAL SPRINGS    PA
8     16623        CASSVILLE    PA
9     15635       HANNASTOWN    PA
10    15681        SALTSBURG    PA
11    18657      TUNKHANNOCK    PA
12    15279       PITTSBURGH    PA
13    17231        LEMASTERS    PA
14    18821       GREAT BEND    PA


In [5]:
# No `on=` is given, so the key is every column label the two frames share
# (here only Zipcode); the default inner join keeps zip codes present in both.
population.merge(cities)

Unnamed: 0,Zipcode,2010 Census Population,City,State
0,16855,282,MINERAL SPRINGS,PA
1,15681,5241,SALTSBURG,PA
2,18657,11985,TUNKHANNOCK,PA
3,17307,5899,BIGLERVILLE,PA
4,15635,220,HANNASTOWN,PA


In [6]:
# Read the three top-5 medal tables.
bronze = pd.read_csv('summer_olympic_medals/bronze_top5.csv')
silver = pd.read_csv('summer_olympic_medals/silver_top5.csv')
gold = pd.read_csv('summer_olympic_medals/gold_top5.csv')

# Prepend an NOC code column to bronze and gold. The codes are assigned by
# position, so each list must follow the row order of its CSV.
# silver is loaded but is not given an NOC column here.
bronze.insert(0, 'NOC', ['USA','URS','GBR','FRA','GER'])
gold.insert(0, 'NOC', ['USA','URS','GBR','ITA','GER'])

print(bronze)
print(gold)

   NOC         Country   Total
0  USA   United States  1052.0
1  URS    Soviet Union   584.0
2  GBR  United Kingdom   505.0
3  FRA          France   475.0
4  GER         Germany   454.0
   NOC         Country   Total
0  USA   United States  2088.0
1  URS    Soviet Union   838.0
2  GBR  United Kingdom   498.0
3  ITA           Italy   460.0
4  GER         Germany   407.0


In [7]:
# Without `on=`, all three shared columns (NOC, Country, Total) form the key.
# No country has the same Total in both tables, so the result is empty.
bronze.merge(gold)

Unnamed: 0,NOC,Country,Total


In [8]:
# Join on NOC only; the other shared columns (Country, Total) come back
# twice, disambiguated with the default _x / _y suffixes.
bronze.merge(gold, on='NOC')

Unnamed: 0,NOC,Country_x,Total_x,Country_y,Total_y
0,USA,United States,1052.0,United States,2088.0
1,URS,Soviet Union,584.0,Soviet Union,838.0
2,GBR,United Kingdom,505.0,United Kingdom,498.0
3,GER,Germany,454.0,Germany,407.0


In [9]:
# Join on both identifying columns so only Total is duplicated (_x / _y).
pd.merge(left=bronze, right=gold, on=['NOC', 'Country'])

Unnamed: 0,NOC,Country,Total_x,Total_y
0,USA,United States,1052.0,2088.0
1,URS,Soviet Union,584.0,838.0
2,GBR,United Kingdom,505.0,498.0
3,GER,Germany,454.0,407.0


In [10]:
# Same join as before, but label the two Total columns by medal type
# instead of the default _x / _y.
bronze.merge(
    gold,
    on=['NOC', 'Country'],
    suffixes=('_bronze', '_gold'),
)

Unnamed: 0,NOC,Country,Total_bronze,Total_gold
0,USA,United States,1052.0,2088.0
1,URS,Soviet Union,584.0,838.0
2,GBR,United Kingdom,505.0,498.0
3,GER,Germany,454.0,407.0


In [11]:
# Load the city -> county table; its key column is named 'CITY NAME',
# unlike the 'City' column in `cities`.
counties = pd.read_csv('pa_counties.csv')
print(counties)

         CITY NAME   COUNTY NAME
0        SALTSBURG       INDIANA
1  MINERAL SPRINGS    CLEARFIELD
2      BIGLERVILLE         ADAMS
3       HANNASTOWN  WESTMORELAND
4      TUNKHANNOCK       WYOMING


In [12]:
# Print the last five rows of the city lookup table.
print(cities.tail(n=5))

    Zipcode         City State
10    15681    SALTSBURG    PA
11    18657  TUNKHANNOCK    PA
12    15279   PITTSBURGH    PA
13    17231    LEMASTERS    PA
14    18821   GREAT BEND    PA


In [13]:
# The key column is named differently in each frame, so name it per side.
# Both key columns ('CITY NAME' and 'City') are kept in the result.
counties.merge(cities, left_on='CITY NAME', right_on='City')

Unnamed: 0,CITY NAME,COUNTY NAME,Zipcode,City,State
0,SALTSBURG,INDIANA,15681,SALTSBURG,PA
1,MINERAL SPRINGS,CLEARFIELD,16855,MINERAL SPRINGS,PA
2,BIGLERVILLE,ADAMS,17307,BIGLERVILLE,PA
3,HANNASTOWN,WESTMORELAND,15635,HANNASTOWN,PA
4,TUNKHANNOCK,WYOMING,18657,TUNKHANNOCK,PA


In [14]:
# Load per-branch revenue (columns: city, branch_id, revenue) and display it.
revenue = pd.read_csv('revenue.csv')
revenue

Unnamed: 0,city,branch_id,revenue
0,Austin,10,100
1,Denver,20,83
2,Springfield,30,4
3,Mendocino,47,200


In [15]:
# Load per-branch managers (columns: city, branch_id, manager) and display it.
# Note Springfield has branch_id 31 here but 30 in `revenue`.
managers = pd.read_csv('managers.csv')
managers

Unnamed: 0,city,branch_id,manager
0,Austin,10,Charles
1,Denver,20,Joel
2,Mendocino,47,Brett
3,Springfield,31,Sally


In [16]:
# Join on city name only; branch_id exists in both frames, so it is
# returned as branch_id_x / branch_id_y (which disagree for Springfield).
merge_by_city = revenue.merge(managers, on='city')
merge_by_city

Unnamed: 0,city,branch_id_x,revenue,branch_id_y,manager
0,Austin,10,100,10,Charles
1,Denver,20,83,20,Joel
2,Springfield,30,4,31,Sally
3,Mendocino,47,200,47,Brett


In [17]:
# Join on branch_id only; Springfield drops out because its id differs
# between the two frames (30 vs 31).
merge_by_id = revenue.merge(managers, on='branch_id')
merge_by_id

Unnamed: 0,city_x,branch_id,revenue,city_y,manager
0,Austin,10,100,Austin,Charles
1,Denver,20,83,Denver,Joel
2,Mendocino,47,200,Mendocino,Brett


In [18]:
# Rename managers' 'city' column to 'branch' and add a 'state' column.
# `index=str` is kept from the original call; it casts the row labels to strings.
managers = managers.rename(index=str, columns={'city':'branch'})
# DataFrame.insert raises ValueError if the column already exists, so guard
# it: without this the cell fails when re-run on a live kernel.
if 'state' not in managers.columns:
    managers.insert(loc=2, column='state', value=['TX','CO','CA','IL'])
managers

Unnamed: 0,branch,branch_id,state,manager
0,Austin,10,TX,Charles
1,Denver,20,CO,Joel
2,Mendocino,47,CA,Brett
3,Springfield,31,IL,Sally


In [19]:
# Add a 'state' column to revenue (values are positional, one per row).
# DataFrame.insert raises ValueError if the column already exists, so guard
# it: without this the cell fails when re-run on a live kernel.
if 'state' not in revenue.columns:
    revenue.insert(loc=2, column='state', value=['TX','CO','MO','CA'])
revenue

Unnamed: 0,city,branch_id,state,revenue
0,Austin,10,TX,100
1,Denver,20,CO,83
2,Springfield,30,MO,4
3,Mendocino,47,CA,200


In [20]:
# Match revenue.city against managers.branch. Every other shared column
# (branch_id, state) is duplicated with _x / _y suffixes, and the two
# Springfield rows are paired even though their ids and states differ.
combined = revenue.merge(right=managers, left_on='city', right_on='branch')
print(combined)

          city  branch_id_x state_x  revenue       branch  branch_id_y  \
0       Austin           10      TX      100       Austin           10   
1       Denver           20      CO       83       Denver           20   
2  Springfield           30      MO        4  Springfield           31   
3    Mendocino           47      CA      200    Mendocino           47   

  state_y  manager  
0      TX  Charles  
1      CO     Joel  
2      IL    Sally  
3      CA    Brett  


In [21]:
# Restore the 'city' column name so all three key columns line up.
managers = managers.rename(columns={'branch': 'city'}, index=str)
# Require branch_id, city and state to agree; Springfield no longer matches
# (30/MO in revenue vs 31/IL in managers), so three rows remain.
combined = revenue.merge(managers, on=['branch_id', 'city', 'state'])
combined

Unnamed: 0,city,branch_id,state,revenue,manager
0,Austin,10,TX,100,Charles
1,Denver,20,CO,83,Joel
2,Mendocino,47,CA,200,Brett


### Joining DataFrames 

In [22]:
# Show both medal tables again (as a tuple) before the join-type examples.
bronze, gold

(   NOC         Country   Total
 0  USA   United States  1052.0
 1  URS    Soviet Union   584.0
 2  GBR  United Kingdom   505.0
 3  FRA          France   475.0
 4  GER         Germany   454.0,    NOC         Country   Total
 0  USA   United States  2088.0
 1  URS    Soviet Union   838.0
 2  GBR  United Kingdom   498.0
 3  ITA           Italy   460.0
 4  GER         Germany   407.0)

In [23]:
# Inner join (the default, spelled out): only countries present in both tables.
bronze.merge(
    gold,
    how='inner',
    on=['NOC', 'Country'],
    suffixes=('_bronze', '_gold'),
)

Unnamed: 0,NOC,Country,Total_bronze,Total_gold
0,USA,United States,1052.0,2088.0
1,URS,Soviet Union,584.0,838.0
2,GBR,United Kingdom,505.0,498.0
3,GER,Germany,454.0,407.0


#### Merging with left join

In [24]:
# Left join: keep every bronze row; France has no gold entry, so its
# Total_gold is missing.
bronze.merge(
    gold,
    how='left',
    on=['NOC', 'Country'],
    suffixes=('_bronze', '_gold'),
)

Unnamed: 0,NOC,Country,Total_bronze,Total_gold
0,USA,United States,1052.0,2088.0
1,URS,Soviet Union,584.0,838.0
2,GBR,United Kingdom,505.0,498.0
3,FRA,France,475.0,
4,GER,Germany,454.0,407.0


#### Merging with right join 

In [25]:
# Right join: keep every gold row; Italy has no bronze entry, so its
# Total_bronze is missing.
bronze.merge(
    gold,
    how='right',
    on=['NOC', 'Country'],
    suffixes=('_bronze', '_gold'),
)

Unnamed: 0,NOC,Country,Total_bronze,Total_gold
0,USA,United States,1052.0,2088.0
1,URS,Soviet Union,584.0,838.0
2,GBR,United Kingdom,505.0,498.0
3,GER,Germany,454.0,407.0
4,ITA,Italy,,460.0


#### Merging with outer join

In [26]:
# Outer join: keep countries from either table (both France and Italy
# appear, each with one missing Total).
bronze.merge(
    gold,
    how='outer',
    on=['NOC', 'Country'],
    suffixes=('_bronze', '_gold'),
)

Unnamed: 0,NOC,Country,Total_bronze,Total_gold
0,USA,United States,1052.0,2088.0
1,URS,Soviet Union,584.0,838.0
2,GBR,United Kingdom,505.0,498.0
3,FRA,France,475.0,
4,GER,Germany,454.0,407.0
5,ITA,Italy,,460.0


In [27]:
# Load two frames indexed by zip code (first CSV column becomes the index).
# NOTE: this rebinds `population`, replacing the PA zip-code table loaded earlier.
population = pd.read_csv('population_00.csv', index_col=0)
unemployment = pd.read_csv('unemployment_00.csv', index_col=0)
print(population)
print(unemployment)

               2010 Census Population
Zip Code ZCTA                        
57538                             322
59916                             130
37660                           40038
2860                            45199
       unemployment  participants
Zip                              
2860           0.11         34447
46167          0.02          4800
1097           0.33            42
80808          0.07          4310


#### Using .join(how='left')

In [28]:
# .join aligns on the index; 'left' is its default, written out here.
# Every population row is kept; only zip 2860 finds unemployment data.
population.join(unemployment, how='left')

Unnamed: 0_level_0,2010 Census Population,unemployment,participants
Zip Code ZCTA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
57538,322,,
59916,130,,
37660,40038,,
2860,45199,0.11,34447.0


#### Using .join(how='right')

In [29]:
# Right join on the index: keep every unemployment row; the result's
# index takes its name ('Zip') from the right-hand frame.
population.join(other=unemployment, how='right')

Unnamed: 0_level_0,2010 Census Population,unemployment,participants
Zip,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2860,45199.0,0.11,34447
46167,,0.02,4800
1097,,0.33,42
80808,,0.07,4310


#### Using .join(how='inner')

In [30]:
# Inner join on the index: only zip 2860 appears in both frames.
population.join(other=unemployment, how='inner')

Unnamed: 0,2010 Census Population,unemployment,participants
2860,45199,0.11,34447


#### Using .join(how='outer')

In [31]:
# Outer join on the index: the union of zip codes from both frames,
# returned in sorted index order.
population.join(other=unemployment, how='outer')

Unnamed: 0,2010 Census Population,unemployment,participants
1097,,0.33,42.0
2860,45199.0,0.11,34447.0
37660,40038.0,,
46167,,0.02,4800.0
57538,322.0,,
59916,130.0,,
80808,,0.07,4310.0


### Which should you use?
- `df1.append(df2)`: stacking vertically (deprecated in pandas 1.4 and removed in 2.0; use `pd.concat([df1, df2])` there)
- `pd.concat([df1, df2])`:
  - stacking many horizontally or vertically
  - simple inner/outer joins on Indexes
- `df1.join(df2)`: inner/outer/left/right joins on Indexes
- `pd.merge(df1, df2)`: many joins on multiple columns

In [32]:
# Rename 'city' back to 'branch' and make branch_id the index so that
# managers can be used in index-based joins.
managers = managers.rename(index=str, columns={'city':'branch'})
# Once branch_id is the index it is no longer a column, and set_index would
# raise KeyError on a second run; the guard keeps the cell re-runnable.
# Reassigning (instead of inplace=True) keeps the step explicit.
if 'branch_id' in managers.columns:
    managers = managers.set_index('branch_id')
managers

Unnamed: 0_level_0,branch,state,manager
branch_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,Austin,TX,Charles
20,Denver,CO,Joel
47,Mendocino,CA,Brett
31,Springfield,IL,Sally


In [33]:
# Make branch_id the index of revenue for index-based joins.
# Once branch_id is the index it is no longer a column, and set_index would
# raise KeyError on a second run; the guard keeps the cell re-runnable.
if 'branch_id' in revenue.columns:
    revenue = revenue.set_index('branch_id')
revenue

Unnamed: 0_level_0,city,state,revenue
branch_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,Austin,TX,100
20,Denver,CO,83
30,Springfield,MO,4
47,Mendocino,CA,200


In [34]:
# branch_id is now the index of both frames; `on=` also accepts an index
# level name, and the result stays indexed by branch_id.
revenue.merge(managers, on='branch_id')

Unnamed: 0_level_0,city,state_x,revenue,branch,state_y,manager
branch_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10,Austin,TX,100,Austin,TX,Charles
20,Denver,CO,83,Denver,CO,Joel
47,Mendocino,CA,200,Mendocino,CA,Brett


In [35]:
# No key given: with branch_id in the index, the only shared column label
# is 'state', so that becomes the join key. Springfield (IL) finds no match
# in revenue (MO), and the result gets a fresh 0..n-1 index.
managers.merge(revenue, how='left')

Unnamed: 0,branch,state,manager,city,revenue
0,Austin,TX,Charles,Austin,100.0
1,Denver,CO,Joel,Denver,83.0
2,Mendocino,CA,Brett,Mendocino,200.0
3,Springfield,IL,Sally,,


In [36]:
# Outer join on the branch_id index. Both frames have a 'state' column, so
# suffixes are supplied to tell the two apart.
revenue.join(
    other=managers,
    how='outer',
    lsuffix='_rev',
    rsuffix='_mgn',
)

Unnamed: 0_level_0,city,state_rev,revenue,branch,state_mgn,manager
branch_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10,Austin,TX,100.0,Austin,TX,Charles
20,Denver,CO,83.0,Denver,CO,Joel
30,Springfield,MO,4.0,,,
31,,,,Springfield,IL,Sally
47,Mendocino,CA,200.0,Mendocino,CA,Brett


In [37]:
# Left join from managers on the branch_id index: every manager is kept,
# and branch 31 (Springfield) has no revenue row.
managers.join(
    other=revenue,
    how='left',
    lsuffix='_mgn',
    rsuffix='_rev',
)

Unnamed: 0_level_0,branch,state_mgn,manager,city,state_rev,revenue
branch_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10,Austin,TX,Charles,Austin,TX,100.0
20,Denver,CO,Joel,Denver,CO,83.0
47,Mendocino,CA,Brett,Mendocino,CA,200.0
31,Springfield,IL,Sally,,,


In [38]:
# Load unit sales per (city, state); the first CSV column is used as the index.
# Both Springfields (MO and IL) appear here.
sales = pd.read_csv('sales.csv', index_col=0)
sales

Unnamed: 0,city,state,units
0,Mendocino,CA,1
1,Denver,CO,4
2,Austin,TX,2
3,Springfield,MO,5
4,Springfield,IL,1


In [39]:
# Right join on (city, state): keep every sales row; Springfield, IL has
# no revenue entry, so its revenue is missing.
revenue_and_sales = revenue.merge(
    sales,
    how='right',
    on=['city', 'state'],
)
revenue_and_sales

Unnamed: 0,city,state,revenue,units
0,Austin,TX,100.0,2
1,Denver,CO,83.0,4
2,Springfield,MO,4.0,5
3,Mendocino,CA,200.0,1
4,Springfield,IL,,1


In [40]:
# Left join from sales: match (city, state) in sales against
# (branch, state) in managers. Springfield, MO has no manager row.
sales_keys = ['city', 'state']
manager_keys = ['branch', 'state']
sales_and_managers = sales.merge(
    managers,
    how='left',
    left_on=sales_keys,
    right_on=manager_keys,
)
sales_and_managers

Unnamed: 0,city,state,units,branch,manager
0,Mendocino,CA,1,Mendocino,Brett
1,Denver,CO,4,Denver,Joel
2,Austin,TX,2,Austin,Charles
3,Springfield,MO,5,,
4,Springfield,IL,1,Springfield,Sally


In [41]:
# Default merge: inner join on every shared column (city, state, units).
merge_default = sales_and_managers.merge(revenue_and_sales)
merge_default

Unnamed: 0,city,state,units,branch,manager,revenue
0,Mendocino,CA,1,Mendocino,Brett,200.0
1,Denver,CO,4,Denver,Joel,83.0
2,Austin,TX,2,Austin,Charles,100.0
3,Springfield,MO,5,,,4.0
4,Springfield,IL,1,Springfield,Sally,


In [42]:
# Outer join on every shared column; here it yields the same five rows
# as the default inner merge.
merge_outer = sales_and_managers.merge(revenue_and_sales, how='outer')
merge_outer

Unnamed: 0,city,state,units,branch,manager,revenue
0,Mendocino,CA,1,Mendocino,Brett,200.0
1,Denver,CO,4,Denver,Joel,83.0
2,Austin,TX,2,Austin,Charles,100.0
3,Springfield,MO,5,,,4.0
4,Springfield,IL,1,Springfield,Sally,


In [43]:
# Restrict the key to (city, state); 'units' is no longer part of the key,
# so it comes back twice as units_x / units_y.
merge_outer_on = sales_and_managers.merge(
    revenue_and_sales,
    on=['city', 'state'],
    how='outer',
)
merge_outer_on

Unnamed: 0,city,state,units_x,branch,manager,revenue,units_y
0,Mendocino,CA,1,Mendocino,Brett,200.0,1
1,Denver,CO,4,Denver,Joel,83.0,4
2,Austin,TX,2,Austin,Charles,100.0,2
3,Springfield,MO,5,,,4.0,5
4,Springfield,IL,1,Springfield,Sally,,1


### Ordered merges

In [44]:
# Load February software and hardware sales with 'Date' parsed as
# datetimes, then order each frame chronologically.
software = pd.read_csv('Sales/feb-sales-Software.csv', parse_dates=['Date'])
software = software.sort_values(by='Date')
hardware = pd.read_csv('Sales/feb-sales-Hardware.csv', parse_dates=['Date'])
hardware = hardware.sort_values(by='Date')

In [45]:
# Print the date-sorted software sales (the original row labels are kept).
print(software)

                 Date          Company   Product  Units
2 2015-02-02 08:33:01            Hooli  Software      3
1 2015-02-03 14:14:18          Initech  Software     13
7 2015-02-04 15:36:29        Streeplex  Software     13
3 2015-02-05 01:53:06  Acme Coporation  Software     19
5 2015-02-09 13:09:55        Mediacore  Software      7
4 2015-02-11 20:03:08          Initech  Software      7
6 2015-02-11 22:50:44            Hooli  Software      4
0 2015-02-16 12:09:19            Hooli  Software     10
8 2015-02-21 05:01:26        Mediacore  Software      3


In [46]:
# Print the date-sorted hardware sales (the original row labels are kept).
print(hardware)

                 Date          Company   Product  Units
3 2015-02-02 20:54:49        Mediacore  Hardware      9
0 2015-02-04 21:52:45  Acme Coporation  Hardware     14
1 2015-02-07 22:58:10  Acme Coporation  Hardware      1
2 2015-02-19 10:59:33        Mediacore  Hardware     16
4 2015-02-21 20:41:47            Hooli  Hardware      3


In [47]:
# Default inner merge on all four shared columns; no hardware row equals
# a software row, so the result is empty.
hardware.merge(software)

Unnamed: 0,Date,Company,Product,Units


In [48]:
# Outer merge on all shared columns: in the output shown, the hardware
# rows come first and the software rows follow.
hardware.merge(software, how='outer')

Unnamed: 0,Date,Company,Product,Units
0,2015-02-02 20:54:49,Mediacore,Hardware,9
1,2015-02-04 21:52:45,Acme Coporation,Hardware,14
2,2015-02-07 22:58:10,Acme Coporation,Hardware,1
3,2015-02-19 10:59:33,Mediacore,Hardware,16
4,2015-02-21 20:41:47,Hooli,Hardware,3
5,2015-02-02 08:33:01,Hooli,Software,3
6,2015-02-03 14:14:18,Initech,Software,13
7,2015-02-04 15:36:29,Streeplex,Software,13
8,2015-02-05 01:53:06,Acme Coporation,Software,19
9,2015-02-09 13:09:55,Mediacore,Software,7


In [49]:
# Outer merge, then sort by Date to interleave the two product lines.
hardware.merge(software, how='outer').sort_values(by='Date')

Unnamed: 0,Date,Company,Product,Units
5,2015-02-02 08:33:01,Hooli,Software,3
0,2015-02-02 20:54:49,Mediacore,Hardware,9
6,2015-02-03 14:14:18,Initech,Software,13
7,2015-02-04 15:36:29,Streeplex,Software,13
1,2015-02-04 21:52:45,Acme Coporation,Hardware,14
8,2015-02-05 01:53:06,Acme Coporation,Software,19
2,2015-02-07 22:58:10,Acme Coporation,Hardware,1
9,2015-02-09 13:09:55,Mediacore,Software,7
10,2015-02-11 20:03:08,Initech,Software,7
11,2015-02-11 22:50:44,Hooli,Software,4


In [50]:
# merge_ordered gives the date-ordered outer merge in one call, with a
# fresh 0..n-1 index.
pd.merge_ordered(left=hardware, right=software)

Unnamed: 0,Date,Company,Product,Units
0,2015-02-02 08:33:01,Hooli,Software,3
1,2015-02-02 20:54:49,Mediacore,Hardware,9
2,2015-02-03 14:14:18,Initech,Software,13
3,2015-02-04 15:36:29,Streeplex,Software,13
4,2015-02-04 21:52:45,Acme Coporation,Hardware,14
5,2015-02-05 01:53:06,Acme Coporation,Software,19
6,2015-02-07 22:58:10,Acme Coporation,Hardware,1
7,2015-02-09 13:09:55,Mediacore,Software,7
8,2015-02-11 20:03:08,Initech,Software,7
9,2015-02-11 22:50:44,Hooli,Software,4


In [51]:
# Key on (Date, Company) only; Product and Units are then split into
# per-source columns labelled with the given suffixes.
pd.merge_ordered(
    hardware,
    software,
    on=['Date', 'Company'],
    suffixes=('_hardware', '_software'),
)

Unnamed: 0,Date,Company,Product_hardware,Units_hardware,Product_software,Units_software
0,2015-02-02 08:33:01,Hooli,,,Software,3.0
1,2015-02-02 20:54:49,Mediacore,Hardware,9.0,,
2,2015-02-03 14:14:18,Initech,,,Software,13.0
3,2015-02-04 15:36:29,Streeplex,,,Software,13.0
4,2015-02-04 21:52:45,Acme Coporation,Hardware,14.0,,
5,2015-02-05 01:53:06,Acme Coporation,,,Software,19.0
6,2015-02-07 22:58:10,Acme Coporation,Hardware,1.0,,
7,2015-02-09 13:09:55,Mediacore,,,Software,7.0
8,2015-02-11 20:03:08,Initech,,,Software,7.0
9,2015-02-11 22:50:44,Hooli,,,Software,4.0


In [52]:
# Load the Austin and Houston weather ratings with 'date' parsed as
# datetimes; only the Austin frame is printed here.
austin = pd.read_csv('austin.csv', index_col=0, parse_dates=['date'])
houston = pd.read_csv('houston.csv', index_col=0, parse_dates=['date'])
print(austin)

        date ratings
0 2016-01-01  Cloudy
1 2016-02-08  Cloudy
2 2016-01-17   Sunny


In [53]:
# Ordered merge on all shared columns (date and ratings); the result is
# sorted by date and does not record which city each row came from.
tx_weather = pd.merge_ordered(left=austin, right=houston)
print(tx_weather)

        date ratings
0 2016-01-01  Cloudy
1 2016-01-04   Rainy
2 2016-01-17   Sunny
3 2016-02-08  Cloudy
4 2016-03-01   Sunny


In [54]:
# Key on date only so each city gets its own ratings column; dates seen
# in just one city leave NaN in the other city's column.
tx_weather_suff = pd.merge_ordered(
    austin,
    houston,
    suffixes=('_aus', '_hus'),
    on='date',
)
print(tx_weather_suff)

        date ratings_aus ratings_hus
0 2016-01-01      Cloudy      Cloudy
1 2016-01-04         NaN       Rainy
2 2016-01-17       Sunny         NaN
3 2016-02-08      Cloudy         NaN
4 2016-03-01         NaN       Sunny


In [55]:
# Same ordered merge, but forward-fill the gaps: each missing rating
# takes the most recent earlier value in its column.
tx_weather_ffill = pd.merge_ordered(
    austin,
    houston,
    fill_method='ffill',
    suffixes=('_aus', '_hus'),
    on='date',
)
print(tx_weather_ffill)

        date ratings_aus ratings_hus
0 2016-01-01      Cloudy      Cloudy
1 2016-01-04      Cloudy       Rainy
2 2016-01-17       Sunny       Rainy
3 2016-02-08      Cloudy       Rainy
4 2016-03-01      Cloudy       Sunny


In [60]:
# Load the automobile data with the model year ('yr') parsed as a
# datetime, and preview the first five rows.
auto = pd.read_csv('automobiles.csv', parse_dates=['yr'])
auto.head(n=5)

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name
0,18.0,8,307.0,130,3504,12.0,1970-01-01,US,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,1970-01-01,US,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,1970-01-01,US,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,1970-01-01,US,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,1970-01-01,US,ford torino


In [61]:
# Load the oil price series with 'Date' parsed as a datetime, and preview
# the first five rows.
oil = pd.read_csv('oil_price.csv', parse_dates=['Date'])
oil.head(n=5)

Unnamed: 0,Date,Price
0,1970-01-01,3.35
1,1970-02-01,3.35
2,1970-03-01,3.35
3,1970-04-01,3.35
4,1970-05-01,3.35


In [63]:
# As-of merge: attach to each car the oil row whose Date is the latest one
# not after the car's 'yr' (merge_asof's default backward search).
# Both frames must already be sorted on their key columns.
merged = pd.merge_asof(
    left=auto,
    right=oil,
    left_on='yr',
    right_on='Date',
)
merged.tail(n=5)

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name,Date,Price
387,27.0,4,140.0,86,2790,15.6,1982-01-01,US,ford mustang gl,1982-01-01,33.85
388,44.0,4,97.0,52,2130,24.6,1982-01-01,Europe,vw pickup,1982-01-01,33.85
389,32.0,4,135.0,84,2295,11.6,1982-01-01,US,dodge rampage,1982-01-01,33.85
390,28.0,4,120.0,79,2625,18.6,1982-01-01,US,ford ranger,1982-01-01,33.85
391,31.0,4,119.0,82,2720,19.4,1982-01-01,US,chevy s-10,1982-01-01,33.85


In [66]:
# Bucket the merged rows by calendar year of 'Date' (year-end labels) and
# average mpg and oil price within each year.
yearly = (
    merged
    .resample('A', on='Date')[['mpg', 'Price']]
    .mean()
)
yearly

Unnamed: 0_level_0,mpg,Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1970-12-31,17.689655,3.35
1971-12-31,21.111111,3.56
1972-12-31,18.714286,3.56
1973-12-31,17.1,3.56
1974-12-31,22.769231,10.11
1975-12-31,20.266667,11.16
1976-12-31,21.573529,11.16
1977-12-31,23.375,13.9
1978-12-31,24.061111,14.85
1979-12-31,25.093103,14.85


In [67]:
# Pearson correlation (the default method, written out) between the yearly
# mean mpg and the yearly mean oil price.
yearly.corr(method='pearson')

Unnamed: 0,mpg,Price
mpg,1.0,0.948677
Price,0.948677,1.0
