# Введение в комбинирование Data with Pandas' `merge`, `join`, and `concat`

Эта записная книжка содержит примеры кода для использования с одноименной статьей, а также краткие комментарии к каждому из примеров.

In [2]:
import pandas as pd

pd.set_option("display.max_columns", 50)

## Importing Data

Здесь вы импортируете наборы данных климатических норм температуры и осадков в DataFrames. Вызов `.head()` в DataFrame даст вам предварительный просмотр 5 строк ваших данных, а атрибут `shape` даст вам размеры данных в форме `(rows, columns)`. Это отличные проверки работоспособности, которые нужно выполнить, прежде чем делать слишком много с данными.

In [3]:
climate_temp = pd.read_csv("climate_temp.csv")
climate_temp.head()

Unnamed: 0,STATION,STATION_NAME,ELEVATION,LATITUDE,LONGITUDE,DATE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100101,6,2,-7777,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100102,6,2,1,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100103,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100104,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100105,6,2,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,10,15


In [4]:
climate_temp.shape

(127020, 21)

In [5]:
climate_precip = pd.read_csv("climate_precip.csv")
climate_precip.head()

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100101,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.02,0.0,0.02,0.0,98,43,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100102,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.04,0.0,0.04,0.0,99,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100103,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.05,0.0,0.05,0.0,100,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100104,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.07,0.0,0.07,0.0,101,45,12,3,0,0,0,0,0,0,0,0,0
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100105,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.09,0.0,0.09,0.0,102,45,12,3,0,0,0,0,0,0,0,0,0


In [6]:
climate_precip.shape

(151110, 29)

## merge()

В этом разделе вы узнаете о функциональности merge() в Pandas.

### Inner Join

Здесь вы будете использовать простой вызов `merge()` для выполнения внутреннего соединения и узнаете, как это может привести к меньшему, более целенаправленному набору данных. Сначала вы создадите новый объект DataFrame, содержащий данные об осадках с одной станции.

In [7]:
precip_one_station = climate_precip[climate_precip["STATION"] == 'GHCND:USC00045721']

In [9]:
precip_one_station.shape

(365, 29)

In [10]:
precip_one_station

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
1460,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.04,0.0,0.04,0.0,137,81,31,11,4,0,3,3,9,6,0,-9999,-9999
1461,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.08,0.0,0.08,0.0,138,83,31,11,4,0,3,3,10,6,0,-9999,-9999
1462,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.12,0.0,0.12,0.0,139,84,31,11,4,0,3,3,10,6,0,-9999,-9999
1463,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.16,0.0,0.16,0.0,140,85,32,11,4,0,3,2,10,6,0,-9999,-9999
1464,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666,-66.6,0.17,-666,-66.6,0.46,-666,-66.6,0.21,0.0,0.21,0.0,141,86,32,11,4,0,3,2,10,6,0,-9999,-9999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1820,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101227,0.04,-666,-66.6,0.15,-666,-66.6,0.44,-666,-66.6,1.14,0.3,11.29,1.5,132,76,30,12,4,0,3,3,9,6,0,2,2
1821,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101228,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.19,0.3,11.34,1.5,133,77,30,12,4,0,3,3,9,6,0,2,2
1822,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101229,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.24,0.3,11.39,1.5,133,78,30,11,4,0,3,3,9,6,0,2,2
1823,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101230,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.30,0.3,11.45,1.5,134,79,30,11,4,0,3,3,9,6,0,2,2


In [11]:
precip_one_station = climate_precip.query("STATION == 'GHCND:USC00045721'")
precip_one_station.head()

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
1460,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.04,0.0,0.04,0.0,137,81,31,11,4,0,3,3,9,6,0,-9999,-9999
1461,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.08,0.0,0.08,0.0,138,83,31,11,4,0,3,3,10,6,0,-9999,-9999
1462,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.12,0.0,0.12,0.0,139,84,31,11,4,0,3,3,10,6,0,-9999,-9999
1463,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.16,0.0,0.16,0.0,140,85,32,11,4,0,3,2,10,6,0,-9999,-9999
1464,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666,-66.6,0.17,-666,-66.6,0.46,-666,-66.6,0.21,0.0,0.21,0.0,141,86,32,11,4,0,3,2,10,6,0,-9999,-9999


In [12]:
precip_one_station.shape

(365, 29)

In [None]:
#29 21 из 3 общие. 

In [16]:
29+21 - 3

47

In [17]:
climate_temp.shape

(127020, 21)

In [37]:
precip_one_station.iloc[1]['DATE']=20100103

In [39]:
precip_one_station

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
1460,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.04,0.0,0.04,0.0,137,81,31,11,4,0,3,3,9,6,0,-9999,-9999
1461,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.08,0.0,0.08,0.0,138,83,31,11,4,0,3,3,10,6,0,-9999,-9999
1462,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.12,0.0,0.12,0.0,139,84,31,11,4,0,3,3,10,6,0,-9999,-9999
1463,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.16,0.0,0.16,0.0,140,85,32,11,4,0,3,2,10,6,0,-9999,-9999
1464,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666,-66.6,0.17,-666,-66.6,0.46,-666,-66.6,0.21,0.0,0.21,0.0,141,86,32,11,4,0,3,2,10,6,0,-9999,-9999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1820,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101227,0.04,-666,-66.6,0.15,-666,-66.6,0.44,-666,-66.6,1.14,0.3,11.29,1.5,132,76,30,12,4,0,3,3,9,6,0,2,2
1821,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101228,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.19,0.3,11.34,1.5,133,77,30,12,4,0,3,3,9,6,0,2,2
1822,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101229,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.24,0.3,11.39,1.5,133,78,30,11,4,0,3,3,9,6,0,2,2
1823,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101230,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.30,0.3,11.45,1.5,134,79,30,11,4,0,3,3,9,6,0,2,2


In [40]:
inner_merged = pd.merge(precip_one_station, climate_temp)
inner_merged.head()

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI,ELEVATION,LATITUDE,LONGITUDE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
0,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.04,0.0,0.04,0.0,137,81,31,11,4,0,3,3,9,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,12,14,19
1,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.08,0.0,0.08,0.0,138,83,31,11,4,0,3,3,10,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,11,14,19
2,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.12,0.0,0.12,0.0,139,84,31,11,4,0,3,3,10,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
3,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.16,0.0,0.16,0.0,140,85,32,11,4,0,3,2,10,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
4,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666,-66.6,0.17,-666,-66.6,0.46,-666,-66.6,0.21,0.0,0.21,0.0,141,86,32,11,4,0,3,2,10,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19


Как вы думаете, сколько строк имеет этот объединенный DataFrame?

In [41]:
inner_merged.shape

(365, 47)

In [42]:
inner_merged

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI,ELEVATION,LATITUDE,LONGITUDE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
0,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.04,0.0,0.04,0.0,137,81,31,11,4,0,3,3,9,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,12,14,19
1,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.08,0.0,0.08,0.0,138,83,31,11,4,0,3,3,10,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,11,14,19
2,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.12,0.0,0.12,0.0,139,84,31,11,4,0,3,3,10,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
3,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.16,0.0,0.16,0.0,140,85,32,11,4,0,3,2,10,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
4,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666,-66.6,0.17,-666,-66.6,0.46,-666,-66.6,0.21,0.0,0.21,0.0,141,86,32,11,4,0,3,2,10,6,0,-9999,-9999,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101227,0.04,-666,-66.6,0.15,-666,-66.6,0.44,-666,-66.6,1.14,0.3,11.29,1.5,132,76,30,12,4,0,3,3,9,6,0,2,2,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,-7777,0,0,1,3,6,10,12,15,20
361,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101228,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.19,0.3,11.34,1.5,133,77,30,12,4,0,3,3,9,6,0,2,2,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,-7777,0,0,1,3,6,10,12,15,20
362,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101229,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.24,0.3,11.39,1.5,133,78,30,11,4,0,3,3,9,6,0,2,2,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,-7777,0,0,1,3,6,10,12,15,20
363,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101230,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.30,0.3,11.45,1.5,134,79,30,11,4,0,3,3,9,6,0,2,2,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,-7777,0,0,1,3,6,10,12,15,20


Вы получаете 365 строк, потому что любые несоответствующие строки отбрасываются во внутреннем соединении, которое является методом слияния по умолчанию для вызова `merge()`, а `precip_one_station` имеет только 365 строк.

Что, если вы хотите объединить оба полных набора данных, но указать, какие столбцы объединять? В этом случае вы будете использовать параметр on:

In [44]:
climate_temp.columns

Index(['STATION', 'STATION_NAME', 'ELEVATION', 'LATITUDE', 'LONGITUDE', 'DATE',
       'DLY-CLDD-BASE45', 'DLY-CLDD-BASE50', 'DLY-CLDD-BASE55',
       'DLY-CLDD-BASE57', 'DLY-CLDD-BASE60', 'DLY-CLDD-NORMAL',
       'DLY-CLDD-BASE70', 'DLY-CLDD-BASE72', 'DLY-HTDD-BASE40',
       'DLY-HTDD-BASE45', 'DLY-HTDD-BASE50', 'DLY-HTDD-BASE55',
       'DLY-HTDD-BASE57', 'DLY-HTDD-BASE60', 'DLY-HTDD-NORMAL'],
      dtype='object')

In [45]:
for i in climate_temp.columns:
 if i in climate_precip.columns:
    print(i)

STATION
STATION_NAME
DATE


In [46]:
print(climate_temp.shape)
print(climate_precip.shape)

(127020, 21)
(151110, 29)


In [55]:
inner_merged_total = pd.merge(
    climate_temp, climate_precip, on=["STATION", "DATE"]
)
inner_merged_total.head(20)

Unnamed: 0,STATION,STATION_NAME_x,ELEVATION,LATITUDE,LONGITUDE,DATE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL,STATION_NAME_y,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100101,6,2,-7777,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,TWENTYNINE PALMS CA US,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.02,0.0,0.02,0.0,98,43,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100102,6,2,1,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,TWENTYNINE PALMS CA US,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.04,0.0,0.04,0.0,99,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100103,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.05,0.0,0.05,0.0,100,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100104,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.07,0.0,0.07,0.0,101,45,12,3,0,0,0,0,0,0,0,0,0
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100105,6,2,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.09,0.0,0.09,0.0,102,45,12,3,0,0,0,0,0,0,0,0,0
5,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100106,6,2,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.11,0.0,0.11,0.0,103,46,12,2,0,0,0,0,0,0,0,0,0
6,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100107,6,2,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.13,0.0,0.13,0.0,104,46,12,2,0,0,0,0,0,0,0,0,0
7,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100108,6,3,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,9,14,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.15,0.0,0.15,0.0,104,47,12,2,0,0,0,0,0,0,0,0,0
8,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100109,6,3,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,9,14,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.17,0.0,0.17,0.0,104,47,12,2,0,0,0,0,0,0,0,0,0
9,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100110,6,3,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,9,14,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.19,0.0,0.19,0.0,104,47,11,2,0,0,0,0,0,0,0,0,0


In [56]:
inner_merged_total.shape

(123005, 48)

Вы можете указать один _ключевой столбец_ со строкой или несколько ключевых столбцов со списком, как в приведенном выше примере. В результате получается DataFrame с 123005 строками и 48 столбцами.

Почему 48 столбцов вместо 47? Поскольку вы указали столбцы ключей для объединения, Pandas не пытается объединить все объединяемые столбцы. Это может привести к «дублированию» имен столбцов, которые могут иметь или не иметь разные значения. «Дублировать» заключено в кавычки, потому что столбцы на самом деле будут иметь новые имена, по умолчанию к ним добавляются `_x` и `_y`. Вы также можете использовать параметр `suffixes`, чтобы управлять тем, что добавляется к именам столбцов.

### Outer Join
При внешнем объединении вы также сохраните строки, в которых нет совпадений. В этом примере вы будете использовать меньший фрейм данных об осадках «precip_one_station» с полным фреймом данных «climate_temp» и соединить столбцы «STATION» и «DATE» в качестве ключевых столбцов. Потратьте секунду и подумайте, сколько строк вы ожидаете от нового DataFrame.

In [57]:
outer_merged = pd.merge(
    precip_one_station, climate_temp, how="outer", on=["STATION", "DATE"]
)
outer_merged.head()

Unnamed: 0,STATION,STATION_NAME_x,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI,STATION_NAME_y,ELEVATION,LATITUDE,LONGITUDE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
0,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666.0,-66.6,0.16,-666.0,-66.6,0.44,-666.0,-66.6,0.04,0.0,0.04,0.0,137.0,81.0,31.0,11.0,4.0,0.0,3.0,3.0,9.0,6.0,0.0,-9999.0,-9999.0,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,12,14,19
1,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666.0,-66.6,0.16,-666.0,-66.6,0.44,-666.0,-66.6,0.08,0.0,0.08,0.0,138.0,83.0,31.0,11.0,4.0,0.0,3.0,3.0,10.0,6.0,0.0,-9999.0,-9999.0,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,11,14,19
2,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666.0,-66.6,0.16,-666.0,-66.6,0.45,-666.0,-66.6,0.12,0.0,0.12,0.0,139.0,84.0,31.0,11.0,4.0,0.0,3.0,3.0,10.0,6.0,0.0,-9999.0,-9999.0,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
3,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666.0,-66.6,0.16,-666.0,-66.6,0.45,-666.0,-66.6,0.16,0.0,0.16,0.0,140.0,85.0,32.0,11.0,4.0,0.0,3.0,2.0,10.0,6.0,0.0,-9999.0,-9999.0,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
4,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666.0,-66.6,0.17,-666.0,-66.6,0.46,-666.0,-66.6,0.21,0.0,0.21,0.0,141.0,86.0,32.0,11.0,4.0,0.0,3.0,2.0,10.0,6.0,0.0,-9999.0,-9999.0,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19


In [58]:
outer_merged.shape

(127020, 48)

In [59]:
outer_merged[1250:1260]

Unnamed: 0,STATION,STATION_NAME_x,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI,STATION_NAME_y,ELEVATION,LATITUDE,LONGITUDE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
1250,GHCND:USC00047902,,20100605,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,18,13,8,6,3,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2
1251,GHCND:USC00047902,,20100606,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,18,13,8,6,3,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2
1252,GHCND:USC00047902,,20100607,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,18,13,8,6,3,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2
1253,GHCND:USC00047902,,20100608,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,18,13,8,6,4,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2
1254,GHCND:USC00047902,,20100609,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,18,13,8,6,4,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2
1255,GHCND:USC00047902,,20100610,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,19,14,9,7,4,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2
1256,GHCND:USC00047902,,20100611,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,19,14,9,7,4,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2
1257,GHCND:USC00047902,,20100612,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,19,14,9,7,4,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2
1258,GHCND:USC00047902,,20100613,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,19,14,9,7,4,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2
1259,GHCND:USC00047902,,20100614,,,,,,,,,,,,,,,,,,,,,,,,,,,SANTA BARBARA CA US,4.9,34.4167,-119.6844,19,14,9,7,4,1,-7777,-7777,0,0,0,-7777,-7777,-7777,2


Если вы помните, когда вы проверяли атрибут `.shape` в `climate_temp`, вы увидите, что количество строк в `outer_merged` совпадает с этим. При внешнем объединении вы можете ожидать, что у вас будет такое же количество строк, как и у большего DataFrame, поскольку ни одна из них не будет потеряна, как при внутреннем соединении

### Left Join
Также известен как левое внешнее соединение. В этом объединении вы сохраните строки, у которых нет совпадений, только в левом (или первом) кадре данных для объединения.

In [60]:
left_merged = pd.merge(
    climate_temp, precip_one_station, how="left", on=["STATION", "DATE"]
)
left_merged.loc[4567:4578]

Unnamed: 0,STATION,STATION_NAME_x,ELEVATION,LATITUDE,LONGITUDE,DATE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL,STATION_NAME_y,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
4567,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100707,16,11,7,5,3,1,-7777,-7777,0,-7777,-7777,-7777,1,2,4,,,,,,,,,,,,,,,,,,,,,,,,,,,
4568,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100708,16,11,7,5,3,1,-7777,-7777,0,-7777,-7777,-7777,1,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,
4569,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100709,17,12,7,5,3,1,-7777,-7777,0,0,-7777,-7777,1,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,
4570,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100710,17,12,7,5,3,1,-7777,-7777,0,0,-7777,-7777,1,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,
4571,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100711,17,12,7,6,3,1,-7777,-7777,0,-7777,-7777,-7777,1,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,
4572,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100712,17,12,7,6,3,1,-7777,-7777,0,-7777,-7777,-7777,1,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,
4573,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100713,17,12,8,6,3,1,-7777,-7777,0,-7777,-7777,-7777,-7777,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,
4574,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100714,17,13,8,6,4,1,-7777,-7777,0,-7777,-7777,-7777,-7777,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,
4575,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100715,18,13,8,6,4,1,-7777,-7777,0,-7777,-7777,-7777,-7777,1,3,,,,,,,,,,,,,,,,,,,,,,,,,,,
4576,GHCND:USC00042467,DONNER MEMORIAL ST PARK CA US,1809.6,39.3239,-120.2331,20100716,18,13,8,6,4,1,-7777,-7777,0,-7777,-7777,-7777,-7777,1,3,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [22]:
left_merged.shape

(127020, 48)

Здесь вы видите, что количество строк в результирующем кадре данных совпадает с количеством строк в кадре данных «climate_temp». Что, если мы поменяем местами два фрейма данных, которые мы объединяем?

In [61]:
left_merged_reversed = pd.merge(
    precip_one_station, climate_temp, how="left", on=["STATION", "DATE"]
)
left_merged_reversed.head()

Unnamed: 0,STATION,STATION_NAME_x,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI,STATION_NAME_y,ELEVATION,LATITUDE,LONGITUDE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
0,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.04,0.0,0.04,0.0,137,81,31,11,4,0,3,3,9,6,0,-9999,-9999,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,12,14,19
1,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.08,0.0,0.08,0.0,138,83,31,11,4,0,3,3,10,6,0,-9999,-9999,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,11,14,19
2,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.12,0.0,0.12,0.0,139,84,31,11,4,0,3,3,10,6,0,-9999,-9999,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
3,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.16,0.0,0.16,0.0,140,85,32,11,4,0,3,2,10,6,0,-9999,-9999,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
4,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666,-66.6,0.17,-666,-66.6,0.46,-666,-66.6,0.21,0.0,0.21,0.0,141,86,32,11,4,0,3,2,10,6,0,-9999,-9999,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19


In [62]:
left_merged_reversed.shape

(365, 48)

### Right Join
This works the same as the left join, however non-matching rows are only retained in the _right_ DataFrame. In the next example, you will recreate the `left_merged` DataFrame but with a right join.

In [40]:
right_merged = pd.merge(
    precip_one_station, climate_temp, how="right", on=["STATION", "DATE"]
)
right_merged.head()

Unnamed: 0,STATION,STATION_NAME_x,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI,STATION_NAME_y,ELEVATION,LATITUDE,LONGITUDE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
0,GHCND:USC00049099,,20100101,,,,,,,,,,,,,,,,,,,,,,,,,,,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,6,2,-7777,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15
1,GHCND:USC00049099,,20100102,,,,,,,,,,,,,,,,,,,,,,,,,,,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,6,2,1,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15
2,GHCND:USC00049099,,20100103,,,,,,,,,,,,,,,,,,,,,,,,,,,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15
3,GHCND:USC00049099,,20100104,,,,,,,,,,,,,,,,,,,,,,,,,,,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15
4,GHCND:USC00049099,,20100105,,,,,,,,,,,,,,,,,,,,,,,,,,,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,6,2,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,10,15


In [41]:
right_merged.shape

(127020, 48)

Здесь вы просто перевернули позиции входных фреймов данных и указали правильное соединение. Когда вы исследуете `right_merged`, вы можете заметить, что это не совсем то же самое, что `left_merged`. Единственная разница между ними заключается в порядке столбцов: столбцы первого ввода всегда будут первыми во вновь сформированном кадре данных.

## .присоединиться()
`.join()` внутри использует `merge()`, но предоставляет гораздо более упрощенный интерфейс для `merge()` и по умолчанию объединяется с индексами. Вот вводный пример использования параметров lsuffix и rsuffix для обработки перекрывающихся имен столбцов.

In [42]:
precip_one_station.join(climate_temp, lsuffix="_left", rsuffix="_right")

Unnamed: 0,STATION_left,STATION_NAME_left,DATE_left,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI,STATION_right,STATION_NAME_right,ELEVATION,LATITUDE,LONGITUDE,DATE_right,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
1460,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.04,0.0,0.04,0.0,137,81,31,11,4,0,3,3,9,6,0,-9999,-9999,GHCND:USC00045721,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,20100101,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,12,14,19
1461,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.08,0.0,0.08,0.0,138,83,31,11,4,0,3,3,10,6,0,-9999,-9999,GHCND:USC00045721,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,20100102,3,1,-7777,-7777,-7777,0,0,0,1,3,6,10,11,14,19
1462,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.12,0.0,0.12,0.0,139,84,31,11,4,0,3,3,10,6,0,-9999,-9999,GHCND:USC00045721,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,20100103,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
1463,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.16,0.0,0.16,0.0,140,85,32,11,4,0,3,2,10,6,0,-9999,-9999,GHCND:USC00045721,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,20100104,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
1464,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666,-66.6,0.17,-666,-66.6,0.46,-666,-66.6,0.21,0.0,0.21,0.0,141,86,32,11,4,0,3,2,10,6,0,-9999,-9999,GHCND:USC00045721,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,20100105,3,1,-7777,-7777,-7777,0,0,0,1,2,5,9,11,14,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1820,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101227,0.04,-666,-66.6,0.15,-666,-66.6,0.44,-666,-66.6,1.14,0.3,11.29,1.5,132,76,30,12,4,0,3,3,9,6,0,2,2,GHCND:USC00045721,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,20101227,3,1,-7777,-7777,-7777,-7777,0,0,1,3,6,10,12,15,20
1821,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101228,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.19,0.3,11.34,1.5,133,77,30,12,4,0,3,3,9,6,0,2,2,GHCND:USC00045721,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,20101228,3,1,-7777,-7777,-7777,-7777,0,0,1,3,6,10,12,15,20
1822,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101229,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.24,0.3,11.39,1.5,133,78,30,11,4,0,3,3,9,6,0,2,2,GHCND:USC00045721,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,20101229,3,1,-7777,-7777,-7777,-7777,0,0,1,3,6,10,12,15,20
1823,GHCND:USC00045721,MITCHELL CAVERNS CA US,20101230,0.04,-666,-66.6,0.15,-666,-66.6,0.43,-666,-66.6,1.30,0.3,11.45,1.5,134,79,30,11,4,0,3,3,9,6,0,2,2,GHCND:USC00045721,MITCHELL CAVERNS CA US,1325.9,34.9436,-115.5469,20101230,3,1,-7777,-7777,-7777,-7777,0,0,1,3,6,10,12,15,20


Если вы проверите данные, вы увидите, что перекрывающиеся столбцы сохранены, просто переименованы, чтобы быть уникальными. Если мы перевернем это и вместо этого вызовем `.join()` для большего DataFrame, вы заметите, что DataFrame больше, но данные, которые не существуют в меньшем DataFrame (`precip_one_station`), заполняются ` NaN` (_не число_).

In [43]:
climate_temp.join(precip_one_station, lsuffix="_left", rsuffix="_right")

Unnamed: 0,STATION_left,STATION_NAME_left,ELEVATION,LATITUDE,LONGITUDE,DATE_left,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL,STATION_right,STATION_NAME_right,DATE_right,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100101,6,2,-7777,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100102,6,2,1,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100103,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100104,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100105,6,2,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,10,15,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127015,GHCND:USC00046006,MOUNT WILSON CBS CA US,1740.4,34.2308,-118.0711,20101227,4,2,1,-7777,-7777,-7777,0,0,2,4,6,10,12,15,20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
127016,GHCND:USC00046006,MOUNT WILSON CBS CA US,1740.4,34.2308,-118.0711,20101228,4,2,1,-7777,-7777,-7777,0,0,2,3,6,10,12,15,20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
127017,GHCND:USC00046006,MOUNT WILSON CBS CA US,1740.4,34.2308,-118.0711,20101229,4,2,1,-7777,-7777,-7777,0,0,2,3,6,10,12,15,19,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
127018,GHCND:USC00046006,MOUNT WILSON CBS CA US,1740.4,34.2308,-118.0711,20101230,4,2,1,-7777,-7777,-7777,0,0,2,3,6,10,12,15,19,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


If you must use `.join()` and want to merge the columns, you must set them to be indexes first. First take a look at this previously used `merge()` operation:

In [44]:
inner_merged_total = pd.merge(
    climate_temp, climate_precip, on=["STATION", "DATE"]
)
inner_merged_total.head()

Unnamed: 0,STATION,STATION_NAME_x,ELEVATION,LATITUDE,LONGITUDE,DATE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL,STATION_NAME_y,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100101,6,2,-7777,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,TWENTYNINE PALMS CA US,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.02,0.0,0.02,0.0,98,43,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100102,6,2,1,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,TWENTYNINE PALMS CA US,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.04,0.0,0.04,0.0,99,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100103,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.05,0.0,0.05,0.0,100,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100104,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.07,0.0,0.07,0.0,101,45,12,3,0,0,0,0,0,0,0,0,0
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100105,6,2,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.09,0.0,0.09,0.0,102,45,12,3,0,0,0,0,0,0,0,0,0


In [45]:
inner_joined_total = climate_temp.join(
    climate_precip.set_index(["STATION", "DATE"]),
    on=["STATION", "DATE"],
    how="inner",
    lsuffix="_x",
    rsuffix="_y",
)
inner_joined_total.head()

Unnamed: 0,STATION,STATION_NAME_x,ELEVATION,LATITUDE,LONGITUDE,DATE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL,STATION_NAME_y,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100101,6,2,-7777,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,TWENTYNINE PALMS CA US,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.02,0.0,0.02,0.0,98,43,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100102,6,2,1,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,TWENTYNINE PALMS CA US,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.04,0.0,0.04,0.0,99,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100103,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.05,0.0,0.05,0.0,100,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100104,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.07,0.0,0.07,0.0,101,45,12,3,0,0,0,0,0,0,0,0,0
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100105,6,2,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,10,15,TWENTYNINE PALMS CA US,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.09,0.0,0.09,0.0,102,45,12,3,0,0,0,0,0,0,0,0,0


Because `.join()` works on indexes, if we want to recreate `merge()` before, then we must set indexes on the join columns we specify. In this example, you used the `.set_index()` method to set your indexes to the key columns within the join. 

Below you will see an almost-bare `.join()` call. Because there are overlapping columns, you will have to specify a suffix with `lsuffix`, `rsuffix`, or both, but this example will demonstrate the more typical behavior of `.join()`.

In [46]:
climate_temp.join(climate_precip, lsuffix="_left")

Unnamed: 0,STATION_left,STATION_NAME_left,ELEVATION,LATITUDE,LONGITUDE,DATE_left,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100101,6,2,-7777,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100101,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.02,0.0,0.02,0.0,98,43,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100102,6,2,1,-7777,-7777,0,0,0,-7777,1,2,6,7,10,15,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100102,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.04,0.0,0.04,0.0,99,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100103,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100103,-6.66,-666,-66.6,-6.66,-666,-66.6,-6.66,-666,-66.6,0.05,0.0,0.05,0.0,100,44,12,3,-9999,0,-9999,-9999,-9999,-9999,0,-9999,-9999
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100104,6,2,1,-7777,-7777,0,0,0,-7777,1,2,5,7,10,15,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100104,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.07,0.0,0.07,0.0,101,45,12,3,0,0,0,0,0,0,0,0,0
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100105,6,2,1,-7777,-7777,0,0,0,-7777,-7777,2,5,7,10,15,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100105,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,-6.66,-9999,-9999.0,0.09,0.0,0.09,0.0,102,45,12,3,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127015,GHCND:USC00046006,MOUNT WILSON CBS CA US,1740.4,34.2308,-118.0711,20101227,4,2,1,-7777,-7777,-7777,0,0,2,4,6,10,12,15,20,GHCND:USC00047874,SAN PASQUAL ANIMAL PARK CA US,20101227,0.06,-9999,-9999.0,0.20,-9999,-9999.0,0.45,-9999,-9999.0,1.78,0.0,13.71,0.0,210,137,45,15,0,0,0,0,0,0,0,0,0
127016,GHCND:USC00046006,MOUNT WILSON CBS CA US,1740.4,34.2308,-118.0711,20101228,4,2,1,-7777,-7777,-7777,0,0,2,3,6,10,12,15,20,GHCND:USC00047874,SAN PASQUAL ANIMAL PARK CA US,20101228,0.06,-9999,-9999.0,0.20,-9999,-9999.0,0.45,-9999,-9999.0,1.85,0.0,13.78,0.0,210,137,46,15,0,0,0,0,0,0,0,0,0
127017,GHCND:USC00046006,MOUNT WILSON CBS CA US,1740.4,34.2308,-118.0711,20101229,4,2,1,-7777,-7777,-7777,0,0,2,3,6,10,12,15,19,GHCND:USC00047874,SAN PASQUAL ANIMAL PARK CA US,20101229,0.06,-9999,-9999.0,0.20,-9999,-9999.0,0.46,-9999,-9999.0,1.93,0.0,13.86,0.0,210,137,47,16,0,0,0,0,0,0,0,0,0
127018,GHCND:USC00046006,MOUNT WILSON CBS CA US,1740.4,34.2308,-118.0711,20101230,4,2,1,-7777,-7777,-7777,0,0,2,3,6,10,12,15,19,GHCND:USC00047874,SAN PASQUAL ANIMAL PARK CA US,20101230,0.06,-9999,-9999.0,0.20,-9999,-9999.0,0.47,-9999,-9999.0,2.01,0.0,13.94,0.0,210,137,48,16,0,0,0,0,0,0,0,0,0


## concat()


Во-первых, вы выполните базовую конкатенацию вдоль оси по умолчанию, используя кадры данных, с которыми вы играли в этом руководстве:

In [47]:
double_precip = pd.concat([precip_one_station, precip_one_station])
double_precip.head()

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
1460,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.04,0.0,0.04,0.0,137,81,31,11,4,0,3,3,9,6,0,-9999,-9999
1461,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.08,0.0,0.08,0.0,138,83,31,11,4,0,3,3,10,6,0,-9999,-9999
1462,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.12,0.0,0.12,0.0,139,84,31,11,4,0,3,3,10,6,0,-9999,-9999
1463,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.16,0.0,0.16,0.0,140,85,32,11,4,0,3,2,10,6,0,-9999,-9999
1464,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666,-66.6,0.17,-666,-66.6,0.46,-666,-66.6,0.21,0.0,0.21,0.0,141,86,32,11,4,0,3,2,10,6,0,-9999,-9999


In [48]:
double_precip.shape

(730, 29)

In [50]:
precip_one_station.shape

(365, 29)

Этот очень прост по дизайну. Здесь вы создали DataFrame, который является дубликатом небольшого DataFrame, созданного ранее. Следует заметить, что индексы повторяются. Если вам нужен новый индекс, основанный на 0, вы можете использовать ignore_index параметр:



In [51]:
reindexed = pd.concat(
    [precip_one_station, precip_one_station], ignore_index=True
)
reindexed.head()

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
0,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100101,0.04,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.04,0.0,0.04,0.0,137,81,31,11,4,0,3,3,9,6,0,-9999,-9999
1,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100102,0.05,-666,-66.6,0.16,-666,-66.6,0.44,-666,-66.6,0.08,0.0,0.08,0.0,138,83,31,11,4,0,3,3,10,6,0,-9999,-9999
2,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100103,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.12,0.0,0.12,0.0,139,84,31,11,4,0,3,3,10,6,0,-9999,-9999
3,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100104,0.05,-666,-66.6,0.16,-666,-66.6,0.45,-666,-66.6,0.16,0.0,0.16,0.0,140,85,32,11,4,0,3,2,10,6,0,-9999,-9999
4,GHCND:USC00045721,MITCHELL CAVERNS CA US,20100105,0.05,-666,-66.6,0.17,-666,-66.6,0.46,-666,-66.6,0.21,0.0,0.21,0.0,141,86,32,11,4,0,3,2,10,6,0,-9999,-9999


In [52]:
reindexed.shape

(730, 29)

When axis labels for the axis you are **not** concatenating along don't match (for example, column labels when concatenating along rows), then all columns are preserved and missing data is filled in with `NaN`. 

In [53]:
outer_joined = pd.concat([climate_precip, climate_temp])
outer_joined.head()

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI,ELEVATION,LATITUDE,LONGITUDE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100101,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,0.02,0.0,0.02,0.0,98.0,43.0,12.0,3.0,-9999.0,0.0,-9999.0,-9999.0,-9999.0,-9999.0,0.0,-9999.0,-9999.0,,,,,,,,,,,,,,,,,,
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100102,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,0.04,0.0,0.04,0.0,99.0,44.0,12.0,3.0,-9999.0,0.0,-9999.0,-9999.0,-9999.0,-9999.0,0.0,-9999.0,-9999.0,,,,,,,,,,,,,,,,,,
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100103,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,0.05,0.0,0.05,0.0,100.0,44.0,12.0,3.0,-9999.0,0.0,-9999.0,-9999.0,-9999.0,-9999.0,0.0,-9999.0,-9999.0,,,,,,,,,,,,,,,,,,
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100104,-6.66,-9999.0,-9999.0,-6.66,-9999.0,-9999.0,-6.66,-9999.0,-9999.0,0.07,0.0,0.07,0.0,101.0,45.0,12.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100105,-6.66,-9999.0,-9999.0,-6.66,-9999.0,-9999.0,-6.66,-9999.0,-9999.0,0.09,0.0,0.09,0.0,102.0,45.0,12.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,


Как отмечалось ранее, если вы объединяете по оси 0 (строки), но имеете несовпадающие метки на оси 1 (столбцы), то эти столбцы будут добавлены и заполнены NaN значениями. Это приводит к внешнему соединению

In [54]:
outer_joined.shape

(278130, 47)

In [48]:
outer_joined

Unnamed: 0,STATION,STATION_NAME,DATE,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI,ELEVATION,LATITUDE,LONGITUDE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100101,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,0.02,0.0,0.02,0.0,98.0,43.0,12.0,3.0,-9999.0,0.0,-9999.0,-9999.0,-9999.0,-9999.0,0.0,-9999.0,-9999.0,,,,,,,,,,,,,,,,,,
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100102,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,0.04,0.0,0.04,0.0,99.0,44.0,12.0,3.0,-9999.0,0.0,-9999.0,-9999.0,-9999.0,-9999.0,0.0,-9999.0,-9999.0,,,,,,,,,,,,,,,,,,
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100103,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,-6.66,-666.0,-66.6,0.05,0.0,0.05,0.0,100.0,44.0,12.0,3.0,-9999.0,0.0,-9999.0,-9999.0,-9999.0,-9999.0,0.0,-9999.0,-9999.0,,,,,,,,,,,,,,,,,,
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100104,-6.66,-9999.0,-9999.0,-6.66,-9999.0,-9999.0,-6.66,-9999.0,-9999.0,0.07,0.0,0.07,0.0,101.0,45.0,12.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100105,-6.66,-9999.0,-9999.0,-6.66,-9999.0,-9999.0,-6.66,-9999.0,-9999.0,0.09,0.0,0.09,0.0,102.0,45.0,12.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127015,GHCND:USC00046006,MOUNT WILSON CBS CA US,20101227,,,,,,,,,,,,,,,,,,,,,,,,,,,1740.4,34.2308,-118.0711,4.0,2.0,1.0,-7777.0,-7777.0,-7777.0,0.0,0.0,2.0,4.0,6.0,10.0,12.0,15.0,20.0
127016,GHCND:USC00046006,MOUNT WILSON CBS CA US,20101228,,,,,,,,,,,,,,,,,,,,,,,,,,,1740.4,34.2308,-118.0711,4.0,2.0,1.0,-7777.0,-7777.0,-7777.0,0.0,0.0,2.0,3.0,6.0,10.0,12.0,15.0,20.0
127017,GHCND:USC00046006,MOUNT WILSON CBS CA US,20101229,,,,,,,,,,,,,,,,,,,,,,,,,,,1740.4,34.2308,-118.0711,4.0,2.0,1.0,-7777.0,-7777.0,-7777.0,0.0,0.0,2.0,3.0,6.0,10.0,12.0,15.0,19.0
127018,GHCND:USC00046006,MOUNT WILSON CBS CA US,20101230,,,,,,,,,,,,,,,,,,,,,,,,,,,1740.4,34.2308,-118.0711,4.0,2.0,1.0,-7777.0,-7777.0,-7777.0,0.0,0.0,2.0,3.0,6.0,10.0,12.0,15.0,19.0


С этими двумя кадрами данных, поскольку вы просто объединяете строки, очень немногие столбцы имеют одинаковое имя. Это означает, что вы увидите множество столбцов со NaNзначениями.

Чтобы вместо этого удалить столбцы с отсутствующими данными, используйте joinпараметр со значением "inner"для выполнения внутреннего соединения:

In [55]:
inner_joined = pd.concat([climate_temp, climate_precip], join="inner")
inner_joined.head()

Unnamed: 0,STATION,STATION_NAME,DATE
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100101
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100102
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100103
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100104
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100105


In [56]:
inner_joined.shape

(278130, 3)

Используя внутреннее соединение, у вас останутся только те столбцы, которые являются общими для исходных фреймов данных: STATION, STATION_NAME, и DATE.

Вы также можете перевернуть это, установив axis параметр:

In [57]:
inner_joined_cols = pd.concat(
    [climate_temp, climate_precip], axis="columns", join="inner"
)
inner_joined.head()

Unnamed: 0,STATION,STATION_NAME,DATE
0,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100101
1,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100102
2,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100103
3,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100104
4,GHCND:USC00049099,TWENTYNINE PALMS CA US,20100105


In [58]:
inner_joined_cols.shape

(127020, 50)

Теперь у вас есть только строки, содержащие данные для всех столбцов в обоих кадрах данных. Не случайно количество строк соответствует количеству меньшего DataFrame.

Еще один полезный прием для конкатенации — использование keys параметра для создания меток иерархической оси. Это полезно, если вы хотите сохранить индексы или имена столбцов исходных наборов данных, но также хотите добавить новые:

In [59]:
hierarchical_keys = pd.concat(
    [climate_temp, climate_precip], keys=["temp", "precip"]
)
hierarchical_keys.head()

Unnamed: 0,Unnamed: 1,STATION,STATION_NAME,ELEVATION,LATITUDE,LONGITUDE,DATE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
temp,0,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100101,6.0,2.0,-7777.0,-7777.0,-7777.0,0.0,0.0,0.0,-7777.0,1.0,2.0,6.0,7.0,10.0,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,
temp,1,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100102,6.0,2.0,1.0,-7777.0,-7777.0,0.0,0.0,0.0,-7777.0,1.0,2.0,6.0,7.0,10.0,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,
temp,2,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100103,6.0,2.0,1.0,-7777.0,-7777.0,0.0,0.0,0.0,-7777.0,1.0,2.0,5.0,7.0,10.0,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,
temp,3,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100104,6.0,2.0,1.0,-7777.0,-7777.0,0.0,0.0,0.0,-7777.0,1.0,2.0,5.0,7.0,10.0,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,
temp,4,GHCND:USC00049099,TWENTYNINE PALMS CA US,602,34.12806,-116.03694,20100105,6.0,2.0,1.0,-7777.0,-7777.0,0.0,0.0,0.0,-7777.0,-7777.0,2.0,5.0,7.0,10.0,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,


In [62]:
hierarchical_keys.tail()

Unnamed: 0,Unnamed: 1,STATION,STATION_NAME,ELEVATION,LATITUDE,LONGITUDE,DATE,DLY-CLDD-BASE45,DLY-CLDD-BASE50,DLY-CLDD-BASE55,DLY-CLDD-BASE57,DLY-CLDD-BASE60,DLY-CLDD-NORMAL,DLY-CLDD-BASE70,DLY-CLDD-BASE72,DLY-HTDD-BASE40,DLY-HTDD-BASE45,DLY-HTDD-BASE50,DLY-HTDD-BASE55,DLY-HTDD-BASE57,DLY-HTDD-BASE60,DLY-HTDD-NORMAL,DLY-PRCP-25PCTL,DLY-SNWD-25PCTL,DLY-SNOW-25PCTL,DLY-PRCP-50PCTL,DLY-SNWD-50PCTL,DLY-SNOW-50PCTL,DLY-PRCP-75PCTL,DLY-SNWD-75PCTL,DLY-SNOW-75PCTL,MTD-PRCP-NORMAL,MTD-SNOW-NORMAL,YTD-PRCP-NORMAL,YTD-SNOW-NORMAL,DLY-PRCP-PCTALL-GE001HI,DLY-PRCP-PCTALL-GE010HI,DLY-PRCP-PCTALL-GE050HI,DLY-PRCP-PCTALL-GE100HI,DLY-SNWD-PCTALL-GE001WI,DLY-SNWD-PCTALL-GE010WI,DLY-SNWD-PCTALL-GE003WI,DLY-SNWD-PCTALL-GE005WI,DLY-SNOW-PCTALL-GE001TI,DLY-SNOW-PCTALL-GE010TI,DLY-SNOW-PCTALL-GE100TI,DLY-SNOW-PCTALL-GE030TI,DLY-SNOW-PCTALL-GE050TI
precip,151105,GHCND:USC00046006,MOUNT WILSON CBS CA US,,,,20101227,,,,,,,,,,,,,,,,0.12,-9999.0,-9999.0,0.41,-9999.0,-9999.0,1.13,-9999.0,-9999.0,3.91,-9999.0,35.2,-9999.0,218.0,169.0,100.0,62.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0
precip,151106,GHCND:USC00046006,MOUNT WILSON CBS CA US,,,,20101228,,,,,,,,,,,,,,,,0.12,-9999.0,-9999.0,0.42,-9999.0,-9999.0,1.14,-9999.0,-9999.0,4.1,-9999.0,35.39,-9999.0,219.0,170.0,101.0,62.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0
precip,151107,GHCND:USC00046006,MOUNT WILSON CBS CA US,,,,20101229,,,,,,,,,,,,,,,,0.12,-9999.0,-9999.0,0.43,-9999.0,-9999.0,1.15,-9999.0,-9999.0,4.3,-9999.0,35.59,-9999.0,220.0,171.0,101.0,63.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0
precip,151108,GHCND:USC00046006,MOUNT WILSON CBS CA US,,,,20101230,,,,,,,,,,,,,,,,0.12,-9999.0,-9999.0,0.43,-9999.0,-9999.0,1.15,-9999.0,-9999.0,4.5,-9999.0,35.79,-9999.0,220.0,171.0,102.0,64.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0
precip,151109,GHCND:USC00046006,MOUNT WILSON CBS CA US,,,,20101231,,,,,,,,,,,,,,,,0.12,-9999.0,-9999.0,0.44,-9999.0,-9999.0,1.16,-9999.0,-9999.0,4.69,-9999.0,35.98,-9999.0,220.0,171.0,103.0,64.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0


In [61]:
hierarchical_keys.index

MultiIndex([(  'temp',      0),
            (  'temp',      1),
            (  'temp',      2),
            (  'temp',      3),
            (  'temp',      4),
            (  'temp',      5),
            (  'temp',      6),
            (  'temp',      7),
            (  'temp',      8),
            (  'temp',      9),
            ...
            ('precip', 151100),
            ('precip', 151101),
            ('precip', 151102),
            ('precip', 151103),
            ('precip', 151104),
            ('precip', 151105),
            ('precip', 151106),
            ('precip', 151107),
            ('precip', 151108),
            ('precip', 151109)],
           length=278130)