# 合并pandas对象

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## dataframe 添加新的行

In [None]:
# Load the sample names table; the bare `names` expression renders it as
# the cell output.
names = pd.read_csv('data/names.csv')
names

In [None]:
# Add a row in place by assigning a list to a new label through .loc.
# Assumes label 4 is not already in the index (the frame read from the CSV
# has four rows, see the 4-label index assigned later) - otherwise this
# would overwrite the existing row instead of adding one.
new_data_list = ['Aria', 1]
names.loc[4] = new_data_list
names

In [None]:
# Row labels do not have to be integers: this stores the row under the
# string label 'five'.
names.loc['five'] = ['Zach', 3]
names

In [None]:
# Use the current row count as the new label and pass the values as a dict
# so each value is tied to a column name.
# NOTE(review): presumably the dict is matched to columns by key - verify
# on the pandas version in use.
names.loc[len(names)] = {'Name':'Zayd', 'Age':2}
names

In [None]:
# Same idea with a Series whose keys are given in a different order
# (Age before Name) than the dict used previously.
# NOTE(review): the Series is expected to align on its index labels, so the
# key order should not matter - confirm.
names.loc[len(names)] = pd.Series({'Age':32, 'Name':'Dean'})
names

In [None]:
# Start over from a fresh copy of the table. The path now matches the
# 'data/' directory used by every other read_csv call in this notebook
# (the original 'names.csv' pointed outside that directory).
names = pd.read_csv('data/names.csv')
# Try to append a plain dict as a new row.
# NOTE(review): without ignore_index=True this call is expected to raise
# TypeError, which presumably motivates the next cell - confirm it is
# intentional. Also note DataFrame.append was deprecated in pandas 1.4 and
# removed in 2.0; confirm the pandas version this notebook targets.
names.append({'Name':'Aria', 'Age':1})

In [None]:
# With ignore_index=True a plain dict can be appended. The result is
# returned as a new object and is only displayed here, not assigned back
# to `names`.
names.append({'Name':'Aria', 'Age':1}, ignore_index=True)

In [None]:
# Replace the row labels with four (repeated) country names so the next
# cells can show how append treats an existing, non-default index.
names.index = ['Canada', 'Canada', 'USA', 'USA']
names

In [None]:
# Same append as before, now on the frame with country labels.
# NOTE(review): ignore_index=True is documented to relabel the result
# 0..n-1, i.e. the country labels are expected to be dropped - confirm.
names.append({'Name':'Aria', 'Age':1}, ignore_index=True)

In [None]:
# Build the new row as a Series and give it a name (the current row count);
# the name is meant to serve as the row label when the Series is appended.
s = pd.Series({'Name': 'Zach', 'Age': 3}, name=len(names))

In [None]:
# Append the named Series without ignore_index, so the existing labels are
# kept. The result is displayed only; `names` is not reassigned.
names.append(s)

In [None]:
# append also accepts a list of Series; each one carries its own label via
# `name` (an integer for s1, the existing label 'USA' for s2).
s1 = pd.Series({'Name': 'Zach', 'Age': 3}, name=len(names))
s2 = pd.Series({'Name': 'Zayd', 'Age': 2}, name='USA')
names.append([s1, s2])

In [None]:
# Load the 2016 baseball table and preview the first rows. The path now
# uses the same 'data/' directory as the other read_csv calls in this
# notebook (the original 'baseball16.csv' lacked the prefix).
bball_16 = pd.read_csv('data/baseball16.csv')
bball_16.head()

In [None]:
# Turn the first row into a {column name: value} dict; it is used below as
# a template that records which columns hold strings.
data_dict = bball_16.iloc[0].to_dict()
print(data_dict)

In [None]:
# Make an "empty" record with the same keys as data_dict: string-valued
# fields become '' and every other field becomes NaN.
new_data_dict = {
    column: ('' if isinstance(sample, str) else np.nan)
    for column, sample in data_dict.items()
}
print(new_data_dict)

In [None]:
# Build 1000 synthetic rows shaped like a bball_16 record: a random letter
# from 'abcde' for each string field, a random integer in [0, 10) for every
# other field. Each Series is named with the label it would get when
# appended after the existing rows (len(bball_16), len(bball_16) + 1, ...).
random_data = [
    pd.Series(
        {
            column: (
                np.random.choice(list('abcde'))
                if isinstance(sample, str)
                else np.random.randint(10)
            )
            for column, sample in data_dict.items()
        },
        name=offset + len(bball_16),
    )
    for offset in range(1000)
]

random_data[0].head()

In [None]:
%%timeit
# Benchmark: append the synthetic rows one at a time. Every call returns a
# new DataFrame that is rebound to bball_16_copy, so the loop performs one
# append per row in random_data.
bball_16_copy = bball_16.copy()
for row in random_data:
    bball_16_copy = bball_16_copy.append(row)

In [None]:
%%timeit
# Benchmark: append the whole list of rows in a single call, for comparison
# with the row-by-row loop.
bball_16_copy = bball_16.copy()
bball_16_copy = bball_16_copy.append(random_data)

## 连接多个dataframe

In [None]:
# Load the 2016 and 2017 stock tables, using the 'Symbol' column as the
# row index, and display the 2016 table.
stocks_2016 = pd.read_csv('data/stocks_2016.csv', index_col='Symbol')
stocks_2017 = pd.read_csv('data/stocks_2017.csv', index_col='Symbol')
stocks_2016

In [None]:
# Display the 2017 table for comparison with 2016.
stocks_2017

In [None]:
# Stack the two yearly tables vertically (concat's default axis); the
# original Symbol labels are kept as-is.
s_list = [stocks_2016, stocks_2017]
pd.concat(s_list)

In [None]:
# `keys` tags each input frame, adding an outer index level that tells the
# years apart; `names` labels the two resulting index levels.
pd.concat(s_list, keys=['2016', '2017'], names=['Year', 'Symbol'])

In [None]:
# Concatenate side by side instead: rows are aligned on the index and the
# keys become the outer column level (named 'Year'; the inner level is left
# unnamed). No `join` is given, so concat's default join applies.
pd.concat(s_list, keys=['2016', '2017'], axis='columns', names=['Year', None])

In [None]:
# Same horizontal concatenation, but join='inner' restricts the result to
# index labels that appear in every input frame.
pd.concat(s_list, join='inner', keys=['2016', '2017'], axis='columns', names=['Year', None])

In [None]:
# DataFrame.append can also stack a whole DataFrame below another one.
# NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed
# in 2.0 - confirm the pandas version this notebook targets.
stocks_2016.append(stocks_2017)

In [None]:
# Keep an independent copy of the 2016 table under the name stocks_2015.
# NOTE(review): stocks_2015 is not used again in the visible part of the
# notebook and the cell displays stocks_2017 - confirm the intent.
stocks_2015 = stocks_2016.copy()
stocks_2017

## 

## concat,join和merge的区别

concat：
- Pandas函数
- 可以垂直和水平地连接两个或多个pandas对象
- 只用索引对齐
- 需要对齐索引时（例如水平连接），索引出现重复值会报错
- 默认是外连接（也可以设为内连接）

join：
- DataFrame方法
- 只能水平连接两个或多个pandas对象
- 对齐是靠被调用的DataFrame的列索引或行索引和另一个对象的行索引（不能是列索引）
- 通过笛卡尔积处理重复的索引值
- 默认是左连接（也可以设为内连接、外连接和右连接）

merge：
- DataFrame方法
- 只能水平连接两个DataFrame对象
- 对齐是靠被调用的DataFrame的列或行索引和另一个DataFrame的列或行索引
- 通过笛卡尔积处理重复的索引值
- 默认是内连接（也可以设为左连接、外连接、右连接）

In [None]:
from IPython.display import display_html
# Read one stock table per year into a list (same order as `years`), each
# indexed by its 'Symbol' column.
years = 2016, 2017, 2018
stock_tables = [pd.read_csv('data/stocks_{}.csv'.format(year), index_col='Symbol') for year in years]

def display_frames(frames, num_spaces=0):
    """Show several DataFrames next to each other in the notebook output.

    Each frame is rendered with ``to_html`` and its opening ``<table`` tag
    is given an inline display style; the rendered tables are then joined
    with ``num_spaces`` non-breaking spaces and passed to ``display_html``
    as raw HTML.

    Parameters
    ----------
    frames : iterable of DataFrame
        The tables to render, in display order.
    num_spaces : int, default 0
        Number of ``&nbsp;`` entities inserted between adjacent tables.
    """
    inline_open_tag = '<table style="display:inline;"'
    separator = '&nbsp;' * num_spaces
    rendered = []
    for frame in frames:
        rendered.append(frame.to_html().replace('<table', inline_open_tag))
    display_html(separator.join(rendered), raw=True)

# Show the three yearly tables side by side (30 spaces apart), then bind
# each one to its own name for the cells below.
display_frames(stock_tables, 30)
stocks_2016, stocks_2017, stocks_2018 = stock_tables

In [None]:
# Stack all three years vertically; the integer keys become the outer index
# level identifying the year of each row.
pd.concat(stock_tables, keys=[2016, 2017, 2018])

In [None]:
# concat also accepts a mapping: here {year: table}. The dict keys play the
# role of `keys`, and axis='columns' places the tables side by side.
pd.concat(dict(zip(years,stock_tables)), axis='columns')

In [None]:
# Join 2016 with 2017 on their indexes. The suffixes disambiguate column
# names the two tables share; how='outer' keeps symbols from either year.
stocks_2016.join(stocks_2017, lsuffix='_2016', rsuffix='_2017', how='outer')

In [None]:
# Display stocks_2016 again; the join above returned a new object and was
# not assigned back.
stocks_2016

In [None]:
# join can take a list of frames. Column names are made unique up front
# with add_suffix rather than through lsuffix/rsuffix.
# NOTE(review): presumably because suffix arguments are not supported when
# joining a list - confirm against the pandas docs.
other = [stocks_2017.add_suffix('_2017'), stocks_2018.add_suffix('_2018')]
 
stocks_2016.add_suffix('_2016').join(other, how='outer')

In [None]:
# Build the same wide table two ways: with a horizontal concat keyed by
# year, and with an outer join of the suffixed yearly tables.
stock_concat = pd.concat(dict(zip(years,stock_tables)), axis='columns')
stock_join = stocks_2016.add_suffix('_2016').join(other, how='outer')

# Flatten the two-level (year, column) labels of the concat result into
# single '<column>_<year>' strings, the same naming scheme add_suffix
# produced for the join.
stock_concat.columns = (
    stock_concat.columns.get_level_values(1)
    + '_'
    + stock_concat.columns.get_level_values(0).astype(str)
)

stock_concat

In [None]:
# merge combines only two frames per call, so the three-year table takes
# two steps. Both merges match on the row index of each side and use an
# outer join.
step1 = stocks_2016.merge(stocks_2017, left_index=True, right_index=True, how='outer', suffixes=('_2016', '_2017'))
stock_merge = step1.merge(stocks_2018.add_suffix('_2018'), left_index=True, right_index=True, how='outer')
# Check whether the merge route yields the same frame as the concat route.
stock_concat.equals(stock_merge)

In [None]:
# Load the food price and transaction tables and show them side by side.
# Note: this rebinds `names`, which earlier cells used for a DataFrame, to
# a list of file-name stems.
names = ['prices', 'transactions']
food_tables = [pd.read_csv('data/food_{}.csv'.format(name)) for name in names]
food_prices, food_transactions = food_tables
display_frames(food_tables, 30)

In [None]:
# Look up prices for each transaction by matching on the 'item' and 'store'
# columns (merge's default join type applies, since `how` is not given).
food_transactions.merge(food_prices, on=['item', 'store'])

In [None]:
# Restrict prices to rows where Date == 2017, then left-merge so every
# transaction row is kept. No `on` is given, so merge matches on the column
# names the two frames have in common
# (presumably 'item' and 'store' - verify against the CSV headers).
food_transactions.merge(food_prices.query('Date== 2017'), how='left')

In [None]:
# Prepare the price table for DataFrame.join: keep the Date == 2017 rows
# and move 'item' and 'store' into the index so they can be used as the
# match key.
food_prices_join = food_prices.query('Date ==2017').set_index(['item', 'store'])
food_prices_join

In [None]:
# join with `on=`: the caller's 'item' and 'store' columns are matched
# against the (item, store) index of food_prices_join.
food_transactions.join(food_prices_join, on=['item', 'store'])

In [None]:
# Attempt the same combination with concat by indexing both tables on
# (item, store) and concatenating side by side.
# NOTE(review): this may raise if an (item, store) pair occurs more than
# once within a table - confirm whether the error is the intended demo.
pd.concat([food_transactions.set_index(['item', 'store']), food_prices.set_index(['item', 'store'])], axis='columns')

In [None]:
import glob

# Read every gas-price CSV in the directory, indexing each by its 'Week'
# column parsed as dates.
# glob.glob returns matches in arbitrary, platform-dependent order, so the
# paths are sorted to keep the column order of `gas` reproducible between
# runs and machines.
gas_files = sorted(glob.glob('data/gasprices/*.csv'))
df_list = [
    pd.read_csv(filename, index_col='Week', parse_dates=['Week'])
    for filename in gas_files
]

# Place the tables side by side, aligned on the Week index.
gas = pd.concat(df_list, axis='columns')
gas.head()

## 连接 sql 数据库