### Appending and concatenating Series

#### append()
- .append(): Series & DataFrame *method*
- Invocation:
 - s1.append(s2) <br>
- Stacks rows of s2 below s1
- Method for Series & DataFrames

#### concat()
- concat(): pandas module *function*
- Invocation;
 - pd.concat([s1, s2, s3])
- Can stack row-wise or column-wise

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
bronze = pd.read_csv('summer_olympic_medals/bronze_top5.csv', index_col=0)
silver = pd.read_csv('summer_olympic_medals/silver_top5.csv', index_col=0)
gold = pd.read_csv('summer_olympic_medals/gold_top5.csv', index_col=0)

In [3]:
jan = pd.read_csv('Sales/sales-jan-2015.csv', parse_dates=True, index_col='Date')
feb = pd.read_csv('Sales/sales-feb-2015.csv', parse_dates=True, index_col='Date')
mar = pd.read_csv('Sales/sales-mar-2015.csv', parse_dates=True, index_col='Date')

In [4]:
jan_units = jan['Units']
feb_units = feb['Units']
mar_units = mar['Units']

In [5]:
quarter1 = jan_units.append(feb_units).append(mar_units)

In [6]:
quarter1.loc['jan 27, 2015':'feb 2, 2015']

Date
2015-01-27 07:11:55    18
2015-02-02 08:33:01     3
2015-02-02 20:54:49     9
Name: Units, dtype: int64

In [7]:
quarter1.loc['feb 26, 2015':'mar 7, 2015']

Date
2015-02-26 08:57:45     4
2015-02-26 08:58:51     1
2015-03-06 10:11:45    17
2015-03-06 02:03:56    17
Name: Units, dtype: int64

In [8]:
quarter1.sum()

642

In [9]:
units = []

In [10]:
for month in [jan, feb, mar]:
    units.append(month['Units'])

In [11]:
quarter1 = pd.concat(units, axis='rows')

In [12]:
quarter1.head()

Date
2015-01-21 19:13:21    11
2015-01-09 05:23:51     8
2015-01-06 17:19:34    17
2015-01-02 09:51:06    16
2015-01-11 14:51:02    11
Name: Units, dtype: int64

### Appending & concatenating DataFrames

In [13]:
pop1 = pd.read_csv('population_01.csv', index_col=0)
pop2 = pd.read_csv('population_02.csv', index_col=0)
print(type(pop1), pop1.shape)
print(type(pop2), pop2.shape)

<class 'pandas.core.frame.DataFrame'> (4, 1)
<class 'pandas.core.frame.DataFrame'> (4, 1)


In [14]:
pop1

Unnamed: 0_level_0,2010 Census Population
Zip Code ZCTA,Unnamed: 1_level_1
66407,479
72732,4716
50579,2405
46241,30670


In [15]:
pop2

Unnamed: 0_level_0,2010 Census Population
Zip Code ZCTA,Unnamed: 1_level_1
12776,2180
76092,26669
98360,12221
49464,27481


In [16]:
pop1.append(pop2)

Unnamed: 0_level_0,2010 Census Population
Zip Code ZCTA,Unnamed: 1_level_1
66407,479
72732,4716
50579,2405
46241,30670
12776,2180
76092,26669
98360,12221
49464,27481


In [17]:
print(pop1.index.name)
print(pop1.columns)
print(pop2.index.name)
print(pop2.columns)

Zip Code ZCTA
Index(['2010 Census Population'], dtype='object')
Zip Code ZCTA
Index(['2010 Census Population'], dtype='object')


In [18]:
population = pd.read_csv('population_00.csv', index_col=0)
unemployment = pd.read_csv('unemployment_00.csv', index_col=0)
print(population)
print(unemployment)

               2010 Census Population
Zip Code ZCTA                        
57538                             322
59916                             130
37660                           40038
2860                            45199
       unemployment  participants
Zip                              
2860           0.11         34447
46167          0.02          4800
1097           0.33            42
80808          0.07          4310


In [19]:
population.append(unemployment)

Unnamed: 0,2010 Census Population,participants,unemployment
57538,322.0,,
59916,130.0,,
37660,40038.0,,
2860,45199.0,,
2860,,34447.0,0.11
46167,,4800.0,0.02
1097,,42.0,0.33
80808,,4310.0,0.07


In [20]:
pd.concat([population, unemployment], axis=0)

Unnamed: 0,2010 Census Population,participants,unemployment
57538,322.0,,
59916,130.0,,
37660,40038.0,,
2860,45199.0,,
2860,,34447.0,0.11
46167,,4800.0,0.02
1097,,42.0,0.33
80808,,4310.0,0.07


In [21]:
pd.concat([population, unemployment], axis=1)

Unnamed: 0,2010 Census Population,unemployment,participants
1097,,0.33,42.0
2860,45199.0,0.11,34447.0
37660,40038.0,,
46167,,0.02,4800.0
57538,322.0,,
59916,130.0,,
80808,,0.07,4310.0


#### Appending DataFrames with ignore_index

In [25]:
names_1981 = pd.read_csv('baby_names/names1981.csv', 
                         header=None, 
                         names=['name','gender','count'])
names_1881 = pd.read_csv('baby_names/names1881.csv', 
                         header=None, 
                         names=['name','gender','count'])

In [26]:
names_1881['year'] = 1881
names_1981['year'] = 1981

In [28]:
combined_names = names_1881.append(names_1981, ignore_index=True)
print(names_1981.shape)
print(names_1881.shape)
print(combined_names.shape)

(19455, 4)
(1935, 4)
(21390, 4)


In [30]:
print(combined_names[combined_names['name'] == 'Morgan'])

         name gender  count  year
1283   Morgan      M     23  1881
2096   Morgan      F   1769  1981
14390  Morgan      M    766  1981
