## 5 Tricks to Master Pandas append()

https://towardsdatascience.com/5-tricks-to-master-pandas-append-ede4318cc700

1. Appending a single row
2. Appending multiple rows
3. When column names don’t align
4. With argument sort=True
5. With argument verify_integrity=True

### 1. Appending a single row

In [1]:
# %load command.py
# Notebook-wide setup: imports plus display options used by every cell below.

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity='all'

# Render inline matplotlib figures as SVG, at 120 dpi.
%config InlineBackend.figure_format='svg'
plt.rcParams['figure.dpi']=120

# Show floats with a thousands separator and two decimal places.
pd.options.display.float_format='{:,.2f}'.format
# None removes the column-width limit, so long cell contents are not truncated.
pd.set_option('display.max_colwidth', None)


In [2]:
# Base DataFrame used throughout: one row per student, one column per subject score.
df1 = pd.DataFrame(
    [
        ('A', 60, 66, 61),
        ('B', 89, 95, 91),
        ('C', 82, 83, 77),
        ('D', 70, 66, 70),
    ],
    columns=['name', 'math', 'physics', 'chemistry'],
)

In [3]:
### Appending a Series
# NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# On newer versions use pd.concat instead - confirm which pandas version this
# notebook targets before re-running it.
# The Series index labels are matched against df1's column names.
my_series = pd.Series(
    ['E', 60, 70, 80],
    index=['name', 'math', 'physics', 'chemistry']
)
# append() returns a new DataFrame and leaves df1 unchanged;
# ignore_index=True gives the new row the next integer label (4).
df1.append(my_series, ignore_index=True)

Unnamed: 0,name,math,physics,chemistry
0,A,60,66,61
1,B,89,95,91
2,C,82,83,77
3,D,70,66,70
4,E,60,70,80


In [4]:
### Appending a dict
# The dict keys are matched against df1's column names, so one dict is one new row.
my_dict = {'name': 'E', 'math': 60, 'physics': 80, 'chemistry': 90 }
df1.append(my_dict, ignore_index=True)

Unnamed: 0,name,math,physics,chemistry
0,A,60,66,61
1,B,89,95,91
2,C,82,83,77
3,D,70,66,70
4,E,60,80,90


In [5]:
### Appending a list - wrong
# A plain list carries no column labels, so each element becomes its own row
# under a new column 0, and the existing columns are filled with NaN for those rows.
my_list = ['E', 60, 70, 80]
df1.append(my_list, ignore_index=True)

Unnamed: 0,0,chemistry,math,name,physics
0,,61.0,60.0,A,66.0
1,,91.0,89.0,B,95.0
2,,77.0,82.0,C,83.0
3,,70.0,70.0,D,66.0
4,E,,,,
5,60,,,,
6,70,,,,
7,80,,,,


**To fix it, we need to convert the list to a Pandas Series using pd.Series() with the argument `index=df1.columns`:**

In [6]:
### Appending a list - correct
my_list = ['E', 60, 70, 80]
# Convert the list to a pandas Series labelled with df1's column names,
# so each value lands in the matching column.
a_series = pd.Series(my_list, index=df1.columns)
df1.append(a_series, ignore_index=True)

Unnamed: 0,name,math,physics,chemistry
0,A,60,66,61
1,B,89,95,91
2,C,82,83,77
3,D,70,66,70
4,E,60,70,80


### 2. Appending multiple rows

In [7]:
# Second DataFrame with the same columns as df1, holding students E-H.
df2 = pd.DataFrame(
    [
        ('E', 66, 60, 90),
        ('F', 95, 89, 81),
        ('G', 83, 82, 78),
        ('H', 66, 70, 90),
    ],
    columns=['name', 'math', 'physics', 'chemistry'],
)

In [8]:
# Without ignore_index, each frame keeps its own index labels, so 0-3 appear twice.
df1.append(df2)

Unnamed: 0,name,math,physics,chemistry
0,A,60,66,61
1,B,89,95,91
2,C,82,83,77
3,D,70,66,70
0,E,66,60,90
1,F,95,89,81
2,G,83,82,78
3,H,66,70,90


In [9]:
# With ignore_index=True the combined rows are relabelled 0..7
df1.append(df2, ignore_index=True)

Unnamed: 0,name,math,physics,chemistry
0,A,60,66,61
1,B,89,95,91
2,C,82,83,77
3,D,70,66,70
4,E,66,60,90
5,F,95,89,81
6,G,83,82,78
7,H,66,70,90


In [10]:
## A list of dicts
# Each dict becomes one row; its keys are matched against df1's column names.
# my_dict_list is reused by the sort=True example further down.
my_dict_list = [
  {'name' : 'E', 'math' : 66, 'physics' : 60, 'chemistry': 90 },
  {'name' : 'F', 'math' : 95, 'physics' : 89, 'chemistry': 81 },
  {'name' : 'G', 'math' : 83, 'physics' : 82, 'chemistry': 78 },
  {'name' : 'H', 'math' : 66, 'physics' : 70, 'chemistry': 90 }
]

df1.append(my_dict_list, ignore_index=True)

Unnamed: 0,name,math,physics,chemistry
0,A,60,66,61
1,B,89,95,91
2,C,82,83,77
3,D,70,66,70
4,E,66,60,90
5,F,95,89,81
6,G,83,82,78
7,H,66,70,90


In [11]:
# A list of lists: the inner lists have no column labels of their own
my_list_of_list = [
    ['E', 66, 60, 90],
    ['F', 95, 89, 81],
    ['G', 83, 82, 78],
    ['H', 66, 70, 90]
]
# Convert the list of lists to a DataFrame that reuses df1's column names,
# so the values line up positionally with df1's columns
my_df = pd.DataFrame(my_list_of_list, columns=df1.columns)
df1.append(my_df, ignore_index=True)

Unnamed: 0,name,math,physics,chemistry
0,A,60,66,61
1,B,89,95,91
2,C,82,83,77
3,D,70,66,70
4,E,66,60,90
5,F,95,89,81
6,G,83,82,78
7,H,66,70,90


### 3. When column names don’t align

In [12]:
# Missing columns: a key absent from a dict is filled with NaN in that row,
# and the affected numeric columns are displayed as floats as a result.
my_dict_list_missing=[
    {'name': 'E', 'chemistry':90},
    {'name':'F', 'math':95},
    {'name':'G', 'math':83, 'physics':82},
    {'name':'H', 'math':66, 'chemistry':90}

]

df1.append(my_dict_list_missing, ignore_index=True)

Unnamed: 0,name,math,physics,chemistry
0,A,60.0,66.0,61.0
1,B,89.0,95.0,91.0
2,C,82.0,83.0,77.0
3,D,70.0,66.0,70.0
4,E,,,90.0
5,F,95.0,,
6,G,83.0,82.0,
7,H,66.0,,90.0


In [13]:
# New columns: 'biology' does not exist in df1, so it is added to the result
# and the original rows get NaN for it.
# NOTE: this rebinds my_dict_list_missing from the previous cell, although
# no keys are missing here - every dict has all of df1's columns plus 'biology'.
my_dict_list_missing=[
    {'name': 'E', 'math':66, 'physics':60, 'chemistry':90, 'biology':80},
    {'name':'F', 'math':95, 'physics':89, 'chemistry':81, 'biology':60},
    {'name':'G', 'math':83, 'physics':82, 'chemistry':78, 'biology':65},
    {'name':'H', 'math':66, 'physics':70, 'chemistry': 90, 'biology':80}

]


df1.append(my_dict_list_missing, ignore_index=True)

Unnamed: 0,name,math,physics,chemistry,biology
0,A,60,66,61,
1,B,89,95,91,
2,C,82,83,77,
3,D,70,66,70,
4,E,66,60,90,80.0
5,F,95,89,81,60.0
6,G,83,82,78,65.0
7,H,66,70,90,80.0


### 4. Sorting column names with sort=True

In [14]:
# If we prefer the columns to be sorted alphabetically, we can set the argument sort to True:
df3=df1.append(my_dict_list, ignore_index=True, sort=True)
# Then move 'name' back to the front, keeping the subject columns in alphabetical order.
cols=['name', 'chemistry', 'math', 'physics']
df3=df3[cols]
df3

Unnamed: 0,name,chemistry,math,physics
0,A,61,60,66
1,B,91,89,95
2,C,77,82,83
3,D,70,70,66
4,E,90,66,60
5,F,81,95,89
6,G,78,83,82
7,H,90,66,70


### 5. Avoid duplicate index values with verify_integrity=True
Pandas append() has a verify_integrity argument that controls whether the output may contain duplicate index values. It defaults to False, which means duplicates are allowed. Setting it to True raises an error if two or more rows share the same index value. For example, the call below fails because index value 1 appears in both DataFrames:


In [22]:
# Create a DataFrame whose index label 1 also exists in df1 (df1 is labelled 0-3).
# This rebinds df2, replacing the default-indexed frame defined earlier.
df2 = pd.DataFrame({
    'name': ['E', 'F', 'G', 'H'],
    'math': [66,95,83,66],
    'physics': [60,89,82,70],
    'chemistry': [90,81,78,90]
}, index=[1, 4, 5, 6])
# Raises ValueError: with verify_integrity=True the shared index label 1 is rejected.
df1.append(df2, verify_integrity=True)

ValueError: Indexes have overlapping values: Int64Index([1], dtype='int64')

### Change the order of columns
https://stackoverflow.com/questions/13148429/how-to-change-the-order-of-dataframe-columns

In [15]:
import numpy as np
import pandas as pd

# 10 rows x 5 columns of random floats; the columns get default integer labels 0-4.
# No seed is set here, so the values differ on each run.
df = pd.DataFrame(np.random.rand(10, 5))

In [16]:
# Add a 'mean' column holding each row's average across the existing columns.
df['mean'] = df.mean(axis=1)
df

Unnamed: 0,0,1,2,3,4,mean
0,0.36,0.77,0.35,0.76,0.69,0.59
1,0.32,0.32,0.48,0.16,0.12,0.28
2,0.51,0.92,0.48,0.45,0.48,0.57
3,0.16,0.41,0.66,0.2,0.38,0.36
4,0.1,0.2,0.37,0.17,0.44,0.26
5,0.26,0.85,0.29,0.92,0.12,0.49
6,0.93,0.93,0.45,0.46,0.45,0.65
7,0.64,0.25,0.54,0.62,0.06,0.42
8,0.67,0.75,0.47,0.6,0.54,0.61
9,0.62,0.37,0.13,0.19,0.35,0.33


In [17]:
# Take the column labels as a plain Python list so they can be reordered by slicing.
cols=df.columns.tolist()
cols

[0, 1, 2, 3, 4, 'mean']

In [18]:
# Show the two slices being recombined: every label but the last, then the last label alone.
cols[:-1]
cols[-1:]
# Move the last label ('mean') to the front.
# Re-running this cell rotates the list by one more position.
cols=cols[-1:] + cols[:-1]
cols

[0, 1, 2, 3, 4]

['mean']

['mean', 0, 1, 2, 3, 4]

In [19]:
# reorder the dataframe
df=df[cols]
df

Unnamed: 0,mean,0,1,2,3,4
0,0.59,0.36,0.77,0.35,0.76,0.69
1,0.28,0.32,0.32,0.48,0.16,0.12
2,0.57,0.51,0.92,0.48,0.45,0.48
3,0.36,0.16,0.41,0.66,0.2,0.38
4,0.26,0.1,0.2,0.37,0.17,0.44
5,0.49,0.26,0.85,0.29,0.92,0.12
6,0.65,0.93,0.93,0.45,0.46,0.45
7,0.42,0.64,0.25,0.54,0.62,0.06
8,0.61,0.67,0.75,0.47,0.6,0.54
9,0.33,0.62,0.37,0.13,0.19,0.35
