# Adding, Removing Columns and Rows

In [1]:
import pandas as pd

In [2]:
dict = {
    "email": ["CoreyMSchafer@gmail.com", "JaneDoe@gmail.com", "JohnDoe@gmail.com"],
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"]
}

In [3]:
df = pd.DataFrame(dict)
df

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@gmail.com,Jane,Doe
2,JohnDoe@gmail.com,John,Doe


In [4]:
# Adding a new column: join first and last name with a single space between them
df['full_name'] = df['first'].str.cat(df['last'], sep=' ')

In [5]:
# Display the frame to confirm that full_name was added as the last column
df

Unnamed: 0,email,first,last,full_name
0,CoreyMSchafer@gmail.com,Corey,Schafer,Corey Schafer
1,JaneDoe@gmail.com,Jane,Doe,Jane Doe
2,JohnDoe@gmail.com,John,Doe,John Doe


In [6]:
# Removing columns
# The drop is applied to a copy, so df itself keeps its first/last columns for the later cells.
df2 = df.copy().drop(columns=["first", "last"])
df2

Unnamed: 0,email,full_name
0,CoreyMSchafer@gmail.com,Corey Schafer
1,JaneDoe@gmail.com,Jane Doe
2,JohnDoe@gmail.com,John Doe


In [7]:
# Splitting a column into two
# Without expand=True, .str.split returns a Series whose elements are lists of the pieces.
# Calling .str.split() with no separator splits on whitespace, so the explicit ' ' is optional for this data.
split_Series = df2['full_name'].str.split(pat=' ')
print('column splitted based on white space producing a Series:')
split_Series

column splitted based on white space producing a Series:


0    [Corey, Schafer]
1         [Jane, Doe]
2         [John, Doe]
Name: full_name, dtype: object

In [8]:
# expand=True spreads the pieces over separate columns, so the result is a DataFrame
# (columns labelled 0 and 1) instead of a Series of lists.
split_df = df2['full_name'].str.split(pat=' ', expand=True)
split_df

Unnamed: 0,0,1
0,Corey,Schafer
1,Jane,Doe
2,John,Doe


In [9]:
# Attach the split pieces to df2 under proper names: piece 0 -> 'first', piece 1 -> 'last'
df2['first'] = split_df[0]
df2['last'] = split_df[1]
df2

Unnamed: 0,email,full_name,first,last
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe


In [10]:
# Adding a new column from a ready-made list of values (one value per existing row)
df2['new_col'] = list(range(len(df2)))
df2

Unnamed: 0,email,full_name,first,last,new_col
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer,0
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe,1
2,JohnDoe@gmail.com,John Doe,John,Doe,2


In [11]:
# Appending a dictionary as a new row at the end of the DataFrame.
# len(df2.index) is the next free label only because the index is the default 0..n-1.
# Keys are matched to columns by name; columns not mentioned (full_name, new_col) end up as NaN.
df2.loc[len(df2.index)] = {'email': 'appendeduser@gmail.com', 'first': 'Appended', 'last': 'User'}
df2

Unnamed: 0,email,full_name,first,last,new_col
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer,0.0
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe,1.0
2,JohnDoe@gmail.com,John Doe,John,Doe,2.0
3,appendeduser@gmail.com,,Appended,User,


- **Note that fields missing from the dictionary are filled with NaN. Because NaN is a float, the integer column `new_col` is upcast to float (0 becomes 0.0).**

In [12]:
# Appending a list as a new row at the end of the DataFrame.
# A list is matched to the columns by position, so it must supply one value for every column.
df2.loc[len(df2.index)] = ["listuser@gmail.com", "List User", "List", "User", 0]
df2

Unnamed: 0,email,full_name,first,last,new_col
0,CoreyMSchafer@gmail.com,Corey Schafer,Corey,Schafer,0.0
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe,1.0
2,JohnDoe@gmail.com,John Doe,John,Doe,2.0
3,appendeduser@gmail.com,,Appended,User,
4,listuser@gmail.com,List User,List,User,0.0


In [13]:
# A second, independent set of records with the same four columns as df;
# it is the other operand in the concatenation examples.
dict2 = {
    "email": ["ahmed@gmail.com", "danish@gmail.com", "ali@gmail.com"],
    "first": ["Ahmed", "Danish", "Ali"],
    "last": ["Danial", "Wattoo", "Naeem"],
    "full_name": ["Ahmed Danial", "Danish Wattoo", "Ali Naeem"],
}
df_new = pd.DataFrame.from_dict(dict2)
df_new

Unnamed: 0,email,first,last,full_name
0,ahmed@gmail.com,Ahmed,Danial,Ahmed Danial
1,danish@gmail.com,Danish,Wattoo,Danish Wattoo
2,ali@gmail.com,Ali,Naeem,Ali Naeem


In [14]:
# Concatenating two DataFrames while keeping each frame's own index labels
# (the combined index may then contain duplicate labels).
# pd.concat builds a new DataFrame, so no defensive copy of df_new is needed first.
df_new2 = pd.concat([df, df_new])
df_new2

Unnamed: 0,email,first,last,full_name
0,CoreyMSchafer@gmail.com,Corey,Schafer,Corey Schafer
1,JaneDoe@gmail.com,Jane,Doe,Jane Doe
2,JohnDoe@gmail.com,John,Doe,John Doe
0,ahmed@gmail.com,Ahmed,Danial,Ahmed Danial
1,danish@gmail.com,Danish,Wattoo,Danish Wattoo
2,ali@gmail.com,Ali,Naeem,Ali Naeem


- **As you can see, the default integer index labels are no longer unique: 0, 1 and 2 each appear twice.**

In [15]:
# Concatenating two DataFrames while keeping the index unique:
# ignore_index=True discards the incoming index labels and numbers the result 0..n-1.
# pd.concat builds a new DataFrame, so no defensive copy of df_new is needed first.
df_new3 = pd.concat([df, df_new], ignore_index=True)
print('concatenated DataFrames with id of the first DataFrame in the list continued and that of others ignored:')
df_new3

concatenated DataFrames with id of the first DataFrame in the list continued and that of others ignored:


Unnamed: 0,email,first,last,full_name
0,CoreyMSchafer@gmail.com,Corey,Schafer,Corey Schafer
1,JaneDoe@gmail.com,Jane,Doe,Jane Doe
2,JohnDoe@gmail.com,John,Doe,John Doe
3,ahmed@gmail.com,Ahmed,Danial,Ahmed Danial
4,danish@gmail.com,Danish,Wattoo,Danish Wattoo
5,ali@gmail.com,Ali,Naeem,Ali Naeem


In [16]:
# Appending one specific row of a DataFrame to the end of another DataFrame.
# df.loc[0] returns the row as a Series; df.loc[[0]] (note the inner list) returns a
# one-row DataFrame, so it is concatenated as a row.
# pd.concat builds a new DataFrame, so no defensive copy of df_new is needed first.
df_new4 = pd.concat([df_new, df.loc[[0]]], ignore_index=True)
df_new4

Unnamed: 0,email,first,last,full_name
0,ahmed@gmail.com,Ahmed,Danial,Ahmed Danial
1,danish@gmail.com,Danish,Wattoo,Danish Wattoo
2,ali@gmail.com,Ali,Naeem,Ali Naeem
3,CoreyMSchafer@gmail.com,Corey,Schafer,Corey Schafer


In [27]:
# Fresh working copy for the row-removal examples, so df itself is left unchanged
df3 = df.copy(deep=True)
df3

Unnamed: 0,email,first,last,full_name
0,CoreyMSchafer@gmail.com,Corey,Schafer,Corey Schafer
1,JaneDoe@gmail.com,Jane,Doe,Jane Doe
2,JohnDoe@gmail.com,John,Doe,John Doe


In [29]:
# Removing a row by its index label (the label 1, not the position 1)
df3 = df3.drop(index=1)
df3

Unnamed: 0,email,first,last,full_name
0,CoreyMSchafer@gmail.com,Corey,Schafer,Corey Schafer
2,JohnDoe@gmail.com,John,Doe,John Doe


In [30]:
# Removing rows by a column value:
# first collect the index labels of the rows where first == 'Corey', then drop those labels.
idx = df3.index[df3['first'] == 'Corey']
df3 = df3.drop(index=idx)
df3

Unnamed: 0,email,first,last,full_name
2,JohnDoe@gmail.com,John,Doe,John Doe
