# 1. Add and Remove rows & columns from dataframe
# 2. Combine two or more columns into one column

In [1]:
# Sample records as a dict of equal-length lists: one key per column.
people = {
    "first": [
        "vikas",
        "rakesh",
        "mahesh",
        "saurabh",
    ],
    "last": [
        "sharma",
        "puri",
        "jain",
        "sharma",
    ],
    "email": [
        "vikas_sharma@gmail.com",
        "rakesh_puri@gmail.com",
        "mahesh_jain@gmail.com",
        "saurabh_sharma@gmail.com",
    ],
}

In [2]:
import pandas as pd

In [3]:
# Build the demo frame from the dict; each key becomes a column.
df_dict = pd.DataFrame(data=people)
df_dict

Unnamed: 0,first,last,email
0,vikas,sharma,vikas_sharma@gmail.com
1,rakesh,puri,rakesh_puri@gmail.com
2,mahesh,jain,mahesh_jain@gmail.com
3,saurabh,sharma,saurabh_sharma@gmail.com


In [4]:
# NOTE(review): this whole cell is disabled (every line is commented out).
# The read_csv paths below are absolute and specific to one machine; if the
# cell is re-enabled, point them at a configurable data directory instead.
# df = pd.read_csv('D:/GIT_Repositories/pandas/survey_results_public.csv', index_col = 'Respondent')
# schema_df = pd.read_csv('D:/GIT_Repositories/pandas/survey_results_schema.csv', index_col = 'Column')

# Display limits that go with the frames above (also disabled).
# pd.set_option('display.max_columns', 85)
# pd.set_option('display.max_rows', 85)

# Add & Remove columns

### 1. create a new column "full_name" using the first and last name columns


In [7]:

# Element-wise string concatenation of the two name columns, separated by a space.
first_names = df_dict['first']
last_names = df_dict['last']
df_dict['full_name'] = first_names + ' ' + last_names
df_dict

Unnamed: 0,first,last,email,full_name
0,vikas,sharma,vikas_sharma@gmail.com,vikas sharma
1,rakesh,puri,rakesh_puri@gmail.com,rakesh puri
2,mahesh,jain,mahesh_jain@gmail.com,mahesh jain
3,saurabh,sharma,saurabh_sharma@gmail.com,saurabh sharma


### 2. delete first and last name columns from dataframe


In [10]:
# drop() returns a new frame; pass inplace=True to change df_dict itself.
df_dict.drop(['first', 'last'], axis='columns')

Unnamed: 0,email,full_name
0,vikas_sharma@gmail.com,vikas sharma
1,rakesh_puri@gmail.com,rakesh puri
2,mahesh_jain@gmail.com,mahesh jain
3,saurabh_sharma@gmail.com,saurabh sharma


In [11]:
# Same drop, this time applied to df_dict itself; the frame is shown afterwards.
df_dict.drop(['first', 'last'], axis='columns', inplace=True)
df_dict

Unnamed: 0,email,full_name
0,vikas_sharma@gmail.com,vikas sharma
1,rakesh_puri@gmail.com,rakesh puri
2,mahesh_jain@gmail.com,mahesh jain
3,saurabh_sharma@gmail.com,saurabh sharma


### 3. derive new columns first & last names from the full_name columns

In [12]:
# Split each full name on a space; every row becomes a list of the pieces.
df_dict['full_name'].str.split(pat=' ')

0      [vikas, sharma]
1       [rakesh, puri]
2       [mahesh, jain]
3    [saurabh, sharma]
Name: full_name, dtype: object

In [13]:
# expand=True spreads the split pieces into separate columns instead of one list per row

df_dict['full_name'].str.split(pat=' ', expand=True)

Unnamed: 0,0,1
0,vikas,sharma
1,rakesh,puri
2,mahesh,jain
3,saurabh,sharma


In [14]:
# Assign the split pieces to two new columns, 'first' and 'last'.
# n=1 splits on the first space only, so the result never has more than two
# columns: a multi-word surname stays whole in 'last' instead of producing a
# third column and making this two-column assignment fail.

df_dict[['first', 'last']] = df_dict['full_name'].str.split(' ', n=1, expand=True)
df_dict

Unnamed: 0,email,full_name,first,last
0,vikas_sharma@gmail.com,vikas sharma,vikas,sharma
1,rakesh_puri@gmail.com,rakesh puri,rakesh,puri
2,mahesh_jain@gmail.com,mahesh jain,mahesh,jain
3,saurabh_sharma@gmail.com,saurabh sharma,saurabh,sharma


# Add & Remove Rows

### 1. adding a single row of data

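`DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0. A private method, `DataFrame._append`, still exists and behaves the same way (e.g. `df._append(df2)`), but because it is private it can change or disappear without notice. The supported public replacement is `pd.concat`, e.g. `pd.concat([df, df2], ignore_index=True)`.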

#### Why is it changed?

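`DataFrame.append` looked like Python's `list.append` but behaved differently: it never modified the frame in place and instead returned a brand-new copy on every call, which is easy to misuse and slow inside a loop. To remove that confusion the public method was dropped in favour of `pd.concat`; `_append` is only the internal implementation that was left behind.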

In [22]:
# Add one row given as a dict. pd.concat is the public, supported way to do
# this: DataFrame.append was removed in pandas 2.0, and _append is a private
# method that may change without notice.
# Columns missing from the new row are filled with NaN. The result is a new
# frame; df_dict itself is not modified.
pd.concat(
    [df_dict, pd.DataFrame([{'first': 'sharan'}])],
    ignore_index=True,
)

Unnamed: 0,email,full_name,first,last
0,vikas_sharma@gmail.com,vikas sharma,vikas,sharma
1,rakesh_puri@gmail.com,rakesh puri,rakesh,puri
2,mahesh_jain@gmail.com,mahesh jain,mahesh,jain
3,saurabh_sharma@gmail.com,saurabh sharma,saurabh,sharma
4,,,sharan,


### 2. Append one dataframe to other existing data frame

In [24]:
# Two extra records with the same keys as the first dict.
people2 = {
    "first": [
        "megha",
        "prateek",
    ],
    "last": [
        "manognya",
        "shreshta",
    ],
    "email": [
        "megha_prateek@gmail.com",
        "manognya_shreshta@gmail.com",
    ],
}

In [25]:
# Second frame, built from the extra records; each key becomes a column.
df_dict2 = pd.DataFrame(data=people2)

In [26]:
# Display the second frame; note that it has no 'full_name' column.
df_dict2

Unnamed: 0,first,last,email
0,megha,manognya,megha_prateek@gmail.com
1,prateek,shreshta,manognya_shreshta@gmail.com


### Append df_dict2  --->>  df_dict

In [28]:
# pd.concat is the public replacement for the removed DataFrame.append and the
# private _append. It aligns columns by name, so the different column order in
# the two frames does not matter; 'full_name' is absent from df_dict2 and is
# filled with NaN for its rows. The result is a new frame; df_dict is unchanged.
pd.concat([df_dict, df_dict2], ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,vikas_sharma@gmail.com,vikas sharma,vikas,sharma
1,rakesh_puri@gmail.com,rakesh puri,rakesh,puri
2,mahesh_jain@gmail.com,mahesh jain,mahesh,jain
3,saurabh_sharma@gmail.com,saurabh sharma,saurabh,sharma
4,megha_prateek@gmail.com,,megha,manognya
5,manognya_shreshta@gmail.com,,prateek,shreshta


#### Note: this neither updates the original dataframe nor offers an inplace=True option

use the assignment option

original dataframe = concatenated data frame

In [29]:
# Still four rows: the combined frame from the previous cell was not assigned back.
df_dict

Unnamed: 0,email,full_name,first,last
0,vikas_sharma@gmail.com,vikas sharma,vikas,sharma
1,rakesh_puri@gmail.com,rakesh puri,rakesh,puri
2,mahesh_jain@gmail.com,mahesh jain,mahesh,jain
3,saurabh_sharma@gmail.com,saurabh sharma,saurabh,sharma


In [30]:
# The combined frame is a new object, so assign it back to keep the added rows.
# pd.concat is the public replacement for the removed DataFrame.append and the
# private _append. Note that re-running this cell adds df_dict2's rows again.
df_dict = pd.concat([df_dict, df_dict2], ignore_index=True)
df_dict

Unnamed: 0,email,full_name,first,last
0,vikas_sharma@gmail.com,vikas sharma,vikas,sharma
1,rakesh_puri@gmail.com,rakesh puri,rakesh,puri
2,mahesh_jain@gmail.com,mahesh jain,mahesh,jain
3,saurabh_sharma@gmail.com,saurabh sharma,saurabh,sharma
4,megha_prateek@gmail.com,,megha,manognya
5,manognya_shreshta@gmail.com,,prateek,shreshta


# Removing Rows from Data Frame

1. by specifying index number + inplace = True
   
2. by using CONDITIONALS i.e. fetch index number using a filter + inplace = True

In [31]:
# Drop the row whose index label is 5. Without inplace=True (or reassignment)
# this only returns a new frame; df_dict itself keeps the row.

df_dict.drop(labels=5, axis='index')

Unnamed: 0,email,full_name,first,last
0,vikas_sharma@gmail.com,vikas sharma,vikas,sharma
1,rakesh_puri@gmail.com,rakesh puri,rakesh,puri
2,mahesh_jain@gmail.com,mahesh jain,mahesh,jain
3,saurabh_sharma@gmail.com,saurabh sharma,saurabh,sharma
4,megha_prateek@gmail.com,,megha,manognya


In [32]:
# Still six rows: drop() in the previous cell returned a new frame and left df_dict untouched.
df_dict

Unnamed: 0,email,full_name,first,last
0,vikas_sharma@gmail.com,vikas sharma,vikas,sharma
1,rakesh_puri@gmail.com,rakesh puri,rakesh,puri
2,mahesh_jain@gmail.com,mahesh jain,mahesh,jain
3,saurabh_sharma@gmail.com,saurabh sharma,saurabh,sharma
4,megha_prateek@gmail.com,,megha,manognya
5,manognya_shreshta@gmail.com,,prateek,shreshta


In [38]:
# Drop rows by condition: build a boolean mask, look up the index labels of the
# matching rows, and hand those labels to drop().
# To make the change permanent, pass inplace=True or assign the result back.

filt = df_dict['last'].eq('shreshta')
df_dict.drop(index=df_dict.loc[filt].index)

Unnamed: 0,email,full_name,first,last
0,vikas_sharma@gmail.com,vikas sharma,vikas,sharma
1,rakesh_puri@gmail.com,rakesh puri,rakesh,puri
2,mahesh_jain@gmail.com,mahesh jain,mahesh,jain
3,saurabh_sharma@gmail.com,saurabh sharma,saurabh,sharma
4,megha_prateek@gmail.com,,megha,manognya


In [39]:
# Still six rows: the filtered drop() above was not applied in place or assigned back.
df_dict

Unnamed: 0,email,full_name,first,last
0,vikas_sharma@gmail.com,vikas sharma,vikas,sharma
1,rakesh_puri@gmail.com,rakesh puri,rakesh,puri
2,mahesh_jain@gmail.com,mahesh jain,mahesh,jain
3,saurabh_sharma@gmail.com,saurabh sharma,saurabh,sharma
4,megha_prateek@gmail.com,,megha,manognya
5,manognya_shreshta@gmail.com,,prateek,shreshta
