# Pandas Tips: `drop()`

In [1]:
import pandas as pd

Check your pandas version to ensure similar behavior. 

_Version 2.1.1 was released September 2023; the outputs in this notebook were produced with the version displayed below._

In [2]:
pd.__version__

'1.5.3'

### Load data

Load data from GitHub

_Check out my [read_excel video](https://youtu.be/2FntKvSPGmU) to learn more about this step._

In [3]:
# Read the 'purchases' sheet of the workbook hosted on GitHub, skipping one
# row at the top of the sheet and one row at the bottom.
df = pd.read_excel(
    io='https://github.com/kimfetti/Videos/blob/master/Pandas_Tips/data/store_data.xlsx?raw=True',
    skipfooter=1,
    skiprows=1,
    sheet_name='purchases',
)

In [4]:
df.head()

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost,Online
0,2021-10-11,4576,11,Gray Sweater,59.99,No
1,2020-11-13,9488,32,Running Shoes,49.99,Yes
2,2020-08-20,7265,3,Blue Jeans,49.99,Yes
3,2021-04-07,8555,39,Polo Shirt,29.99,No
4,2022-01-10,7023,44,Sweatshirt,49.99,No


In [5]:
df.describe()

Unnamed: 0,Customer ID,Product ID
count,1818.0,1818.0
mean,5556.50165,27.269527
std,2589.461042,15.688456
min,1060.0,1.0
25%,3328.0,14.0
50%,5637.0,27.0
75%,7806.0,41.0
max,9970.0,54.0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1818 entries, 0 to 1817
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date of Purchase     1818 non-null   datetime64[ns]
 1   Customer ID          1818 non-null   int64         
 2   Product ID           1818 non-null   int64         
 3   Product Description  1818 non-null   object        
 4   Cost                 1818 non-null   object        
 5   Online               1818 non-null   object        
dtypes: datetime64[ns](1), int64(2), object(3)
memory usage: 85.3+ KB


## Basics

### Dropping a column

In [7]:
# Demonstrates a common mistake: without `axis=1` or `columns=`, drop() uses
# axis=0 and looks for 'Online' among the row labels, which raises KeyError.
# The error is caught and printed so the demonstration does not interrupt
# "Restart Kernel -> Run All".

try:
    df.drop('Online')
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['Online'] not found in axis"

In [8]:
# axis=1 makes drop() look for the label among the column names.
df.drop('Online', axis=1)

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost
0,2021-10-11,4576,11,Gray Sweater,59.99
1,2020-11-13,9488,32,Running Shoes,49.99
2,2020-08-20,7265,3,Blue Jeans,49.99
3,2021-04-07,8555,39,Polo Shirt,29.99
4,2022-01-10,7023,44,Sweatshirt,49.99
...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99
1814,2022-11-02,9176,54,Casual Dress,69.99
1815,2023-05-12,8588,30,Maxi Skirt,39.99
1816,2021-12-28,1892,27,Leather Jacket,1119.99


In [9]:
# Equivalent to `df.drop('Online', axis=1)`: the `columns` keyword names the axis explicitly.
df.drop(columns='Online')

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost
0,2021-10-11,4576,11,Gray Sweater,59.99
1,2020-11-13,9488,32,Running Shoes,49.99
2,2020-08-20,7265,3,Blue Jeans,49.99
3,2021-04-07,8555,39,Polo Shirt,29.99
4,2022-01-10,7023,44,Sweatshirt,49.99
...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99
1814,2022-11-02,9176,54,Casual Dress,69.99
1815,2023-05-12,8588,30,Maxi Skirt,39.99
1816,2021-12-28,1892,27,Leather Jacket,1119.99


It's common to receive a `KeyError` when dropping columns, because `drop()` looks for labels in the row index (`axis=0`) by default.

Be sure to either:
- Set `axis=1`
- Or use the `columns` argument

### Dropping a row

In [10]:
df

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost,Online
0,2021-10-11,4576,11,Gray Sweater,59.99,No
1,2020-11-13,9488,32,Running Shoes,49.99,Yes
2,2020-08-20,7265,3,Blue Jeans,49.99,Yes
3,2021-04-07,8555,39,Polo Shirt,29.99,No
4,2022-01-10,7023,44,Sweatshirt,49.99,No
...,...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99,Yes
1814,2022-11-02,9176,54,Casual Dress,69.99,No
1815,2023-05-12,8588,30,Maxi Skirt,39.99,No
1816,2021-12-28,1892,27,Leather Jacket,1119.99,Yes


In [11]:
# The positional argument is a label; with the default axis=0 it is looked up in the row index.
df.drop(2)

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost,Online
0,2021-10-11,4576,11,Gray Sweater,59.99,No
1,2020-11-13,9488,32,Running Shoes,49.99,Yes
3,2021-04-07,8555,39,Polo Shirt,29.99,No
4,2022-01-10,7023,44,Sweatshirt,49.99,No
5,2023-07-07,3412,25,Cargo Pants,54.99,No
...,...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99,Yes
1814,2022-11-02,9176,54,Casual Dress,69.99,No
1815,2023-05-12,8588,30,Maxi Skirt,39.99,No
1816,2021-12-28,1892,27,Leather Jacket,1119.99,Yes


Note that we are dropping by index label, not by position number!

In [12]:
# Relabel index 0 as 'first_row' (returns a new DataFrame) so a non-integer row label is available to drop.
df.rename({0: 'first_row'})

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost,Online
first_row,2021-10-11,4576,11,Gray Sweater,59.99,No
1,2020-11-13,9488,32,Running Shoes,49.99,Yes
2,2020-08-20,7265,3,Blue Jeans,49.99,Yes
3,2021-04-07,8555,39,Polo Shirt,29.99,No
4,2022-01-10,7023,44,Sweatshirt,49.99,No
...,...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99,Yes
1814,2022-11-02,9176,54,Casual Dress,69.99,No
1815,2023-05-12,8588,30,Maxi Skirt,39.99,No
1816,2021-12-28,1892,27,Leather Jacket,1119.99,Yes


In [13]:
# `index=` drops rows by label; the rename is repeated in the chain because it did not modify df.
df.rename({0: 'first_row'}).drop(index='first_row')

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost,Online
1,2020-11-13,9488,32,Running Shoes,49.99,Yes
2,2020-08-20,7265,3,Blue Jeans,49.99,Yes
3,2021-04-07,8555,39,Polo Shirt,29.99,No
4,2022-01-10,7023,44,Sweatshirt,49.99,No
5,2023-07-07,3412,25,Cargo Pants,54.99,No
...,...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99,Yes
1814,2022-11-02,9176,54,Casual Dress,69.99,No
1815,2023-05-12,8588,30,Maxi Skirt,39.99,No
1816,2021-12-28,1892,27,Leather Jacket,1119.99,Yes


## $\star$ Level Up $\star$

### Dropping multiple columns

In [14]:
df.head()

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost,Online
0,2021-10-11,4576,11,Gray Sweater,59.99,No
1,2020-11-13,9488,32,Running Shoes,49.99,Yes
2,2020-08-20,7265,3,Blue Jeans,49.99,Yes
3,2021-04-07,8555,39,Polo Shirt,29.99,No
4,2022-01-10,7023,44,Sweatshirt,49.99,No


In [15]:
# Passing a list of labels drops several columns in one call.
df.drop(columns=['Customer ID', 'Online'])

Unnamed: 0,Date of Purchase,Product ID,Product Description,Cost
0,2021-10-11,11,Gray Sweater,59.99
1,2020-11-13,32,Running Shoes,49.99
2,2020-08-20,3,Blue Jeans,49.99
3,2021-04-07,39,Polo Shirt,29.99
4,2022-01-10,44,Sweatshirt,49.99
...,...,...,...,...
1813,2020-01-26,14,Plaid Shorts,22.99
1814,2022-11-02,54,Casual Dress,69.99
1815,2023-05-12,30,Maxi Skirt,39.99
1816,2021-12-28,27,Leather Jacket,1119.99


### Dropping information permanently: `inplace`

In [16]:
df

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost,Online
0,2021-10-11,4576,11,Gray Sweater,59.99,No
1,2020-11-13,9488,32,Running Shoes,49.99,Yes
2,2020-08-20,7265,3,Blue Jeans,49.99,Yes
3,2021-04-07,8555,39,Polo Shirt,29.99,No
4,2022-01-10,7023,44,Sweatshirt,49.99,No
...,...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99,Yes
1814,2022-11-02,9176,54,Casual Dress,69.99,No
1815,2023-05-12,8588,30,Maxi Skirt,39.99,No
1816,2021-12-28,1892,27,Leather Jacket,1119.99,Yes


In [17]:
# Without inplace=True, drop() returns a new DataFrame and leaves df itself untouched.
df.drop(columns='Online')

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost
0,2021-10-11,4576,11,Gray Sweater,59.99
1,2020-11-13,9488,32,Running Shoes,49.99
2,2020-08-20,7265,3,Blue Jeans,49.99
3,2021-04-07,8555,39,Polo Shirt,29.99
4,2022-01-10,7023,44,Sweatshirt,49.99
...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99
1814,2022-11-02,9176,54,Casual Dress,69.99
1815,2023-05-12,8588,30,Maxi Skirt,39.99
1816,2021-12-28,1892,27,Leather Jacket,1119.99


In [18]:
df  # the 'Online' column is still present because drop() was called without inplace=True

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost,Online
0,2021-10-11,4576,11,Gray Sweater,59.99,No
1,2020-11-13,9488,32,Running Shoes,49.99,Yes
2,2020-08-20,7265,3,Blue Jeans,49.99,Yes
3,2021-04-07,8555,39,Polo Shirt,29.99,No
4,2022-01-10,7023,44,Sweatshirt,49.99,No
...,...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99,Yes
1814,2022-11-02,9176,54,Casual Dress,69.99,No
1815,2023-05-12,8588,30,Maxi Skirt,39.99,No
1816,2021-12-28,1892,27,Leather Jacket,1119.99,Yes


In [19]:
# inplace=True modifies df itself and returns None, so this cell displays no output.
# Re-running the cell raises KeyError because 'Online' has already been removed.
df.drop(columns='Online', inplace=True)

In [20]:
df  # the 'Online' column is permanently removed because inplace=True modified df itself

Unnamed: 0,Date of Purchase,Customer ID,Product ID,Product Description,Cost
0,2021-10-11,4576,11,Gray Sweater,59.99
1,2020-11-13,9488,32,Running Shoes,49.99
2,2020-08-20,7265,3,Blue Jeans,49.99
3,2021-04-07,8555,39,Polo Shirt,29.99
4,2022-01-10,7023,44,Sweatshirt,49.99
...,...,...,...,...,...
1813,2020-01-26,1110,14,Plaid Shorts,22.99
1814,2022-11-02,9176,54,Casual Dress,69.99
1815,2023-05-12,8588,30,Maxi Skirt,39.99
1816,2021-12-28,1892,27,Leather Jacket,1119.99
