https://www.youtube.com/watch?v=XaCSdr7pPmY&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=20

# When should I use the "inplace" parameter in pandas?

##Always check with shift+tab if 'inplace' default it is false! Otherwise the changes will be permanentelly

In [51]:
import pandas as pd

In [52]:
# read a dataset of UFO reports into a DataFrame
ufo = pd.read_csv('http://bit.ly/uforeports')

In [53]:
ufo.shape

(18241, 5)

In [54]:
ufo.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [55]:
#Just for visualization
ufo.drop

<bound method DataFrame.drop of                        City Colors Reported Shape Reported State  \
0                    Ithaca             NaN       TRIANGLE    NY   
1               Willingboro             NaN          OTHER    NJ   
2                   Holyoke             NaN           OVAL    CO   
3                   Abilene             NaN           DISK    KS   
4      New York Worlds Fair             NaN          LIGHT    NY   
5               Valley City             NaN           DISK    ND   
6               Crater Lake             NaN         CIRCLE    CA   
7                      Alma             NaN           DISK    MI   
8                   Eklutna             NaN          CIGAR    AK   
9                   Hubbard             NaN       CYLINDER    OR   
10                  Fontana             NaN          LIGHT    CA   
11                 Waterloo             NaN       FIREBALL    AL   
12                   Belton             RED         SPHERE    SC   
13              

In [56]:
# remove the 'City' column (doesn't affect the DataFrame since inplace=False)
ufo.drop('City',axis=1).head(3)

Unnamed: 0,Colors Reported,Shape Reported,State,Time
0,,TRIANGLE,NY,6/1/1930 22:00
1,,OTHER,NJ,6/30/1930 20:00
2,,OVAL,CO,2/15/1931 14:00


In [57]:
ufo.head(3)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [58]:
# When use inplace, nothing immediatally happen, which means something change!
# shift tab shows inplace is false by default
# remove the 'City' column (does affect the DataFrame since inplace=True)
ufo.drop('City',axis=1, inplace = True)

In [11]:
#Just use inplace when have certainly from the changes you wanna implement

In [68]:
ufo.head()

Unnamed: 0_level_0,City,Colors Reported,Shape Reported,State
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6/1/1930 22:00,Ithaca,,TRIANGLE,NY
6/30/1930 20:00,Willingboro,,OTHER,NJ
2/15/1931 14:00,Holyoke,,OVAL,CO
6/1/1931 13:00,Abilene,,DISK,KS
4/18/1933 19:00,New York Worlds Fair,,LIGHT,NY


In [13]:
#dropna = reduce all data with NA
#.shape show how many are reduce form data

In [69]:
# drop a row if any value is missing from that row (doesn't affect the DataFrame since inplace=False)
ufo.dropna(how = 'any').shape

(2486, 4)

In [77]:
# confirm that no rows were actually removed
ufo.shape

(18241, 4)

In [16]:
#Make changes for visualization, however don't change the DataFrame

In [79]:
ufo = pd.read_csv('http://bit.ly/uforeports')

In [80]:
# use an assignment statement instead of the 'inplace' parameter
ufo = ufo.set_index('Time')
ufo.tail()

Unnamed: 0_level_0,City,Colors Reported,Shape Reported,State
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12/31/2000 23:00,Grant Park,,TRIANGLE,IL
12/31/2000 23:00,Spirit Lake,,DISK,IA
12/31/2000 23:45,Eagle River,,,WI
12/31/2000 23:45,Eagle River,RED,LIGHT,WI
12/31/2000 23:59,Ybor,,OVAL,FL


In [18]:
#Option1 - may be more efficient

In [83]:
ufo = pd.read_csv('http://bit.ly/uforeports')

In [84]:
ufo.set_index('Time', inplace = True) 

In [20]:
#Option2 

In [86]:
ufo = pd.read_csv('http://bit.ly/uforeports')

In [87]:
ufo = ufo.set_index('Time') 

In [88]:
ufo.tail()

Unnamed: 0_level_0,City,Colors Reported,Shape Reported,State
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12/31/2000 23:00,Grant Park,,TRIANGLE,IL
12/31/2000 23:00,Spirit Lake,,DISK,IA
12/31/2000 23:45,Eagle River,,,WI
12/31/2000 23:45,Eagle River,RED,LIGHT,WI
12/31/2000 23:59,Ybor,,OVAL,FL


In [None]:
#Fill missing values

In [89]:
# compare with "forward fill" strategy (doesn't affect the DataFrame since inplace=False)
ufo.fillna(method = 'ffill').tail()

Unnamed: 0_level_0,City,Colors Reported,Shape Reported,State
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12/31/2000 23:00,Grant Park,RED,TRIANGLE,IL
12/31/2000 23:00,Spirit Lake,RED,DISK,IA
12/31/2000 23:45,Eagle River,RED,DISK,WI
12/31/2000 23:45,Eagle River,RED,LIGHT,WI
12/31/2000 23:59,Ybor,RED,OVAL,FL


In [50]:
# fill missing values using "backward fill" strategy (doesn't affect the DataFrame since inplace=False)
ufo.fillna(method = 'bfill').tail()

Unnamed: 0_level_0,Colors Reported,Shape Reported,State
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12/31/2000 23:00,RED,TRIANGLE,IL
12/31/2000 23:00,RED,DISK,IA
12/31/2000 23:45,RED,LIGHT,WI
12/31/2000 23:45,RED,LIGHT,WI
12/31/2000 23:59,,OVAL,FL
