In [1]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [6]:
# Build a 4x3 dataframe of random normal samples with labelled rows and columns.
df = pd.DataFrame(
    data=randn(4, 3),
    index=["A", "B", "C", "D"],
    columns=["Column1", "Column2", "Column3"],
)
df

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
B,-1.443732,-0.408692,1.84971
C,0.55769,0.239644,-0.377234
D,0.930388,-0.51917,-1.260907


In [7]:
df > -1 # Values greater than -1 become True; all other values (less than or equal to -1) become False

Unnamed: 0,Column1,Column2,Column3
A,True,True,True
B,False,True,True
C,True,True,True
D,True,True,False


In [8]:
booleanDf = df > 0 # Store the element-wise boolean mask (True where a value is greater than 0) in a variable

In [9]:
booleanDf # Show the stored boolean mask

Unnamed: 0,Column1,Column2,Column3
A,True,True,True
B,False,False,True
C,True,True,False
D,True,False,False


In [10]:
df[booleanDf] # Values where the mask is True are kept; values where it is False become NaN

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
B,,,1.84971
C,0.55769,0.239644,
D,0.930388,,


In [11]:
df[df > 0] # Same result, with the mask written inline instead of stored in a variable

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
B,,,1.84971
C,0.55769,0.239644,
D,0.930388,,


***

In [12]:
df["Column1"] # Select a single column; the result is a Series (we did this already)

A    0.083878
B   -1.443732
C    0.557690
D    0.930388
Name: Column1, dtype: float64

In [13]:
df["Column1"] > 0 # Comparing a column with a number returns a boolean Series

A     True
B    False
C     True
D     True
Name: Column1, dtype: bool

In [14]:
df[df["Column1"] > 0] # Only rows where the condition is True are kept, so row "B" (False above) is not printed

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
C,0.55769,0.239644,-0.377234
D,0.930388,-0.51917,-1.260907


Here the program checks "Column1" from top to bottom. At index "A" the value is greater than 0, so the program prints the whole "A" row. The same is true for indices "C" and "D". At the intersection of "Column1" and index "B", however, the value is less than 0, which is why no value from that row is printed.
#### Let's make another one:

In [15]:
df["Column1"] > 0.5 # Same comparison with a stricter threshold: only "C" and "D" are True

A    False
B    False
C     True
D     True
Name: Column1, dtype: bool

In [16]:
df[df["Column1"] > 0.5] # Keeps only rows "C" and "D", where the mask above is True

Unnamed: 0,Column1,Column2,Column3
C,0.55769,0.239644,-0.377234
D,0.930388,-0.51917,-1.260907


***
#### Let's continue with slightly harder examples and use the "and" operator to combine two conditions:

In [33]:
df # Show the full dataframe again for reference

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
B,-1.443732,-0.408692,1.84971
C,0.55769,0.239644,-0.377234
D,0.930388,-0.51917,-1.260907


In [36]:
df[df["Column1"] > 0] # Rows where Column1 > 0 on its own, for comparison with the combined filter below

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
C,0.55769,0.239644,-0.377234
D,0.930388,-0.51917,-1.260907


In [37]:
df[df["Column2"] > 0] # Rows where Column2 > 0 on its own, for comparison with the combined filter below

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
C,0.55769,0.239644,-0.377234


##### Note: In pandas, the element-wise "and" operator is "&" (not Python's `and`), and each condition must be wrapped in parentheses

In [38]:
df[(df["Column1"] > 0) & (df["Column2"] > 0)] # Keeps only the rows where both conditions are True

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
C,0.55769,0.239644,-0.377234


#### As you can see in Statements [36] and [37], the indices common to both results are "A" and "C". That's why the program prints only those rows and leaves out "D", which satisfies only the first condition.
***
#### Let's use "or" operator:

In [39]:
df[df["Column1"] > 0] # Rows where Column1 > 0 on its own, for comparison with the combined filter below

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
C,0.55769,0.239644,-0.377234
D,0.930388,-0.51917,-1.260907


In [40]:
df[df["Column2"] > 0] # Rows where Column2 > 0 on its own, for comparison with the combined filter below

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
C,0.55769,0.239644,-0.377234


##### Note: In pandas, the "or" operator is "|"

In [42]:
df[(df["Column1"] > 0) | (df["Column2"] > 0)] # Keeps the rows where at least one of the two conditions is True

Unnamed: 0,Column1,Column2,Column3
A,0.083878,0.41777,0.799566
C,0.55769,0.239644,-0.377234
D,0.930388,-0.51917,-1.260907


#### As you can see in Statements [39] and [40], every index except "B" appears in at least one of the two results. That's why the program prints every row that either statement includes.
***

## Methods for Adding New Columns in DataFrame:
### 1- Assigning a pandas Series

In [43]:
# Method 1: assign a Series whose index uses the same row labels as df.
df["Column4"] = pd.Series(
    data=randn(4),
    index=["A", "B", "C", "D"],
)

In [44]:
df # "Column4" now appears as the last column

Unnamed: 0,Column1,Column2,Column3,Column4
A,0.083878,0.41777,0.799566,1.586532
B,-1.443732,-0.408692,1.84971,-1.351395
C,0.55769,0.239644,-0.377234,2.369576
D,0.930388,-0.51917,-1.260907,-1.158347


***
### 2- Assigning a NumPy array directly

In [55]:
df["Column5"] = randn(4) # Method 2: assign an array directly; it has one value per row (4 values for the 4 rows)

In [56]:
df # "Column5" now appears as the last column

Unnamed: 0,Column1,Column2,Column3,Column4,Column5
A,0.083878,0.41777,0.799566,1.586532,-0.95921
B,-1.443732,-0.408692,1.84971,-1.351395,1.780559
C,0.55769,0.239644,-0.377234,2.369576,0.899924
D,0.930388,-0.51917,-1.260907,-1.158347,-1.524003


***
## set_index() and Changing Indices:

In [62]:
df["Column6"] = ["newValue1", "newValue2", "newValue3", "newValue4"] # Add a column of string values instead of numbers

In [63]:
df # "Column6" holds strings next to the numeric columns

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
A,0.083878,0.41777,0.799566,1.586532,-0.95921,newValue1
B,-1.443732,-0.408692,1.84971,-1.351395,1.780559,newValue2
C,0.55769,0.239644,-0.377234,2.369576,0.899924,newValue3
D,0.930388,-0.51917,-1.260907,-1.158347,-1.524003,newValue4


#### Let's say we want to use the values of "Column6" as the index labels:

In [64]:
df.set_index("Column6") # Shows a dataframe indexed by the "Column6" values; df itself is not modified

Unnamed: 0_level_0,Column1,Column2,Column3,Column4,Column5
Column6,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
newValue1,0.083878,0.41777,0.799566,1.586532,-0.95921
newValue2,-1.443732,-0.408692,1.84971,-1.351395,1.780559
newValue3,0.55769,0.239644,-0.377234,2.369576,0.899924
newValue4,0.930388,-0.51917,-1.260907,-1.158347,-1.524003


#### Here the index labels "A", "B", "C" and "D" were replaced by the values of "Column6", and "Column6" became the name of the index. But when we check the dataframe again, we'll see that it has not been updated:

In [65]:
df # Still indexed by "A"-"D", with "Column6" as a regular column

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
A,0.083878,0.41777,0.799566,1.586532,-0.95921,newValue1
B,-1.443732,-0.408692,1.84971,-1.351395,1.780559,newValue2
C,0.55769,0.239644,-0.377234,2.369576,0.899924,newValue3
D,0.930388,-0.51917,-1.260907,-1.158347,-1.524003,newValue4


#### That's because set_index() has the parameter "inplace = False" by default (press Shift+Tab on the function to see its signature), so it returns a new dataframe instead of changing the original. We should set it to "True" to update the dataframe itself:

In [67]:
# inplace = True changes df itself instead of returning a new dataframe.
# NOTE(review): re-running this cell should fail, because "Column6" is no longer a column afterwards - confirm.
df.set_index("Column6", inplace = True)

In [69]:
df # Now the index holds the "Column6" values, so the change was applied to df

Unnamed: 0_level_0,Column1,Column2,Column3,Column4,Column5
Column6,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
newValue1,0.083878,0.41777,0.799566,1.586532,-0.95921
newValue2,-1.443732,-0.408692,1.84971,-1.351395,1.780559
newValue3,0.55769,0.239644,-0.377234,2.369576,0.899924
newValue4,0.930388,-0.51917,-1.260907,-1.158347,-1.524003
