In [3]:
import pandas as pd
import numpy as np
import random 

In [6]:
# 4x4 frame of standard-normal draws: rows labelled A-D, columns labelled W-Z.
# The draws are unseeded, so the values change on every fresh kernel run.
df = pd.DataFrame(
    data=np.random.randn(4, 4),
    index=list('ABCD'),
    columns=list('WXYZ'),
)

In [7]:
df

Unnamed: 0,W,X,Y,Z
A,-0.588051,-1.282133,-0.660592,-0.98684
B,0.796134,0.958621,0.718814,-0.093343
C,0.679762,-0.408474,0.161687,-0.568521
D,0.984116,0.527485,-0.865102,2.18395


In [8]:
df['W']

A   -0.588051
B    0.796134
C    0.679762
D    0.984116
Name: W, dtype: float64

In [9]:
df[['W','X']]

Unnamed: 0,W,X
A,-0.588051,-1.282133
B,0.796134,0.958621
C,0.679762,-0.408474
D,0.984116,0.527485


In [11]:
df.W

A   -0.588051
B    0.796134
C    0.679762
D    0.984116
Name: W, dtype: float64

In [12]:
type(df['W'])

pandas.core.series.Series

In [18]:
# Creating a new column

df['new'] = df['W'] + df['Z']  # element-wise sum of columns W and Z (matched row by row), stored as column 'new'

In [19]:
df

Unnamed: 0,W,X,Y,Z,new
A,-0.588051,-1.282133,-0.660592,-0.98684,-1.574891
B,0.796134,0.958621,0.718814,-0.093343,0.702791
C,0.679762,-0.408474,0.161687,-0.568521,0.111242
D,0.984116,0.527485,-0.865102,2.18395,3.168066


In [22]:
# Without 'inplace': drop() returns a new frame lacking column 'new' (axis=1 means columns);
# df itself is left unchanged, as the next cell shows.
df.drop('new', axis=1)

Unnamed: 0,W,X,Y,Z
A,-0.588051,-1.282133,-0.660592,-0.98684
B,0.796134,0.958621,0.718814,-0.093343
C,0.679762,-0.408474,0.161687,-0.568521
D,0.984116,0.527485,-0.865102,2.18395


In [23]:
df

Unnamed: 0,W,X,Y,Z,new
A,-0.588051,-1.282133,-0.660592,-0.98684,-1.574891
B,0.796134,0.958621,0.718814,-0.093343,0.702791
C,0.679762,-0.408474,0.161687,-0.568521,0.111242
D,0.984116,0.527485,-0.865102,2.18395,3.168066


In [24]:
# With inplace=True the column is removed from df itself and the call displays nothing.
# Compare with the call without 'inplace' above, which left df untouched.
# NOTE: this cell is not re-runnable - a second run raises KeyError because 'new' is already gone.
df.drop('new', axis=1, inplace=True)

In [25]:
df

Unnamed: 0,W,X,Y,Z
A,-0.588051,-1.282133,-0.660592,-0.98684
B,0.796134,0.958621,0.718814,-0.093343
C,0.679762,-0.408474,0.161687,-0.568521
D,0.984116,0.527485,-0.865102,2.18395


In [26]:
df.loc['A'] #Select using labels

W   -0.588051
X   -1.282133
Y   -0.660592
Z   -0.986840
Name: A, dtype: float64

In [28]:
# 'iloc' is position-based indexing: rows (and optionally columns) are selected by
# zero-based integer position. Here only a row position is given, so 1 returns the
# second row (label 'B') as a Series.

df.iloc[1]

W    0.796134
X    0.958621
Y    0.718814
Z   -0.093343
Name: B, dtype: float64

In [29]:
# 'loc' is label-based indexing: we specify the row labels ('A', 'C') 
# and column labels ('X', 'Y') to extract the corresponding values.

df.loc[['A','C'],['X','Y']] 

Unnamed: 0,X,Y
A,-1.282133,-0.660592
C,-0.408474,0.161687


In [30]:
df>0

Unnamed: 0,W,X,Y,Z
A,False,False,False,False
B,True,True,True,False
C,True,False,True,False
D,True,True,False,True


In [32]:
df

Unnamed: 0,W,X,Y,Z
A,-0.588051,-1.282133,-0.660592,-0.98684
B,0.796134,0.958621,0.718814,-0.093343
C,0.679762,-0.408474,0.161687,-0.568521
D,0.984116,0.527485,-0.865102,2.18395


In [33]:
df[df>0]

Unnamed: 0,W,X,Y,Z
A,,,,
B,0.796134,0.958621,0.718814,
C,0.679762,,0.161687,
D,0.984116,0.527485,,2.18395


In [36]:
df[df['W']>0]

Unnamed: 0,W,X,Y,Z
B,0.796134,0.958621,0.718814,-0.093343
C,0.679762,-0.408474,0.161687,-0.568521
D,0.984116,0.527485,-0.865102,2.18395


In [35]:
# The mask tests column 'W' only: it keeps the rows where W > 0 and says nothing about 'Y'.
# A negative Y value (row D: W = 0.984116, Y = -0.865102) is therefore expected -
# it is not a dataset inconsistency or a filtering mistake.
df[df['W']>0]['Y']

B    0.718814
C    0.161687
D   -0.865102
Name: Y, dtype: float64

In [41]:
df[df['W']>0][['Y','Z']]


Unnamed: 0,Y,Z
B,0.718814,-0.093343
C,0.161687,-0.568521
D,-0.865102,2.18395


In [43]:
# Explanation of the line above, step 1: the comparison yields a boolean Series
# with one True/False per row (True where W > 0).
qwerty = df['W']>0
qwerty

A    False
B     True
C     True
D     True
Name: W, dtype: bool

In [44]:
# Step 2: indexing df with the boolean Series keeps only the rows marked True.
qwerty = df['W']>0
result = df[qwerty]
result

Unnamed: 0,W,X,Y,Z
B,0.796134,0.958621,0.718814,-0.093343
C,0.679762,-0.408474,0.161687,-0.568521
D,0.984116,0.527485,-0.865102,2.18395


In [45]:
# Step 3: select a list of columns from the filtered rows.
# This is equivalent to the one-liner df[df['W']>0][['Y','Z']].
qwerty = df['W']>0  # boolean mask: True where W is positive
result = df[qwerty]  # rows that pass the mask
col = ['Y','Z']  # columns to keep
result[col]

Unnamed: 0,Y,Z
B,0.718814,-0.093343
C,0.161687,-0.568521
D,-0.865102,2.18395


In [46]:
# Returns a new frame with the default 0, 1, ..., n-1 index; the old labels (A-D) move
# into a column named 'index'. The result is not assigned, so df keeps its A-D index.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,-0.588051,-1.282133,-0.660592,-0.98684
1,B,0.796134,0.958621,0.718814,-0.093343
2,C,0.679762,-0.408474,0.161687,-0.568521
3,D,0.984116,0.527485,-0.865102,2.18395


In [47]:
# One area code per row of df, in row order.
cath = ['CA', 'PA', 'VA', 'LA']

In [48]:
df['Areas']=cath

In [49]:
df

Unnamed: 0,W,X,Y,Z,Areas
A,-0.588051,-1.282133,-0.660592,-0.98684,CA
B,0.796134,0.958621,0.718814,-0.093343,PA
C,0.679762,-0.408474,0.161687,-0.568521,VA
D,0.984116,0.527485,-0.865102,2.18395,LA


In [50]:
# Returns a new frame that uses the 'Areas' column as its index.
# The result is only displayed, not assigned; reassign it (df = df.set_index('Areas')) to keep it.
df.set_index('Areas')

Unnamed: 0_level_0,W,X,Y,Z
Areas,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,-0.588051,-1.282133,-0.660592,-0.98684
PA,0.796134,0.958621,0.718814,-0.093343
VA,0.679762,-0.408474,0.161687,-0.568521
LA,0.984116,0.527485,-0.865102,2.18395
