In [2]:
import numpy as np
import pandas as pd

In [3]:
from numpy.random import randn

In [5]:
np.random.seed(101)
# seed to get the same random number, who ever uses same seed value

In [16]:
df = pd.DataFrame(randn(5,4),['A','B','C','D','E'],['W','X','Y','Z'])    
# first parameter will be data, second is row names or index, third is column names

In [7]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [9]:
df['W']      # select the values of a particular column
# This returns a series

A    0.302665
B   -0.134841
C    0.807706
D   -0.497104
E   -0.116773
Name: W, dtype: float64

In [11]:
type(df['W'])

pandas.core.series.Series

In [13]:
type(df)

pandas.core.frame.DataFrame

In [15]:
df.W

A    0.302665
B   -0.134841
C    0.807706
D   -0.497104
E   -0.116773
Name: W, dtype: float64

In [18]:
df[['W','X']]      # Select the values of multiple columns
# This returns a dataframe

Unnamed: 0,W,X
A,-0.993263,0.1968
B,1.025984,-0.156598
C,2.154846,-0.610259
D,0.147027,-0.479448
E,-0.925874,1.862864


In [19]:
# To create a new column
df['new'] = df['W'] + df['Y']

In [20]:
df

Unnamed: 0,W,X,Y,Z,new
A,-0.993263,0.1968,-1.136645,0.000366,-2.129908
B,1.025984,-0.156598,-0.031579,0.649826,0.994405
C,2.154846,-0.610259,-0.755325,-0.346419,1.399521
D,0.147027,-0.479448,0.558769,1.02481,0.705796
E,-0.925874,1.862864,-1.133817,0.610478,-2.059691


In [22]:
# To remove the columns
df.drop('new',axis=1)    
# By default axis=0, which represents index or row names
# axis=1, represents column names

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481
E,-0.925874,1.862864,-1.133817,0.610478


In [23]:
df

Unnamed: 0,W,X,Y,Z,new
A,-0.993263,0.1968,-1.136645,0.000366,-2.129908
B,1.025984,-0.156598,-0.031579,0.649826,0.994405
C,2.154846,-0.610259,-0.755325,-0.346419,1.399521
D,0.147027,-0.479448,0.558769,1.02481,0.705796
E,-0.925874,1.862864,-1.133817,0.610478,-2.059691


In [24]:
# to stay and occur the changes in inplace, the inplace parameter should be set as True
# that is to make the changes the permanent, inplace should be True
# By default, inplace will be as False, in order to avoid loss of data
df.drop('new',axis=1,inplace=True) 

In [25]:
df

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481
E,-0.925874,1.862864,-1.133817,0.610478


In [26]:
# To remove rows
df.drop('E',axis=0)

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481


In [27]:
df

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481
E,-0.925874,1.862864,-1.133817,0.610478


In [28]:
df.shape

(5, 4)

In [30]:
# In the shape of the dataframe i.e.,(rows,columns)
# at index 0, we will have rows, therefore we refer rows as 0 axis
# at index 1, we will have columns, therefore we refer columns as 1 axis

In [31]:
# Select Columns
df[['W','X']]

Unnamed: 0,W,X
A,-0.993263,0.1968
B,1.025984,-0.156598
C,2.154846,-0.610259
D,0.147027,-0.479448
E,-0.925874,1.862864


In [32]:
# Select Rows
# row name based location 
df.loc['A']    # returns a series

W   -0.993263
X    0.196800
Y   -1.136645
Z    0.000366
Name: A, dtype: float64

In [35]:
# index based location
df.iloc[0]

W   -0.993263
X    0.196800
Y   -1.136645
Z    0.000366
Name: A, dtype: float64

In [39]:
# to select a particular value
df.loc['B','Y']
# first parameter represents row
# second parameter reprsents column

-0.031579143908112575

In [40]:
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,-0.993263,-1.136645
B,1.025984,-0.031579
