In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [5]:
# Seed NumPy's legacy global RNG so the randn() draws below start from a fixed state.
np.random.seed(101)

In [7]:
# Seed in the same cell that draws the numbers: the frame is then identical on
# every run of this cell, not only on the first run after the seed cell.
# NOTE(review): execution counts jump from In [5] to In [7], so the saved output
# may come from a repeated draw - re-run the notebook to refresh it.
np.random.seed(101)
df = pd.DataFrame(
    randn(5, 4),                      # 5 rows x 4 columns of standard-normal samples
    index=['A', 'B', 'C', 'D', 'E'],  # row labels
    columns=['W', 'X', 'Y', 'Z'],     # column labels
)
df

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [8]:
# In the DataFrame above, each column is its own pandas Series.
# The whole DataFrame is simply 4 pandas Series sharing a common index.

In [9]:
df['W']  # bracket notation with a single column name returns that column as a Series

A    0.302665
B   -0.134841
C    0.807706
D   -0.497104
E   -0.116773
Name: W, dtype: float64

In [10]:
type(df['W'])  # a single selected column is a pandas Series

pandas.core.series.Series

In [11]:
type(df)  # the table as a whole is a pandas DataFrame

pandas.core.frame.DataFrame

In [13]:
df[['W', 'Z']] # pass a list of column names inside the indexing brackets to get back a DataFrame with just those columns

Unnamed: 0,W,Z
A,0.302665,-1.159119
B,-0.134841,0.184502
C,0.807706,0.329646
D,-0.497104,0.484752
E,-0.116773,1.996652


In [14]:
# Store the two-column selection under its own name so it can be reused.
selected_columns = ['W', 'Z']
small_df = df[selected_columns]
small_df

Unnamed: 0,W,Z
A,0.302665,-1.159119
B,-0.134841,0.184502
C,0.807706,0.329646
D,-0.497104,0.484752
E,-0.116773,1.996652


In [16]:
df  # the original frame still has all four columns; selecting columns did not modify it

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [21]:
df['New'] = df['W'] + df['Y'] # assigning to a new column name adds that column to df in place (element-wise sum of W and Y)

In [22]:
df  # 'New' now appears as a fifth column

Unnamed: 0,W,X,Y,Z,New
A,0.302665,1.693723,-1.706086,-1.159119,-1.40342
B,-0.134841,0.390528,0.166905,0.184502,0.032064
C,0.807706,0.07296,0.638787,0.329646,1.446493
D,-0.497104,-0.75407,-0.943406,0.484752,-1.44051
E,-0.116773,1.901755,0.238127,1.996652,0.121354


In [23]:
df.drop('New', axis = 1)  # axis=1 makes drop() look for 'New' among the column labels; the result is a new frame

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [24]:
df  # note: .drop() returned a new frame above; df itself still has 'New' unless inplace=True is passed

Unnamed: 0,W,X,Y,Z,New
A,0.302665,1.693723,-1.706086,-1.159119,-1.40342
B,-0.134841,0.390528,0.166905,0.184502,0.032064
C,0.807706,0.07296,0.638787,0.329646,1.446493
D,-0.497104,-0.75407,-0.943406,0.484752,-1.44051
E,-0.116773,1.901755,0.238127,1.996652,0.121354


In [25]:
# inplace=True mutates df itself (and returns None), so 'New' is removed for good.
# Re-running this cell raises KeyError because the column no longer exists.
df.drop('New', axis = 1, inplace = True)

In [26]:
df  # 'New' is gone: the inplace drop modified df itself

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [28]:
df.drop('E')  # axis=0 (row labels) is the default, so this returns a frame without row 'E'

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752


In [29]:
df  # row 'E' is still present: drop('E') was not in place

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [30]:
df.shape # a (rows, columns) tuple: 5 rows and 4 columns. Rows are axis 0 and columns are axis 1
         # because those are their positions within this shape tuple

(5, 4)

In [31]:
# Selecting rows in a DataFrame

In [32]:
df.loc['A']   # .loc is an indexer attribute, not a method call, which is why it takes square brackets
              # it selects by label: here, the row labelled 'A'

W    0.302665
X    1.693723
Y   -1.706086
Z   -1.159119
Name: A, dtype: float64

In [33]:
# The output above shows that each row is a Series as well (indexed by the column names).

In [34]:
# .iloc stands for "integer location": it selects rows by 0-based position instead of by label

In [35]:
df.iloc[0]  # position 0 is the first row, i.e. the same row as df.loc['A']

W    0.302665
X    1.693723
Y   -1.706086
Z   -1.159119
Name: A, dtype: float64

In [36]:
# So rows can be selected in two ways: by label with .loc, or by integer position with .iloc.

In [37]:
# Now let's select subsets of rows and columns

In [38]:
df  # full frame again, for reference before picking out individual cells

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [39]:
df.loc['B', 'Y'] # row label first, then column label: returns the single value at row 'B', column 'Y'

0.16690463609281317

In [40]:
df  # full frame again, for reference before selecting a sub-frame

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [41]:
df.loc[['A','B'], ['W', 'Y']] # a list of row labels, then a list of column labels: returns the matching sub-DataFrame

Unnamed: 0,W,Y
A,0.302665,-1.706086
B,-0.134841,0.166905
