# Pandas DataFrame

In [None]:
import numpy as np
import pandas as pd

## DataFrame

In [None]:
# Column labels for the demo frame: one single-letter label per column.
columns = list('WXYZ')

# Row labels (the index) for the demo frame.
index = list('abcde')

In [None]:
# Seed NumPy's legacy global generator so the integers below are reproducible.
np.random.seed(42)

# 5 rows x 4 columns of integers drawn from [-100, 100) (upper bound excluded).
data = np.random.randint(low=-100, high=100, size=(5, 4))

In [None]:
data

array([[  2,  79,  -8, -86],
       [  6, -29,  88, -80],
       [  2,  21, -26, -13],
       [ 16,  -1,   3,  51],
       [ 30,  49, -48, -99]])

In [None]:
# Wrap the integer array in a labelled frame: rows a-e, columns W-Z.
df = pd.DataFrame(data, index=index, columns=columns)

In [None]:
df

Unnamed: 0,W,X,Y,Z
a,2,79,-8,-86
b,6,-29,88,-80
c,2,21,-26,-13
d,16,-1,3,51
e,30,49,-48,-99


In [None]:
# A single column label in brackets selects that one column.
df['W']

a     2
b     6
c     2
d    16
e    30
Name: W, dtype: int64

In [None]:
# Check what kind of object a single selected column is (a pandas Series).
type(df['W'])

pandas.core.series.Series

In [None]:
# A list of labels (note the double brackets) selects several columns at once.
df[['W', 'X']]

Unnamed: 0,W,X
a,2,79
b,6,-29
c,2,21
d,16,-1
e,30,49


## Creating a New Column

In [22]:
# Element-wise sum of columns W and Y, stored under the new label 'new'.
df['new'] = df['W'].add(df['Y'])

In [None]:
df

Unnamed: 0,W,X,Y,Z,new
a,2,79,-8,-86,-6
b,6,-29,88,-80,94
c,2,21,-26,-13,-24
d,16,-1,3,51,19
e,30,49,-48,-99,-18


## Remove a Column

In [23]:
# drop() returns a new frame, so rebind df to actually discard the 'new' column.
df = df.drop(columns='new')

## Selecting Row

In [None]:
# .loc selects a row by its index label.
df.loc['a']

W     2
X    79
Y    -8
Z   -86
Name: a, dtype: int64

In [24]:
# A list of index labels selects several rows.
df.loc[['a', 'c']]

Unnamed: 0,W,X,Y,Z
a,2,79,-8,-86
c,2,21,-26,-13


In [25]:
# .iloc selects by integer position; position 0 is the first row.
df.iloc[0]

W     2
X    79
Y    -8
Z   -86
Name: a, dtype: int64

In [26]:
# Negative positions count from the end; -1 is the last row.
df.iloc[-1]

W    30
X    49
Y   -48
Z   -99
Name: e, dtype: int64

In [27]:
# Positional slice: rows at positions 0, 1 and 2 (the stop position 3 is excluded).
df.iloc[0:3]

Unnamed: 0,W,X,Y,Z
a,2,79,-8,-86
b,6,-29,88,-80
c,2,21,-26,-13


## Remove Row

In [28]:
# Shows the frame without row 'c'; the result is not assigned, so df is left as it was.
df.drop('c')

Unnamed: 0,W,X,Y,Z
a,2,79,-8,-86
b,6,-29,88,-80
d,16,-1,3,51
e,30,49,-48,-99


In [29]:
# Row 'c' is still here: the drop() in the previous cell was not assigned back to df.
df

Unnamed: 0,W,X,Y,Z
a,2,79,-8,-86
b,6,-29,88,-80
c,2,21,-26,-13
d,16,-1,3,51
e,30,49,-48,-99


In [30]:
# Rebind df to the frame returned by drop() so row 'c' is really removed.
df = df.drop(index='c')

## Grab Subsection

In [31]:
df

Unnamed: 0,W,X,Y,Z
a,2,79,-8,-86
b,6,-29,88,-80
d,16,-1,3,51
e,30,49,-48,-99


In [32]:
# .loc[row_label, column_label] picks out a single value.
df.loc['a', 'W']

2

In [33]:
# Several row labels with one column label: the chosen rows of column W.
df.loc[['a', 'd'], 'W']

a     2
d    16
Name: W, dtype: int64

In [34]:
# Lists on both axes select a rectangular sub-frame (rows a, d x columns W, X).
df.loc[['a', 'd'], ['W', 'X']]

Unnamed: 0,W,X
a,2,79
d,16,-1


## Conditional Selection

In [35]:
# Element-wise comparison: produces a frame of True/False with the same labels.
df > 0

Unnamed: 0,W,X,Y,Z
a,True,True,False,False
b,True,False,True,False
d,True,False,True,True
e,True,True,False,False


In [36]:
# Indexing with a boolean frame keeps values where the mask is True;
# the other positions come back as missing values (NaN).
df[df > 0]

Unnamed: 0,W,X,Y,Z
a,2,79.0,,
b,6,,88.0,
d,16,,3.0,51.0
e,30,49.0,,


In [37]:
# The masking above only produced a new view of the data; df itself still holds every value.
df

Unnamed: 0,W,X,Y,Z
a,2,79,-8,-86
b,6,-29,88,-80
d,16,-1,3,51
e,30,49,-48,-99


In [39]:
# Comparing a single column gives a boolean Series with one entry per row.
df['X'] > 0

a     True
b    False
d    False
e     True
Name: X, dtype: bool

In [40]:
# A boolean Series used as the indexer keeps only the rows where it is True.
df[df['X'] > 0]

Unnamed: 0,W,X,Y,Z
a,2,79,-8,-86
e,30,49,-48,-99


## Multiple Conditions

In [41]:
# Combine two row masks with & (element-wise AND): keep rows where W > 0 and Y > 1.
df.loc[df['W'].gt(0) & df['Y'].gt(1)]

Unnamed: 0,W,X,Y,Z
b,6,-29,88,-80
d,16,-1,3,51


## Reset Index

In [42]:
df

Unnamed: 0,W,X,Y,Z
a,2,79,-8,-86
b,6,-29,88,-80
d,16,-1,3,51
e,30,49,-48,-99


In [43]:
# reset_index() moves the current labels into a column named 'index' and numbers the rows from 0.
# The result is only displayed, not assigned, so df keeps its a/b/d/e labels.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,a,2,79,-8,-86
1,b,6,-29,88,-80
2,d,16,-1,3,51
3,e,30,49,-48,-99


**How to change a Column into the Index**

In [48]:
# One state code per remaining row; these become the 'States' column.
new_ind = 'CA NY WY OR'.split()

In [49]:
# Attach the list as a new column; it has four entries, one for each of the four rows left in df.
df['States'] = new_ind

In [50]:
df

Unnamed: 0,W,X,Y,Z,States
a,2,79,-8,-86,CA
b,6,-29,88,-80,NY
d,16,-1,3,51,WY
e,30,49,-48,-99,OR


In [51]:
# set_index('States') shows the frame with the States column used as the row labels.
# The result is not assigned, so df still has States as an ordinary column afterwards.
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2,79,-8,-86
NY,6,-29,88,-80
WY,16,-1,3,51
OR,30,49,-48,-99


## Some Summaries / Statistics

In [53]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns W-Z;
# the text column States does not appear in the result.
df.describe()

Unnamed: 0,W,X,Y,Z
count,4.0,4.0,4.0,4.0
mean,13.5,24.5,8.75,-53.5
std,12.476645,48.590122,57.197756,70.11657
min,2.0,-29.0,-48.0,-99.0
25%,5.0,-8.0,-18.0,-89.25
50%,11.0,24.0,-2.5,-83.0
75%,19.5,56.5,24.25,-47.25
max,30.0,79.0,88.0,51.0


In [54]:
# Structural overview: index range, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, a to e
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   W       4 non-null      int64 
 1   X       4 non-null      int64 
 2   Y       4 non-null      int64 
 3   Z       4 non-null      int64 
 4   States  4 non-null      object
dtypes: int64(4), object(1)
memory usage: 352.0+ bytes


In [55]:
# The dtype of each column, returned as a Series keyed by column name.
df.dtypes

W          int64
X          int64
Y          int64
Z          int64
States    object
dtype: object