# Pandas library
* Built on top of NumPy
* Allows fast analysis, data cleaning, and data preparation
* Excels in performance and productivity
* It has built-in visualization features

**Tip: press Shift + Tab to see more info about the object under the cursor**

In [2]:
import numpy as np
import pandas as pd

In [3]:
# String labels reused by the Series examples further down.
labels = ["a", "b", "c"]

In [4]:
# Plain Python list of numbers reused by the array and Series examples.
my_data = [10, 20, 30]

### Normal way

In [5]:
# Build a NumPy array from the Python list.
arr = np.array(my_data)

In [6]:
# Plain-Python mapping from label to value, for comparison with a Series.
d = dict(a=10, b=20, c=30)

In [7]:
# Echo the dict to inspect it.
d

{'a': 10, 'b': 20, 'c': 30}

### VS Pandas way

In [8]:
# Argument order is (data, index): the strings in `labels` become the values
# and the numbers in `my_data` become the index.
# NOTE(review): to mirror the dict `d` this would be data=my_data, index=labels — confirm intent.
pd.Series(data=labels, index=my_data)

10    a
20    b
30    c
dtype: object

In [9]:
# With no index supplied, the Series gets a default 0, 1, 2, ... integer index.
pd.Series(data=arr)

0    10
1    20
2    30
dtype: int64

## DataFrames
A DataFrame is just a bunch of Series objects that share the same index.

In [10]:
# Fix NumPy's global seed so the random numbers generated below are the same on every run.
np.random.seed(101)

In [11]:
# 5x4 frame of standard-normal draws; rows are labelled A-E, columns W-Z.
df = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=list("ABCDE"),
    columns=list("WXYZ"),
)

In [12]:
# Show the freshly built frame.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [13]:
# Check the type of the whole table.
type(df)

pandas.core.frame.DataFrame

In [14]:
# A single column pulled out with [] is a Series.
type(df["W"])

pandas.core.series.Series

In [15]:
# Select column W by name with bracket notation.
df["W"]

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

* `axis=0` means rows
* `axis=1` means columns

In [16]:
# Assigning to a column name that does not exist yet creates the column;
# here it holds the element-wise sum of X and Y.
df["new"] = df["X"].add(df["Y"])

In [17]:
# Show the frame again; it now has the extra 'new' column.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,1.536102
B,0.651118,-0.319318,-0.848077,0.605965,-1.167395
C,-2.018168,0.740122,0.528813,-0.589001,1.268936
D,0.188695,-0.758872,-0.933237,0.955057,-1.692109
E,0.190794,1.978757,2.605967,0.683509,4.584725


In [18]:
# Remove the 'new' column; inplace=True mutates df itself instead of returning a copy.
df.drop(columns="new", inplace=True)

In [19]:
# Column 'new' is gone: the inplace drop changed df itself.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


`inplace=True` applies the change to the DataFrame itself. By default `drop` returns a new DataFrame and leaves the original untouched (to prevent accidental changes).

In [20]:
# Drop the row labelled "E". Without inplace=True this only returns a new frame;
# df itself still contains row E.
df.drop(index="E")

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


#### Selecting columns

In [21]:
# Passing a list of column names returns a DataFrame with just those columns.
df[["W", "Y"]]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077
C,-2.018168,0.528813
D,0.188695,-0.933237
E,0.190794,2.605967


#### Selecting rows

In [22]:
# Label-based row lookup; the row comes back as a Series named after the row label.
df.loc['A']

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [24]:
# Position-based row lookup; position 0 is the same row as label 'A'.
df.iloc[0]

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [25]:
# Single cell: row label first, then column label.
df.loc['B', 'Y']

-0.8480769834036315

In [26]:
# Lists of row labels and column labels select a sub-frame.
df.loc[['A', 'B'], ['W', 'Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077


In [28]:
# Boolean mask over the whole frame: entries that are not > 0 come back as missing (NaN).
df[df > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [29]:
# Boolean mask built from one column: keeps only the rows where W > 0 (row C is filtered out).
df[df["W"]>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [30]:
# Filter the rows and pick the column in a single .loc call rather than chaining
# df[mask]['X'], which materialises an intermediate frame before selecting X.
df.loc[df["W"] > 0, "X"]

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [31]:
# Rows where W is positive, restricted to columns X and Y in one .loc call
# (instead of two chained [] selections), then the row labelled "A".
df.loc[df["W"] > 0, ["X", "Y"]].loc["A"]

X    0.628133
Y    0.907969
Name: A, dtype: float64