## **Pandas Library**

* **DataFrames: Introduction**

In [99]:
import numpy as np
import pandas as pd


In [100]:
# Import randn by name (from <module> import <name>) so it can be called directly as randn(...)
from numpy.random import randn

In [101]:
# Seed NumPy's global random generator so the random table below is reproducible
np.random.seed(101)

In [102]:
# df is the DataFrame: 5 rows (LA..LE) by 4 columns (C1..C4) filled with randn samples
df = pd.DataFrame(
    data=randn(5, 4),
    index=['LA', 'LB', 'LC', 'LD', 'LE'],
    columns=['C1', 'C2', 'C3', 'C4'],
)

print(df)

          C1        C2        C3        C4
LA  2.706850  0.628133  0.907969  0.503826
LB  0.651118 -0.319318 -0.848077  0.605965
LC -2.018168  0.740122  0.528813 -0.589001
LD  0.188695 -0.758872 -0.933237  0.955057
LE  0.190794  1.978757  2.605967  0.683509


In [103]:
# Select a single column by its label; the result is a Series named after the column
df['C3']

LA    0.907969
LB   -0.848077
LC    0.528813
LD   -0.933237
LE    2.605967
Name: C3, dtype: float64

In [104]:
# Confirm that df is a pandas DataFrame
type(df)

pandas.core.frame.DataFrame

In [105]:
# Careful: a slice inside [] selects ROWS by index label, not columns.
# 'C2' is not a row label and sorts before 'LA', so this slice keeps every row
# and every column. For the columns from C2 onward, use df.loc[:, 'C2':] instead.
df['C2':]

Unnamed: 0,C1,C2,C3,C4
LA,2.70685,0.628133,0.907969,0.503826
LB,0.651118,-0.319318,-0.848077,0.605965
LC,-2.018168,0.740122,0.528813,-0.589001
LD,0.188695,-0.758872,-0.933237,0.955057
LE,0.190794,1.978757,2.605967,0.683509


In [106]:
# Pass a list of labels to select several columns at once; the result is a DataFrame
df[['C1','C3']]

Unnamed: 0,C1,C3
LA,2.70685,0.907969
LB,0.651118,-0.848077
LC,-2.018168,0.528813
LD,0.188695,-0.933237
LE,0.190794,2.605967


In [107]:
# Assigning to a label that does not exist yet creates a new column;
# here new_C5 holds the element-wise sum of C1 and C2
df['new_C5'] = df['C1'] + df['C2']

df

Unnamed: 0,C1,C2,C3,C4,new_C5
LA,2.70685,0.628133,0.907969,0.503826,3.334983
LB,0.651118,-0.319318,-0.848077,0.605965,0.3318
LC,-2.018168,0.740122,0.528813,-0.589001,-1.278046
LD,0.188695,-0.758872,-0.933237,0.955057,-0.570177
LE,0.190794,1.978757,2.605967,0.683509,2.169552


In [108]:
# To remove a column, pass its label with axis=1 (axis=0, the default, refers to rows).
# drop() returns a new DataFrame; df itself is left untouched.

df.drop('new_C5',axis = 1)


Unnamed: 0,C1,C2,C3,C4
LA,2.70685,0.628133,0.907969,0.503826
LB,0.651118,-0.319318,-0.848077,0.605965
LC,-2.018168,0.740122,0.528813,-0.589001
LD,0.188695,-0.758872,-0.933237,0.955057
LE,0.190794,1.978757,2.605967,0.683509


In [109]:
# df still contains new_C5: the drop() call returned a new DataFrame and its result was not assigned
df

Unnamed: 0,C1,C2,C3,C4,new_C5
LA,2.70685,0.628133,0.907969,0.503826,3.334983
LB,0.651118,-0.319318,-0.848077,0.605965,0.3318
LC,-2.018168,0.740122,0.528813,-0.589001,-1.278046
LD,0.188695,-0.758872,-0.933237,0.955057,-0.570177
LE,0.190794,1.978757,2.605967,0.683509,2.169552


In [110]:
# drop() returns a new DataFrame, so to keep the change assign the result back to df.
# (df.drop('new_C5', axis=1, inplace=True) has the same effect on df, but plain
# reassignment makes the data flow explicit and can be chained with other methods.)
df = df.drop('new_C5', axis=1)


In [111]:
# Now the column is gone from df itself
df

Unnamed: 0,C1,C2,C3,C4
LA,2.70685,0.628133,0.907969,0.503826
LB,0.651118,-0.319318,-0.848077,0.605965
LC,-2.018168,0.740122,0.528813,-0.589001
LD,0.188695,-0.758872,-0.933237,0.955057
LE,0.190794,1.978757,2.605967,0.683509


In [116]:
# Without axis, drop() works on rows (axis=0): this returns a copy without row LD
# and, because the result is not assigned, df keeps all five rows
df.drop('LD')

Unnamed: 0,C1,C2,C3,C4
LA,2.70685,0.628133,0.907969,0.503826
LB,0.651118,-0.319318,-0.848077,0.605965
LC,-2.018168,0.740122,0.528813,-0.589001
LE,0.190794,1.978757,2.605967,0.683509


In [117]:
# shape is a (rows, columns) tuple; df still has 5 rows because the row drop above was not assigned
df.shape

(5, 4)

In [None]:
# Recap of column selection: a list of labels inside [] picks those columns
df[['C1','C3']]

Unnamed: 0,C1,C3
LA,2.70685,0.907969
LB,0.651118,-0.848077
LC,-2.018168,0.528813
LD,0.188695,-0.933237
LE,0.190794,2.605967


In [123]:
# Columns were selected with []; rows are selected by index label with .loc.
# A list of labels returns those rows as a DataFrame.

df.loc[['LB', 'LD']]

Unnamed: 0,C1,C2,C3,C4
LB,0.651118,-0.319318,-0.848077,0.605965
LD,0.188695,-0.758872,-0.933237,0.955057


In [124]:
# .iloc selects by integer position (0-based): position 2 is the third row, LC, returned as a Series
df.iloc[2]

C1   -2.018168
C2    0.740122
C3    0.528813
C4   -0.589001
Name: LC, dtype: float64

In [128]:
# Select a single value by giving .loc a row label and a column label
df.loc['LC','C1']

-2.018168244037392

In [129]:
# Lists of row and column labels select a sub-table: rows LA and LB, columns C1 and C2
df.loc[['LA','LB'],['C1','C2']]

Unnamed: 0,C1,C2
LA,2.70685,0.628133
LB,0.651118,-0.319318
