In [1]:
import numpy as np
import pandas as pd

In [5]:
# Nested list: every inner list becomes one row, and both the rows and
# the columns receive default integer labels.
df = pd.DataFrame(data=[[1, 2], [5, 6]])
df

Unnamed: 0,0,1
0,1,2
1,5,6


In [6]:
# dict: each key becomes a column name and its list supplies that column's values
pd.DataFrame(data={'Name': ["John", "Smith"], 'Age': [30, 40]})

Unnamed: 0,Name,Age
0,John,30
1,Smith,40


In [12]:
# Date ranges
# Daily: freq='d' yields six consecutive calendar days (not weekly).
# Note that `df` here holds a DatetimeIndex, not a DataFrame.
df = pd.date_range("20190503", periods=6, freq='d')
df

DatetimeIndex(['2019-05-03', '2019-05-04', '2019-05-05', '2019-05-06',
               '2019-05-07', '2019-05-08'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Monthly: freq='M' means month-end, so starting from 2019-05-03 the first
# entry is 2019-05-31.
# NOTE(review): newer pandas releases (2.2+) deprecate the 'M' alias in
# favour of 'ME' — confirm against the pandas version in use.
dates = pd.date_range("20190503", periods=6, freq='M')
dates

DatetimeIndex(['2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31'],
              dtype='datetime64[ns]', freq='M')

In [21]:
# Six rows (one per entry in `dates`) by three columns of uniform [0, 1)
# samples.
# NOTE(review): the random generator is not seeded, so the numbers shown
# in the outputs change on every run.
df = pd.DataFrame(
    data=np.random.random(18).reshape(6, 3),
    index=dates,
    columns=["a", "b", "c"],
)
df.head(3)

Unnamed: 0,a,b,c
2019-05-31,0.826802,0.707867,0.122915
2019-06-30,0.49389,0.200973,0.89066
2019-07-31,0.553276,0.800402,0.609575


In [23]:
# Last three rows of the frame.
df.tail(3)

Unnamed: 0,a,b,c
2019-08-31,0.311122,0.890033,0.417232
2019-09-30,0.038657,0.494082,0.642495
2019-10-31,0.53537,0.750192,0.299288


In [24]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,a,b,c
count,6.0,6.0,6.0
mean,0.459853,0.640592,0.497027
std,0.264487,0.252598,0.27357
min,0.038657,0.200973,0.122915
25%,0.356814,0.547528,0.328774
50%,0.51463,0.72903,0.513403
75%,0.548799,0.78785,0.634265
max,0.826802,0.890033,0.89066


In [25]:
# Selecting a single column by label returns a Series that keeps the date index.
df["a"]

2019-05-31    0.826802
2019-06-30    0.493890
2019-07-31    0.553276
2019-08-31    0.311122
2019-09-30    0.038657
2019-10-31    0.535370
Freq: M, Name: a, dtype: float64

#### slice data

In [30]:
# Select multiple columns by label: all rows (:), columns 'a' and 'b'.
df.loc[:,['a','b']]

Unnamed: 0,a,b
2019-05-31,0.826802,0.707867
2019-06-30,0.49389,0.200973
2019-07-31,0.553276,0.800402
2019-08-31,0.311122,0.890033
2019-09-30,0.038657,0.494082
2019-10-31,0.53537,0.750192


#### positional slicing with iloc

In [35]:
df.iloc[:, 2:] # positional slice: all rows, columns from position 2 onward (like a numpy basic slice)

Unnamed: 0,c
2019-05-31,0.122915
2019-06-30,0.89066
2019-07-31,0.609575
2019-08-31,0.417232
2019-09-30,0.642495
2019-10-31,0.299288


#### dropping columns

In [37]:
# drop() returns a new frame without column 'b'; since the result is not
# assigned and inplace is not set, df itself is left unchanged.
df.drop(columns=['b'])

Unnamed: 0,a,c
2019-05-31,0.826802,0.122915
2019-06-30,0.49389,0.89066
2019-07-31,0.553276,0.609575
2019-08-31,0.311122,0.417232
2019-09-30,0.038657,0.642495
2019-10-31,0.53537,0.299288


#### Concatenating

In [50]:
# Two small frames with the same columns, used to demonstrate pd.concat.
# Age is numeric in both, so the concatenated column keeps an integer dtype
# instead of degrading to a mixed int/str object column.
df1 = pd.DataFrame({'name': ["철수", "영희", "김수환무"],
                    'Age': [30, 50, 240]})
# This index skips 2 and overlaps df1's labels 0 and 1, so concatenating the
# two frames produces duplicate row labels.
df2 = pd.DataFrame({'name': ['John', 'Smith', 'Paul'],
                    'Age': [25, 30, 50]},
                   index=[0, 1, 3])

In [51]:
# sort=False leaves the column order as-is: it controls sorting of the
# non-concatenation axis (the columns here), not the row index.
dfc = pd.concat([df1,df2], sort=False)
dfc
# The original row labels are kept, so 0 and 1 appear twice in the result;
# pass ignore_index=True to get a fresh 0..n-1 index instead.

Unnamed: 0,name,Age
0,철수,30
1,영희,50
2,김수환무,240
0,John,25
1,Smith,30
3,Paul,50


In [52]:
# Third frame for the duplicate-removal demo: the name 'Paul' also appears
# in df2. Age is stored as integers so the column stays numeric once this
# frame is concatenated with the others.
df3 = pd.DataFrame({'name': ['John3', 'Smith3', 'Paul'],
                    'Age': [25, 30, 50]},
                   index=[0, 1, 3])

# NOTE: this rebinds `df`; the date-indexed frame built earlier is no longer
# reachable under that name.
df = pd.concat([dfc,df3], sort=False)

#### drop duplicate

In [53]:
# Drop rows whose "name" repeats an earlier row; the first occurrence is
# kept (pandas default keep='first'). The result is a new frame.
df.drop_duplicates("name")
# Only one Paul left

Unnamed: 0,name,Age
0,철수,30
1,영희,50
2,김수환무,240
0,John,25
1,Smith,30
3,Paul,50
0,John3,25
1,Smith3,30


#### renaming

In [72]:
# rename() returns a new DataFrame; `df` keeps its original column names
# unless the result is assigned back.
df.rename(columns={'name':'성함', 'age':'나이'})
# Keys that match no existing column are ignored without a warning
# (pass errors='raise' to get a KeyError instead).
# NOTE(review): the saved output below shows an 'age' column being renamed, so
# it appears to come from the Excel-loaded frame rather than the concatenated
# one — confirm the intended cell order.

Unnamed: 0,성함,나이,height
0,john,47,181
1,smith,2,87
2,robert,15,167
3,trump,57,190


In [58]:
# IPython automagic for %pwd: shows the kernel's working directory.
# NOTE(review): the saved output exposes an absolute local path; consider
# clearing it before sharing the notebook.
pwd

'/home/hoseung/coding/MyLibrary'

#### Applying arbitrary numpy operation

In [71]:
# apply() runs the NumPy function over the column; np.sqrt(df["age"]) gives
# the same result.
# NOTE(review): this needs a frame with a lowercase 'age' column, i.e. the one
# read from Excel further down — confirm the intended cell order.
df["age"].apply(np.sqrt)

0    6.855655
1    1.414214
2    3.872983
3    7.549834
Name: age, dtype: float64

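### Remember: `rename` returns a new DataFrame

Korean (non-ASCII) column names work fine in pandas. Because the result of `df.rename(...)` above was never assigned, `df` itself has no `'나이'` column.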

In [73]:
# Korean column names are valid in pandas. This lookup fails only because the
# earlier df.rename(...) call returned a new frame that was never assigned,
# so `df` has no '나이' column. The error is caught and printed so the
# notebook still runs end to end; to actually get the column, use
# df.rename(columns={'age': '나이'})["나이"].
try:
    df["나이"].apply(np.sqrt)
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: '나이'

#### Reading files (excel, CSV-style text)

You can also read SQL data, try it!

pandas supports importing/exporting **Excel, CSV, text, HTML, JSON, and SQL** data formats.

In [64]:
# Reading .xlsx files needs the openpyxl package (xlrd >= 2.0 only handles
# legacy .xls files).
df = pd.read_excel('example_xls.xlsx')
# Pass sheet_name=... to read a specific sheet (by name or zero-based position).

In [61]:
# Show the frame loaded from the Excel file.
df

Unnamed: 0,name,age,height
0,john,47,181
1,smith,2,87
2,robert,15,167
3,trump,57,190
