In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
import matplotlib.pyplot as plt

# Object Creation

In [4]:
# Build a Series from a plain Python list; since no index is supplied,
# pandas assigns the default integer index 0..5.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [5]:
# Show the Series; the np.nan entry is a float, so the values are stored as float64.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [6]:
# Build the datetime index (six consecutive days starting 2013-01-01) that the
# DataFrame created from a NumPy array further down uses for its row labels.
dates = pd.date_range(start='20130101', periods=6, freq='D')

In [7]:
# Show the generated index: six daily timestamps (freq='D').
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [8]:
# Build a 6x4 frame of standard-normal draws, indexed by `dates` and with
# columns labelled A-D.
# The draws come from a locally seeded Generator so that Restart & Run All
# always produces the same frame; the previous unseeded np.random.randn call
# yielded different numbers on every run. Outputs saved before the seed was
# introduced need a re-run to refresh.
df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((6, 4)),
    index=dates,
    columns=list('ABCD'),
)

In [9]:
# Show the whole frame (6 rows x 4 columns, small enough to display in full).
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.715017,0.722664,1.914847,0.809223
2013-01-02,0.761684,0.836721,1.470041,1.584208
2013-01-03,1.020693,1.553711,0.802922,-1.274066
2013-01-04,1.387629,-1.039416,-0.722169,-1.88307
2013-01-05,-1.376838,0.009836,0.399004,-2.34021
2013-01-06,-1.152743,2.037132,1.202133,-0.581055


In [10]:
# A DataFrame can also be built from a dict whose values are convertible to
# something Series-like: scalars, a Timestamp, a Series, a NumPy array, a
# Categorical. One column is created per key.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

In [11]:
# Show the frame: the scalar entries ('A', 'B', 'F') are repeated on every one of the four rows.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [12]:
# Each column of the resulting DataFrame keeps its own dtype
# (float64, datetime64, float32, int32, category, object).
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [13]:
# In IPython/Jupyter, tab completion for column names (as well as public
# attributes) is enabled automatically: type `df2.` and press the Tab key.
#
# `<TAB>` stands for that keystroke and is not Python source, so executing
# `df2.<TAB>` literally raises "SyntaxError: invalid syntax". The expression
# below is a runnable equivalent that lists a subset of the public names
# available on the frame.
#
# Reference: https://pandas.pydata.org/pandas-docs/stable/10min.html
[name for name in dir(df2) if not name.startswith('_')][:10]

# Viewing Data

In [14]:
# Peek at the top of the frame: the first five rows (head's default count, spelled out).
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,-1.715017,0.722664,1.914847,0.809223
2013-01-02,0.761684,0.836721,1.470041,1.584208
2013-01-03,1.020693,1.553711,0.802922,-1.274066
2013-01-04,1.387629,-1.039416,-0.722169,-1.88307
2013-01-05,-1.376838,0.009836,0.399004,-2.34021


In [15]:
# Peek at the bottom of the frame: the last three rows.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2013-01-04,1.387629,-1.039416,-0.722169,-1.88307
2013-01-05,-1.376838,0.009836,0.399004,-2.34021
2013-01-06,-1.152743,2.037132,1.202133,-0.581055


In [16]:
# Display the index, columns, and the underlying numpy data.
# Start with the row labels: the daily DatetimeIndex the frame was built with.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [17]:
# Column labels of the frame, returned as a pandas Index.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [18]:
# Underlying data as a 2-D NumPy array (row and column labels are dropped).
# `to_numpy()` is the accessor the pandas documentation recommends over the
# older `.values` attribute; for this all-float frame both return the same array.
df.to_numpy()

array([[-1.71501689,  0.72266413,  1.91484701,  0.80922307],
       [ 0.76168395,  0.83672079,  1.47004089,  1.58420788],
       [ 1.0206934 ,  1.55371063,  0.80292186, -1.2740662 ],
       [ 1.38762925, -1.03941619, -0.72216855, -1.88306951],
       [-1.37683817,  0.00983639,  0.39900362, -2.34021026],
       [-1.15274337,  2.03713231,  1.20213271, -0.5810555 ]])

In [19]:
# describe() shows a quick statistical summary of each column:
# count, mean, std, min, the 25%/50%/75% quantiles, and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.179099,0.686775,0.844463,-0.614162
std,1.379914,1.098799,0.929461,1.541739
min,-1.715017,-1.039416,-0.722169,-2.34021
25%,-1.320814,0.188043,0.499983,-1.730819
50%,-0.19553,0.779692,1.002527,-0.927561
75%,0.955941,1.374463,1.403064,0.461653
max,1.387629,2.037132,1.914847,1.584208


In [20]:
# Swap rows and columns: the dates become column labels and A-D become the index.
df.transpose()

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-1.715017,0.761684,1.020693,1.387629,-1.376838,-1.152743
B,0.722664,0.836721,1.553711,-1.039416,0.009836,2.037132
C,1.914847,1.470041,0.802922,-0.722169,0.399004,1.202133
D,0.809223,1.584208,-1.274066,-1.88307,-2.34021,-0.581055


In [21]:
# Sort along the column axis by label, in descending order (D, C, B, A).
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.809223,1.914847,0.722664,-1.715017
2013-01-02,1.584208,1.470041,0.836721,0.761684
2013-01-03,-1.274066,0.802922,1.553711,1.020693
2013-01-04,-1.88307,-0.722169,-1.039416,1.387629
2013-01-05,-2.34021,0.399004,0.009836,-1.376838
2013-01-06,-0.581055,1.202133,2.037132,-1.152743


In [22]:
# Reorder the rows by the values in column 'B', smallest first.
df.sort_values('B', ascending=True)

Unnamed: 0,A,B,C,D
2013-01-04,1.387629,-1.039416,-0.722169,-1.88307
2013-01-05,-1.376838,0.009836,0.399004,-2.34021
2013-01-01,-1.715017,0.722664,1.914847,0.809223
2013-01-02,0.761684,0.836721,1.470041,1.584208
2013-01-03,1.020693,1.553711,0.802922,-1.274066
2013-01-06,-1.152743,2.037132,1.202133,-0.581055
