In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Build a Series from a plain list; np.nan marks the one missing entry,
# and the resulting dtype is float64.
dat1 = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
dat1

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Check the Python type of dat1.
type(dat1)

pandas.core.series.Series

In [4]:
# Second Series with two missing entries (positions 1 and 3).
dat2 = pd.Series(data=[1, np.nan, 5, np.nan, 6, 8])
dat2

0    1.0
1    NaN
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [8]:
# Create the date data: six consecutive daily timestamps from 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)
print(dates)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')


#### DataFrame 만들기 (creating a DataFrame)

In [9]:
# Fill a 6x4 frame with standard-normal draws: one row per entry of `dates`,
# columns labelled A-D.
# A dedicated, seeded RandomState makes the values identical on every
# "Restart & Run All" and leaves NumPy's global random state untouched.
df = pd.DataFrame(np.random.RandomState(42).randn(6, 4),
                  index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.070337,-1.247492,2.29797,-2.348321
2013-01-02,-0.536316,-0.427812,0.827342,-0.336684
2013-01-03,-0.449928,-1.372955,-0.452031,-0.696063
2013-01-04,0.372467,0.436059,-0.455822,-0.385533
2013-01-05,-0.043319,-0.85603,-0.414888,2.975314
2013-01-06,-1.01226,-1.670923,-0.257511,0.176139


In [12]:
# One frame, six columns, each built from a different kind of input.
# The scalar values (A, B, F) are repeated across all four rows.
df2 = pd.DataFrame({
    'A': 1.0,
    'B': pd.Timestamp('20130103'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.full(4, 3, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': "foo",
})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-03,1.0,3,test,foo
1,1.0,2013-01-03,1.0,3,train,foo
2,1.0,2013-01-03,1.0,3,test,foo
3,1.0,2013-01-03,1.0,3,train,foo


In [14]:
# Check the Python type of df2.
type(df2)

pandas.core.frame.DataFrame

In [16]:
# Combine the date index and the two Series into one frame, one column each.
# Note that this rebinds df2 to a new, unrelated frame.
df2 = pd.DataFrame({
    'Date': dates,
    'A': dat1,
    'B': dat2,
})
df2

Unnamed: 0,A,B,Date
0,1.0,1.0,2013-01-01
1,3.0,,2013-01-02
2,5.0,5.0,2013-01-03
3,,,2013-01-04
4,6.0,6.0,2013-01-05
5,8.0,8.0,2013-01-06


In [29]:
# Per-column dtypes of df2.
df2.dtypes

A              float64
B              float64
Date    datetime64[ns]
dtype: object

In [30]:
# (rows, columns) of df2.
df2.shape

(6, 3)

In [32]:
# Print a summary of df2: index range, per-column non-null counts and dtypes,
# and memory usage.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
A       5 non-null float64
B       4 non-null float64
Date    6 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(2)
memory usage: 224.0 bytes


In [33]:
# Row labels of df2.
df2.index

RangeIndex(start=0, stop=6, step=1)

In [36]:
# Duplicate df2 under a new name (DataFrame.copy() is a deep copy by default).
df3 = df2.copy()
df3

Unnamed: 0,A,B,Date
0,1.0,1.0,2013-01-01
1,3.0,,2013-01-02
2,5.0,5.0,2013-01-03
3,,,2013-01-04
4,6.0,6.0,2013-01-05
5,8.0,8.0,2013-01-06


In [38]:
# set_index: use the "Date" column as the row index. The result is a new
# frame bound to df3_idx.
df3_idx = df3.set_index("Date")
df3_idx

Unnamed: 0_level_0,A,B
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,1.0,1.0
2013-01-02,3.0,
2013-01-03,5.0,5.0
2013-01-04,,
2013-01-05,6.0,6.0
2013-01-06,8.0,8.0


In [40]:
# Row labels of df3_idx.
df3_idx.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', name='Date', freq=None)

In [42]:
# Small sales table; the missing sale for month 10 is recorded as NaN.
df = pd.DataFrame({
    'month': [1, 4, 7, 10, 12],
    'year': [2012, 2014, 2013, 2014, 2015],
    'sale': [55, 40, 84, np.nan, 30],
})
# set_index returns a new frame keyed by month; df keeps its month column.
df1 = df.set_index(keys='month')
df1

Unnamed: 0_level_0,sale,year
month,Unnamed: 1_level_1,Unnamed: 2_level_1
1,55.0,2012
4,40.0,2014
7,84.0,2013
10,,2014
12,30.0,2015


In [44]:
# Passing a list of columns to set_index builds a two-level (year, month) index.
# NOTE(review): despite its name, df2_idx is derived from df, not from df2.
df2_idx = df.set_index(['year', 'month'])
df2_idx

Unnamed: 0_level_0,Unnamed: 1_level_0,sale
year,month,Unnamed: 2_level_1
2012,1,55.0
2014,4,40.0
2013,7,84.0
2014,10,
2015,12,30.0


In [47]:
# Column labels of df2.
df2.columns

Index(['A', 'B', 'Date'], dtype='object')

In [51]:
# print() writes the frame as plain text; describe() then reports summary
# statistics for each numeric column (count excludes NaN values).
print(df)
df.describe()

   month  sale  year
0      1  55.0  2012
1      4  40.0  2014
2      7  84.0  2013
3     10   NaN  2014
4     12  30.0  2015


Unnamed: 0,month,sale,year
count,5.0,4.0,5.0
mean,6.8,52.25,2013.6
std,4.438468,23.528352,1.140175
min,1.0,30.0,2012.0
25%,4.0,37.5,2013.0
50%,7.0,47.5,2014.0
75%,10.0,62.25,2014.0
max,12.0,84.0,2015.0


In [52]:
# Show df2 as plain text, then its transpose (rows and columns swapped).
print(df2)
df2.T

     A    B       Date
0  1.0  1.0 2013-01-01
1  3.0  NaN 2013-01-02
2  5.0  5.0 2013-01-03
3  NaN  NaN 2013-01-04
4  6.0  6.0 2013-01-05
5  8.0  8.0 2013-01-06


Unnamed: 0,0,1,2,3,4,5
A,1,3,5,,6,8
B,1,,5,,6,8
Date,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
