# 10 minutes to pandas 

https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html

In [1]:
import numpy as np
import pandas as pd

In [2]:
# np.nan marks a missing entry; pandas stores the whole Series as float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
# A bare last expression is rendered by the notebook as the cell's output.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# print() writes the plain-text repr to stdout instead of returning a cell
# output value; for a Series the text is the same as the bare-expression display.
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [8]:
# Six consecutive daily timestamps starting on 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)
dates  # last expression: rendered as the cell output

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

## Pandas のデータフレームの作り方

#### index=dates→上のセルで作成したdatesを行ラベル（index）として使っている
#### columnsには、ABCDをリスト型にして並べている。

In [9]:
# Seeded generator so that Restart & Run All reproduces the same random frame
# (the original used the unseeded global np.random state).
rng = np.random.default_rng(seed=0)
# 6x4 standard-normal values; rows are labelled by `dates`, columns are A-D.
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))

In [10]:
# Display the whole frame (only 6 rows, so showing it in full is fine).
df

Unnamed: 0,A,B,C,D
2013-01-01,0.359944,-1.477111,-0.316244,-0.422446
2013-01-02,-0.754253,1.33286,1.452363,0.947453
2013-01-03,-0.890504,0.276061,-0.800616,0.995107
2013-01-04,-1.504962,-0.982851,0.301031,0.513573
2013-01-05,-0.942626,0.698507,-0.381735,1.327581
2013-01-06,-0.432985,0.612327,-0.942325,0.85712


## Pandas データフレームの作り方　その２

In [11]:
# Build a frame from a dict: each key becomes a column. The scalar float and
# the Timestamp are repeated for every row of the only indexed column, C.
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    }
)

In [12]:
# Display the dict-built frame: 4 rows, columns A, B and C.
df2

Unnamed: 0,A,B,C
0,1.0,2013-01-02,1.0
1,1.0,2013-01-02,1.0
2,1.0,2013-01-02,1.0
3,1.0,2013-01-02,1.0


### dtypesでDataFrameの各列のデータ型を確認できる

In [13]:
# Data type of each column, returned as a Series indexed by column name.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
dtype: object

### .head()で先頭の行（デフォルトは5行）を見ることができる

In [14]:
# First rows of the frame; with no argument head() returns 5 rows.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.359944,-1.477111,-0.316244,-0.422446
2013-01-02,-0.754253,1.33286,1.452363,0.947453
2013-01-03,-0.890504,0.276061,-0.800616,0.995107
2013-01-04,-1.504962,-0.982851,0.301031,0.513573
2013-01-05,-0.942626,0.698507,-0.381735,1.327581


### indexでインデックス値を見ることができる

In [16]:
# Row labels of the frame (the DatetimeIndex passed in as `dates`).
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

### columnsでカラムの値を見ることができる

In [17]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

### describe()でデータの統計要約を表示することができる

In [19]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.694231,0.076632,-0.114588,0.703065
std,0.623,1.079648,0.882711,0.610155
min,-1.504962,-1.477111,-0.942325,-0.422446
25%,-0.929595,-0.668123,-0.695896,0.59946
50%,-0.822379,0.444194,-0.348989,0.902286
75%,-0.513302,0.676962,0.146712,0.983194
max,0.359944,1.33286,1.452363,1.327581


## データの転置

#### .Tでインデックスとカラムを入れ替えることができる

In [20]:
# Transpose: the columns A-D become rows and the dates become columns.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.359944,-0.754253,-0.890504,-1.504962,-0.942626,-0.432985
B,-1.477111,1.33286,0.276061,-0.982851,0.698507,0.612327
C,-0.316244,1.452363,-0.800616,0.301031,-0.381735,-0.942325
D,-0.422446,0.947453,0.995107,0.513573,1.327581,0.85712


#### 軸による並び替え

In [26]:
# axis=1 sorts by column label; ascending=False gives the order D, C, B, A.
# Returns a new frame; `df` itself is not reordered.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.422446,-0.316244,-1.477111,0.359944
2013-01-02,0.947453,1.452363,1.33286,-0.754253
2013-01-03,0.995107,-0.800616,0.276061,-0.890504
2013-01-04,0.513573,0.301031,-0.982851,-1.504962
2013-01-05,1.327581,-0.381735,0.698507,-0.942626
2013-01-06,0.85712,-0.942325,0.612327,-0.432985


### データの取得

In [27]:
# Selecting a single column by label returns it as a Series.
df['A']

2013-01-01    0.359944
2013-01-02   -0.754253
2013-01-03   -0.890504
2013-01-04   -1.504962
2013-01-05   -0.942626
2013-01-06   -0.432985
Freq: D, Name: A, dtype: float64

In [29]:
# An integer slice inside [] selects rows by position: here the first three.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.359944,-1.477111,-0.316244,-0.422446
2013-01-02,-0.754253,1.33286,1.452363,0.947453
2013-01-03,-0.890504,0.276061,-0.800616,0.995107


In [30]:
# Label-based lookup: the row for the first date, returned as a Series
# indexed by column name.
df.loc[dates[0]]

A    0.359944
B   -1.477111
C   -0.316244
D   -0.422446
Name: 2013-01-01 00:00:00, dtype: float64

### 統計

#### 平均値を求める

In [31]:
# Mean of each column, returned as a Series indexed by column name.
df.mean()

A   -0.694231
B    0.076632
C   -0.114588
D    0.703065
dtype: float64