##  Numpy基本操作

In [1]:
import numpy as np

A = np.array([1, 2, 3, 4])
print A
print A.shape
print A.dtype
print type(A)

[1 2 3 4]
(4,)
int64
<type 'numpy.ndarray'>


In [2]:
B = np.array([
    [1.1, 2.1, 3.1],
    [4.1, 5.1, 6.1]
])
print B
print B.shape
print B.dtype

[[ 1.1  2.1  3.1]
 [ 4.1  5.1  6.1]]
(2, 3)
float64


In [3]:
print np.arange(4)
print np.arange(4).reshape(2, 2)
print np.zeros(4).reshape(2, 2)
print np.arange(8).reshape(2, 2, 2)

[0 1 2 3]
[[0 1]
 [2 3]]
[[ 0.  0.]
 [ 0.  0.]]
[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


In [4]:
A = np.arange(start=1, step=1, stop=5).reshape(2, 2)
B = np.ones(4).reshape(2, 2)
B[0][0] = 0

print A * 2
print A * B
print A.dot(B)

[[2 4]
 [6 8]]
[[ 0.  2.]
 [ 3.  4.]]
[[ 2.  3.]
 [ 4.  7.]]


In [5]:
A = np.random.random((3, 1))
print A
A = np.random.randint(low=50, high=100, size=(2, 2))
print A
print A.sum(), A.min(), A.max(), A.mean(), A.std(), A.argmax()
print A.sum(axis=0), A.min(axis=1)
print np.sqrt(A)
print A.flatten()
A.sort()
print A
print A.diagonal()

[[ 0.75735849]
 [ 0.3555381 ]
 [ 0.70428799]]
[[85 76]
 [90 60]]
311 60 90 77.75 11.4099737072 2
[175 136] [76 60]
[[ 9.21954446  8.71779789]
 [ 9.48683298  7.74596669]]
[85 76 90 60]
[[76 85]
 [60 90]]
[76 90]


In [6]:
A = np.array([1, 2, 3, 4], dtype='float64').reshape(2, 2)
print A
print A.T
I = np.eye(2)
print I
print A.dot(I)
A_inv = np.linalg.inv(A)
print A_inv
print np.linalg.solve(A, I)
print A.trace()

[[ 1.  2.]
 [ 3.  4.]]
[[ 1.  3.]
 [ 2.  4.]]
[[ 1.  0.]
 [ 0.  1.]]
[[ 1.  2.]
 [ 3.  4.]]
[[-2.   1. ]
 [ 1.5 -0.5]]
[[-2.   1. ]
 [ 1.5 -0.5]]
5.0


In [7]:
x = np.array([8, 8])
y = np.array([7 ,7])
print np.vstack([x, y])
print np.hstack([x, y])
# More details : https://docs.scipy.org/doc/numpy/reference/routines.html

[[8 8]
 [7 7]]
[8 8 7 7]


## Pandas基本操作

In [8]:
import pandas as pd

print pd.Series(np.arange(10))
print pd.date_range('2017-06-01', periods=7)
df = pd.DataFrame(np.arange(25).reshape(5,5), columns=['A', 'B', 'C', 'D', 'E'])
print df

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64
DatetimeIndex(['2017-06-01', '2017-06-02', '2017-06-03', '2017-06-04',
               '2017-06-05', '2017-06-06', '2017-06-07'],
              dtype='datetime64[ns]', freq='D')
    A   B   C   D   E
0   0   1   2   3   4
1   5   6   7   8   9
2  10  11  12  13  14
3  15  16  17  18  19
4  20  21  22  23  24


In [9]:
print df[['A','C']]
df[['A', 'C']] = df[['C', 'A']]
print df
print df[df['A'] > 10]
print df[(df['A'] > 10) & (df['C'] < 15)]

    A   C
0   0   2
1   5   7
2  10  12
3  15  17
4  20  22
    A   B   C   D   E
0   2   1   0   3   4
1   7   6   5   8   9
2  12  11  10  13  14
3  17  16  15  18  19
4  22  21  20  23  24
    A   B   C   D   E
2  12  11  10  13  14
3  17  16  15  18  19
4  22  21  20  23  24
    A   B   C   D   E
2  12  11  10  13  14


In [10]:
from IPython.display import display

dates = pd.date_range('2017-06-01', periods=7)
df1 = pd.DataFrame(np.random.randint(low=100, high=300, size=(7, 3)), index=dates, columns=[u'早餐', u'午餐', u'晚餐'])
display(df1)

Unnamed: 0,早餐,午餐,晚餐
2017-06-01,211,211,194
2017-06-02,159,129,236
2017-06-03,263,223,166
2017-06-04,187,212,248
2017-06-05,265,180,232
2017-06-06,203,138,240
2017-06-07,204,226,137


In [11]:
df2 = pd.DataFrame(np.random.randint(low=40, high=80, size=(7, 1)), index=dates, columns=[u'飲料'])
df2.ix['2017-06-01', u'飲料'] = 85
df2.ix['2017-06-03':'2017-06-05', u'飲料'] = np.nan
display(df2)

df3 = pd.concat([df1, df2], axis=1)
df3[u'總和'] = df3.sum(axis=1)
display(df3) 

Unnamed: 0,飲料
2017-06-01,85.0
2017-06-02,47.0
2017-06-03,
2017-06-04,
2017-06-05,
2017-06-06,61.0
2017-06-07,53.0


Unnamed: 0,早餐,午餐,晚餐,飲料,總和
2017-06-01,211,211,194,85.0,701.0
2017-06-02,159,129,236,47.0,571.0
2017-06-03,263,223,166,,652.0
2017-06-04,187,212,248,,647.0
2017-06-05,265,180,232,,677.0
2017-06-06,203,138,240,61.0,642.0
2017-06-07,204,226,137,53.0,620.0


In [12]:
df4 = pd.DataFrame({u'早餐':[50], u'晚餐':[100]}, pd.date_range('2017-06-08', periods=1))
display(df4)
df5 = pd.concat([df3, df4])
display(df5)
df5[u'總和'] = df5.sum(axis=1)
display(df5) 

Unnamed: 0,早餐,晚餐
2017-06-08,50,100


Unnamed: 0,午餐,早餐,晚餐,總和,飲料
2017-06-01,211.0,211,194,701.0,85.0
2017-06-02,129.0,159,236,571.0,47.0
2017-06-03,223.0,263,166,652.0,
2017-06-04,212.0,187,248,647.0,
2017-06-05,180.0,265,232,677.0,
2017-06-06,138.0,203,240,642.0,61.0
2017-06-07,226.0,204,137,620.0,53.0
2017-06-08,,50,100,,


Unnamed: 0,午餐,早餐,晚餐,總和,飲料
2017-06-01,211.0,211,194,1402.0,85.0
2017-06-02,129.0,159,236,1142.0,47.0
2017-06-03,223.0,263,166,1304.0,
2017-06-04,212.0,187,248,1294.0,
2017-06-05,180.0,265,232,1354.0,
2017-06-06,138.0,203,240,1284.0,61.0
2017-06-07,226.0,204,137,1240.0,53.0
2017-06-08,,50,100,150.0,
