#  Useful Libraries 

* Pip 
* Numpy 
* Scipy
* Matplotlib

In [3]:
# Import Module
import numpy as np
import time
from scipy.optimize import leastsq
import scipy.optimize as opt
import scipy
import matplotlib.pyplot as plt

import math

# Function definition 
# Residual 
def residual(t, x, y):
    """Return the difference between observations and a quadratic model.

    Parameters
    ----------
    t : indexable
        Model coefficients; ``t[0]``, ``t[1]`` and ``t[2]`` multiply
        ``x ** 2``, ``x`` and ``1`` respectively.
    x : scalar or array
        Point(s) at which the quadratic is evaluated.
    y : scalar or array
        Observed value(s) to compare against the model.

    Returns
    -------
    ``y`` minus the quadratic evaluated at ``x``.
    """
    quad_coef, lin_coef, const = t[0], t[1], t[2]
    model = quad_coef * x ** 2 + lin_coef * x + const
    return y - model

# Another version 
def residual2(t, x, y):
    """Return the difference between observations and a sine model.

    The model is ``t[0] * sin(t[1] * x)``. The two parameters are printed
    on every call, so each evaluation is traced on stdout.

    Parameters
    ----------
    t : indexable
        ``t[0]`` scales the sine, ``t[1]`` multiplies ``x`` inside it.
    x : scalar or array
        Point(s) at which the model is evaluated.
    y : scalar or array
        Observed value(s) to compare against the model.

    Returns
    -------
    ``y`` minus the sine model evaluated at ``x``.
    """
    scale, rate = t[0], t[1]
    print (scale, rate)
    model = scale * np.sin(rate * x)
    return y - model




In [4]:
# Build a 2-D array with NumPy broadcasting:
# a column of row offsets (0, 10, ..., 50) plus a row of column offsets (0..5)
import numpy as np
a = np.arange(0, 60, 10)[:, np.newaxis] + np.arange(6)
a
# help(np.arange)
# help(np.reshape)
# np.arange(6)

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

### Array
 NumPy provides `ndarray`, an N-dimensional array object. 
 An array can be created in two ways: 
 * from existing data: `array`, then `reshape` / `shape` to change its layout ...
 * with a constructor function: `arange`, `linspace`, `logspace`, `fromstring`, `frombuffer`, `fromfile` ...


In [5]:
# Turn an ordinary Python list into an ndarray with np.array
L = list(range(1, 7))
print ("L = ", L)
a = np.array(L)
print("a = ", a)

L =  [1, 2, 3, 4, 5, 6]
a =  [1 2 3 4 5 6]


In [6]:
# A nested list (one inner list per row) yields a two-dimensional array
b = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])  # note the two levels of square brackets
b

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [7]:
# Inspect the dimensions of both arrays through the "shape" attribute
print(a.shape, b.shape, sep="\n")
# help(np.shape)  # Return the shape of an array.

# Assigning a new tuple to "shape" re-shapes the array in place
b.shape = (4, 3)
print(b) # the positions of the elements in memory don't change

# Restore the original 3 x 4 layout
b.shape = (3, 4)
print(b)

(6,)
(3, 4)
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [8]:

# "reshape" hands back a separate array object with the requested shape;
# b itself keeps its own shape
c = b.reshape(4, -1)
print("b = \n", b)
print("c = \n", c)

# Caveat: b and c are backed by the same memory,
# so a write through one of them shows up in the other
b[0, 1] = 10
print("b = \n", b)
print("c = \n", c)

# Inspect the element type of each array
print(a.dtype)
print(b.dtype)

b = 
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
c = 
 [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
b = 
 [[ 1 10  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
c = 
 [[ 1 10  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
int64
int64


In [9]:
# Specify the element type with "dtype".
# The np.float / np.complex / np.int aliases were removed from NumPy (1.24);
# the Python builtins float / complex / int select the same dtypes.
d = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=float)
f = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=complex)
print(d)
print(f)

# "astype" returns a converted copy with the requested element type
f = d.astype(int)
print(f)


# Note: do not try to convert by assigning to the dtype attribute.
# d.dtype = int  # does not convert the values; it re-interprets the raw bytes
# print(d)

[[ 1.  2.  3.  4.]
 [ 5.  6.  7.  8.]]
[[ 1.+0.j  2.+0.j  3.+0.j  4.+0.j]
 [ 5.+0.j  6.+0.j  7.+0.j  8.+0.j]]
[[1 2 3 4]
 [5 6 7 8]]


In [10]:
# Arrays from constructor functions

# "arange": evenly spaced values from 1 up to (not including) 10, in steps of 0.5
a = np.arange(1, 10, step=0.5)
# help(np.arange)
print(a)


[ 1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5  7.   7.5  8.
  8.5  9.   9.5]


In [11]:
# "linspace" takes the first value, the last value and the number of samples
b = np.linspace(1, 10, num=10)
print("b = ", b)
# help(np.linspace)

b =  [  1.   2.   3.   4.   5.   6.   7.   8.   9.  10.]


In [12]:
# The "endpoint" flag decides whether the stop value is part of the result
# (arguments: start, stop, number of samples)
c = np.linspace(1, 10, num=10, endpoint=False)
print("c = ", c)
c = np.linspace(1, 10, num=10, endpoint=True)
print("c = ", c)


c =  [ 1.   1.9  2.8  3.7  4.6  5.5  6.4  7.3  8.2  9.1]
c =  [  1.   2.   3.   4.   5.   6.   7.   8.   9.  10.]


In [13]:
# Use "logspace" to create a geometric sequence:
# 10 samples from 10**1 to 10**2 (base 10 is the default)
d = np.logspace(1, 2, 10, endpoint=True)
print("d = ", d)


# Specify the base for "logspace": 2**0 ... 2**10 as integers.
# The builtin int replaces the np.int alias, which was removed from NumPy (1.24).
f = np.logspace(0, 10, 11, endpoint=True, base=2, dtype=int)
print("f = ", f)

d =  [  10.           12.91549665   16.68100537   21.5443469    27.82559402
   35.93813664   46.41588834   59.94842503   77.42636827  100.        ]
f =  [   1    2    4    8   16   32   64  128  256  512 1024]


In [14]:

# Use "frombuffer", "fromfile" etc. to create an array from raw data.
# Binary-mode np.fromstring is deprecated, so the text is encoded to bytes
# and handed to np.frombuffer instead.
s = "abcd"
# frombuffer yields a read-only view of the bytes; .copy() gives an independent, writable array
g = np.frombuffer(s.encode("ascii"), dtype=np.int8).copy()
print("g = ", g)
# help(np.frombuffer)


g =  [ 97  98  99 100]


### Slicing in Numpy

* Standard Python indexing and slicing work as usual, for example `a[2]` or `a[3:6]`.
* An array of integers or of booleans can also be used as the subscript.
* Slicing a multi-dimensional array works in a similar way.


In [15]:
# Basic indexing and slicing follow the usual Python rules
a = np.arange(10)
print("a = ", a)
print("The fourth element: ", a[3])
print(a[3:6])    # positions 3, 4, 5
print(a[:6])     # from the start up to (not including) position 6
print(a[3:])     # from position 3 to the end
print(a[1:9:2])  # every second element of positions 1..8
print(a[::-1])   # reversed order
a[1:4] = [10, 20, 30]  # assigning through a slice changes the array in place
print("New array a:", a)

a =  [0 1 2 3 4 5 6 7 8 9]
The fourth element:  3
[3 4 5]
[0 1 2 3 4 5]
[3 4 5 6 7 8 9]
[1 3 5 7]
[9 8 7 6 5 4 3 2 1 0]
New array a: [ 0 10 20 30  4  5  6  7  8  9]


In [16]:
# Indexing with an array of integer positions
a = np.logspace(0, 9, num=10, base=2)
print(a)

i = np.arange(0, 10, step=2)  # positions to pick
print("subscripts: ", i)

b = a[i]
print("b = ", b)

# Integer-array indexing produces a copy: writing to b leaves a untouched
b[2] = 10
print(b)
print(a)

[   1.    2.    4.    8.   16.   32.   64.  128.  256.  512.]
subscripts:  [0 2 4 6 8]
b =  [   1.    4.   16.   64.  256.]
[   1.    4.   10.   64.  256.]
[   1.    2.    4.    8.   16.   32.   64.  128.  256.  512.]


In [17]:
# Indexing with a boolean mask
a = np.random.rand(10)
print(a)
mask = a > 0.5  # element-wise comparison gives an array of logical values
print(mask)
print(a[mask])  # keeps only the elements where the mask is True

[ 0.18242296  0.1298255   0.17778153  0.12823869  0.56978719  0.15321838
  0.30518577  0.72490367  0.66740198  0.71856527]
[False False False False  True False False  True  True  True]
[ 0.56978719  0.72490367  0.66740198  0.71856527]


In [18]:
# Indexing and slicing a two-dimensional array
a = np.arange(0, 60, 10)[:, np.newaxis] + np.arange(6)
print("a: \n",a)

# Two index lists are paired up element by element: (1, 1), (2, 2), (4, 3)
print("3 elements : \n", a[[1, 2, 4], [1, 2, 3]])
# A slice for the rows combined with a list of column positions
print("first two cols and all rows except for the first three: \n", a[3:, [0, 1]])


a: 
 [[ 0  1  2  3  4  5]
 [10 11 12 13 14 15]
 [20 21 22 23 24 25]
 [30 31 32 33 34 35]
 [40 41 42 43 44 45]
 [50 51 52 53 54 55]]
3 elements : 
 [11 22 43]
first two cols and all rows except for the first three: 
 [[30 31]
 [40 41]
 [50 51]]


In [20]:
# Time comparison of NumPy and Python's built-in math module.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# high-resolution timer intended for measuring short durations.
for j in np.logspace(0, 7, 10):
    j = int(j)  # number of samples for this round
    x = np.linspace(0, 10, j)

    # Vectorised: one NumPy call over the whole array
    start = time.perf_counter()
    y = np.sin(x)
    t1 = time.perf_counter() - start

    # Pure Python: one math.sin call per element of a plain list
    x = x.tolist()
    start = time.perf_counter()
    for i, t in enumerate(x):
        x[i] = math.sin(t)
    t2 = time.perf_counter() - start

    # Guard the ratio in case the NumPy timing is below the timer resolution
    ratio = t2 / t1 if t1 > 0 else float("inf")
    print(j, ":", t1, t2, ratio)
# Numpy is faster when data set is large 

1 : 0.005264999999999631 9.999999999621423e-06 0.0018993352325967945
5 : 7.000000000090267e-06 4.999999999810711e-06 0.7142857142494621
35 : 8.000000000230045e-06 1.7999999999851468e-05 2.2499999999167333
215 : 9.000000000369823e-06 5.599999999983396e-05 6.222222221948093
1291 : 0.00014299999999956015 0.000332000000000221 2.3216783216870085
7742 : 0.000376000000000154 0.001999000000000528 5.316489361701355
46415 : 0.0005670000000002062 0.014793000000000056 26.0899470899377
278255 : 0.0034250000000000114 0.07544199999999979 22.026861313868476
1668100 : 0.02553099999999997 0.464124 18.178841408483823
10000000 : 0.1887080000000001 2.937861 15.568290692498454


In [None]:
    # 8.1 scipy
    # NOTE(review): everything in this cell is commented out. The dormant code
    # uses Python 2 print statements; convert them to print(...) calls before
    # re-enabling it.
    # Linear regression example 1 (least-squares fit of a quadratic via `residual`)
    # x = np.linspace(-2, 2, 50)
    # A, B, C = 2, 3, -1
    # y = (A * x ** 2 + B * x + C) + np.random.rand(len(x))*0.75
    #
    # t = leastsq(residual, [0, 0, 0], args=(x, y))
    # theta = t[0]
    # print '真实值：', A, B, C      (label means "true values")
    # print '预测值：', theta        (label means "predicted values")
    # y_hat = theta[0] * x ** 2 + theta[1] * x + theta[2]
    # plt.plot(x, y, 'r-', linewidth=2, label=u'Actual')
    # plt.plot(x, y_hat, 'g-', linewidth=2, label=u'Predict')
    # plt.legend(loc='upper left')
    # plt.grid()
    # plt.show()

    # # Linear regression example 2 (least-squares fit of a sine via `residual2`)
    # x = np.linspace(0, 5, 100)
    # A = 5
    # w = 1.5
    # y = A * np.sin(w*x) + np.random.rand(len(x)) - 0.5
    #
    # t = leastsq(residual2, [3, 1], args=(x, y))
    # theta = t[0]
    # print '真实值：', A, w         (label means "true values")
    # print '预测值：', theta        (label means "predicted values")
    # y_hat = theta[0] * np.sin(theta[1] * x)
    # plt.plot(x, y, 'r-', linewidth=2, label='Actual')
    # plt.plot(x, y_hat, 'g-', linewidth=2, label='Predict')
    # plt.legend(loc='lower left')
    # plt.grid()
    # plt.show()

    # # 8.2 Use scipy to compute the extremum of a function
    # NOTE(review): `f` has to be a callable objective here; in the visible
    # cells `f` is only ever bound to arrays, so define the function first.
    # a = opt.fmin(f, 1)
    # b = opt.fmin_cg(f, 1)
    # c = opt.fmin_bfgs(f, 1)
    # print a, 1/a, math.e
    # print b
    # print c

    # marker	description
    # ”.”	point
    # ”,”	pixel
    # “o”	circle
    # “v”	triangle_down
    # “^”	triangle_up
    # “<”	triangle_left
    # “>”	triangle_right
    # “1”	tri_down
    # “2”	tri_up
    # “3”	tri_left
    # “4”	tri_right
    # “8”	octagon
    # “s”	square
    # “p”	pentagon
    # “*”	star
    # “h”	hexagon1
    # “H”	hexagon2
    # “+”	plus
    # “x”	x
    # “D”	diamond
    # “d”	thin_diamond
    # “|”	vline
    # “_”	hline
    # TICKLEFT	tickleft
    # TICKRIGHT	tickright
    # TICKUP	tickup
    # TICKDOWN	tickdown
    # CARETLEFT	caretleft
    # CARETRIGHT	caretright
    # CARETUP	caretup
    # CARETDOWN	caretdown