In [None]:
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# SciPy - Very Quick Guide
- SciPy (pronounced "Sigh Pie") is a scientific Python library for performing mathematical, scientific, and engineering computations.
- The SciPy library depends on NumPy, which provides convenient and fast N-dimensional array manipulation. 

## NumPy Vector/Array

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Build a 1-D NumPy array from a plain Python list.
# The list is named `values` rather than `list` so that the built-in `list`
# type is not shadowed for every later cell in the notebook.
values = [1, 2, 3, 4]
a = np.array(values)
a

In [None]:
# 2x3 array filled with zeros (the shape is passed as a tuple).
np.zeros((2, 3))

In [None]:
# 2x3 array filled with ones (the shape is passed as a tuple).
np.ones((2, 3))

In [None]:
# Integers 0 through 6; the stop value 7 itself is excluded.
np.arange(7)

In [None]:
# 6 evenly spaced samples from 1.0 to 4.0, both endpoints included.
np.linspace(1., 4., 6)

## Matrix

In [None]:
# Build the 2x2 matrix [[1, 2], [3, 4]] from nested lists, then show its
# conjugate (Hermitian) transpose. Every entry is real, so the result has
# the same values as the plain transpose.
mat = np.matrix([[1, 2], [3, 4]])
mat.getH()

**Transpose**

In [None]:
# Plain transpose of `mat`: rows and columns are swapped.
mat.T

## K-Means

In [None]:
from numpy import vstack, array
from numpy.random import rand
from scipy.cluster.vq import kmeans, vq, whiten

In [None]:
# Generate two well-separated 2-D Gaussian clusters for the k-means demo.
# A seeded generator is used so that "Restart Kernel -> Run All" produces
# the same points (and therefore the same figures) on every run.
SEED = 42
rng = np.random.default_rng(SEED)

pts = 20  # number of samples drawn per cluster
# Cluster `a`: centred on the origin.
a = rng.multivariate_normal([0, 0], [[4, 1], [1, 4]], size=pts)
# Cluster `b`: centred on (30, 10), with most of its spread along feature 0.
b = rng.multivariate_normal([30, 10], [[10, 2], [2, 1]], size=pts)
# Stack both clusters into a single (2 * pts, 2) observation array.
data = np.concatenate((a, b))
data

Before running k-means, it is beneficial to **rescale** each feature dimension of the observation set with whitening. Each feature is **divided by its standard deviation** across all observations to give it unit variance.

In [None]:
# Rescale every feature (column) of `data` by its standard deviation so that
# each one has unit variance before clustering.
whitened = whiten(data)

Computing **K-Means** with K=2 (2 clusters)

In [None]:
# Cluster the whitened observations into K = 2 groups.
# kmeans() starts from randomly chosen centroids, so a fixed seed is passed
# to keep the codebook reproducible across "Restart Kernel -> Run All".
# `codebook` holds one centroid per row; `distortion` is the mean distance
# between each observation and its nearest centroid.
codebook, distortion = kmeans(whitened, 2, seed=0)
codebook

In [None]:
# Plot the whitened observations together with the fitted centroids.
# The explicit Axes interface is used so the figure carries a title, axis
# labels and a legend and can be understood when the notebook is skimmed.
fig, ax = plt.subplots()
ax.scatter(whitened[:, 0], whitened[:, 1], label='observations')
ax.scatter(codebook[:, 0], codebook[:, 1], c='r', label='centroids')
ax.set(title='K-Means on whitened data (K=2)',
       xlabel='feature 0 (whitened)',
       ylabel='feature 1 (whitened)')
ax.legend(loc='best');

# Interpolate

In [None]:
from scipy import interpolate
# Sample positions: 12 evenly spaced points covering [0, 4].
x = np.linspace(start=0, stop=4, num=12)
x

In [None]:
# Evaluate cos(x^2 / 3 + 4) at every sample position.
y = np.cos(np.square(x) / 3 + 4)
y

In [None]:
# Quick look at the sampled (x, y) points, drawn as circle markers.
plt.plot(x, y, 'o')

In [None]:
# Fit two interpolants through the same samples: `f1` is the linear one and
# `f2` the cubic one. `xnew` is a denser grid (30 points over the same
# [0, 4] range) on which both are evaluated afterwards.
f1, f2 = (interpolate.interp1d(x, y, kind=order) for order in ('linear', 'cubic'))
xnew = np.linspace(0, 4, num=30)
xnew

In [None]:
# Compare the original samples with both interpolants on the dense grid.
# Each curve is drawn with its own label so the legend cannot drift out of
# sync with the plotting order, and the axes are titled and labelled.
fig, ax = plt.subplots()
ax.plot(x, y, 'o', label='data')
ax.plot(xnew, f1(xnew), '-', label='linear')
ax.plot(xnew, f2(xnew), '--', label='cubic')
ax.set(title='Linear vs. cubic interpolation', xlabel='x', ylabel='y')
ax.legend(loc='best');

# Linalg
SciPy is built using the optimized ATLAS LAPACK and BLAS libraries. It has very fast **linear algebra** capabilities. All of these linear algebra routines expect an object that can be converted into a **two-dimensional array**. The output of these routines is also a two-dimensional array.

## Eigenvalues and Eigenvectors
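An eigenvector of a linear transformation is a nonzero vector whose direction is not changed by that transformation; it is only scaled, and the scaling factor is the corresponding eigenvalue.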
<img src="https://www.mathsisfun.com/algebra/images/eigen-transform.svg" />

In [None]:
from scipy import linalg
# 2x2 example matrix [[1, 2], [3, 4]], built by reshaping the range 1..4.
A = np.arange(1, 5).reshape(2, 2)
A

In [None]:
# Eigen-decomposition of A: `l` receives the eigenvalues and `v` the matrix
# whose columns are the corresponding right eigenvectors.
l, v = linalg.eig(A)
print('eigen values=', l, '\n')
print('eigen vector=', v)

## Singular Value Decomposition (SVD)
- Singular Value Decomposition (SVD) is a common dimensionality reduction technique in data science
- `scipy.linalg.svd` factorizes the matrix `a` into two unitary matrices `U` and `Vh` and a 1-D array `s` of singular values (real, non-negative) such that `a == U @ S @ Vh`, where `S` is a suitably shaped matrix of zeros with main diagonal `s`.

In [None]:
# Random 3x2 complex matrix to decompose: independent standard-normal draws
# for the real and imaginary parts. A seeded generator keeps the matrix
# identical on every "Restart Kernel -> Run All".
svd_rng = np.random.default_rng(0)
a = svd_rng.standard_normal((3, 2)) + 1.j * svd_rng.standard_normal((3, 2))
a

In [None]:
# Full SVD of `a`: `U` and `Vh` are the unitary factors and `s` is the 1-D
# array of singular values. Note the print order is U, then Vh, then s.
U, s, Vh = linalg.svd(a)
print(U, '\n\n', Vh, '\n\n', s)