Imports

In [None]:
%matplotlib inline
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.callbacks import EarlyStopping
from keras.layers import LSTM, BatchNormalization, Dense
from keras.models import Sequential
from keras.optimizers import SGD, Adagrad, Adam, RMSprop
from keras.utils.np_utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from pandas.tseries.offsets import MonthEnd
from scipy import misc
from scipy.ndimage.filters import convolve
from scipy.signal import convolve2d
from sklearn.datasets import load_digits, make_moons
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, f1_score, precision_score,
                             recall_score)
from sklearn.model_selection import (KFold, cross_val_score, learning_curve,
                                     train_test_split)
from sklearn.preprocessing import MinMaxScaler, scale

Machine Learning 

In [None]:
#-------------------------------------------------------- Linear Regression
# Load the weight/height table and show its first rows.
df = pd.read_csv('../data/weight-height.csv')
print(df.head())

# Scatter plot of Weight against Height.
scatter_options = {
    'kind': 'scatter',
    'x': 'Height',
    'y': 'Weight',
    'title': 'Weight and Height in adults',
}
df.plot(**scatter_options)

def line(x, w = 0, b = 0):
  """Evaluate the straight line ``x * w + b``.

  ``w`` is the slope and ``b`` the intercept; both default to 0.
  """
  scaled = x * w
  return scaled + b

# 100 evenly spaced points between 55 and 80, and the line evaluated on them
# with zero slope and zero intercept.
x = np.linspace(55, 80, num = 100)
yhat = line(x, 0, 0)

# Scatter of the data with the line drawn on top in red.
scatter_options = dict(kind = 'scatter',
                       x = 'Height',
                       y = 'Weight',
                       title = 'Weight and Height in adults')
df.plot(**scatter_options)
plt.plot(x, yhat, color = "red", linewidth = 3)

#--------------------------------------------------------- Cost Function
def mean_squared_error(y_true, y_pred):
  """Return the mean of the squared differences ``y_true - y_pred``.

  Both arguments must support element-wise subtraction and the result must
  expose ``.mean()`` (e.g. NumPy arrays).
  """
  residuals = y_true - y_pred
  return (residuals ** 2).mean()

x = df[['Height']].values
y_true = df['Weight'].values

# x is a single-column 2-D array, so line(x) is 2-D as well while y_true is
# 1-D. Flatten the prediction so the subtraction inside mean_squared_error is
# element-wise instead of broadcasting to an (n, n) matrix.
y_pred = line(x).ravel()
print(mean_squared_error(y_true, y_pred))

plt.figure(figsize = (10, 5))

# Left panel: the data plus one candidate line per intercept value.
ax1 = plt.subplot(121)
df.plot(kind = 'scatter',
        x = 'Height',
        y = 'Weight',
        title = 'Weight and Height in adults',
        ax = ax1)

bbs = np.array([-100, -50, 0, 50, 100, 150])
mses = []
for b in bbs:
  y_pred = line(x, w = 2, b = b).ravel()
  mses.append(mean_squared_error(y_true, y_pred))
  ax1.plot(x, y_pred)

# Right panel: the cost obtained for each intercept.
ax2 = plt.subplot(122)
ax2.plot(bbs, mses, 'o-')
plt.title('Cost as a function of b')
plt.xlabel('b')
#----------------------------------------------------Linear Regression with Keras
# One Dense unit with one input is the linear model y = w * x + b.
model = Sequential()
model.add(Dense(1, input_shape = (1,)))
model.summary()

model.compile(Adam(lr = 0.8), 'mean_squared_error')
model.fit(x, y_true, epochs = 40)

# Draw the fitted line on top of the data.
y_pred = model.predict(x)
ax = df.plot(kind = 'scatter',
             x = 'Height',
             y = 'Weight',
             title = 'Weight and Height in adults')
ax.plot(x, y_pred, color = 'red', linewidth = 3)

# The layer's weights are the learned slope and intercept.
w, b = model.get_weights()
print(w, b)

#----------------------------------------------------Classification
df = pd.read_csv('../data/user_visit_duration.csv')
print(df.head())

df.plot(kind = 'scatter',
        x = 'Time (min)',
        y = 'Buy')

# A single sigmoid unit trained with binary cross-entropy is logistic
# regression.
model = Sequential()
model.add(Dense(1, input_shape = (1,), activation = 'sigmoid'))
model.compile(SGD(lr = 0.5), 'binary_crossentropy', metrics = ['accuracy'])
model.summary()

x = df[['Time (min)']].values
y = df['Buy'].values

model.fit(x, y, epochs = 25)

ax = df.plot(kind = 'scatter',
             x = 'Time (min)',
             y = 'Buy',
             title = 'Purchase behavior VS time spent on site')

# Evaluate the model on a grid of times; reshape to one column because the
# model was declared with input_shape = (1,).
temp = np.linspace(0, 4)
ax.plot(temp, model.predict(temp.reshape(-1, 1)), color = 'orange')
plt.legend(['model', 'data'])

# Threshold the predicted probabilities at 0.5 to get class labels.
y_pred = model.predict(x)
y_class_pred = y_pred > 0.5

#--------------------------------------------------------Train/Test split
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size = 0.2)

# Overwrite every weight array with zeros so the model is retrained from
# scratch on the training portion only.
params = model.get_weights()
params = [np.zeros(p.shape) for p in params]
model.set_weights(params)

# Accuracy of the reset (untrained) model on the whole data set.
print("The accuracy score is {:0.3f}".format(
    accuracy_score(y, model.predict(x) > 0.5)))

model.fit(xTrain, yTrain, epochs = 25, verbose = 0)

# Accuracy after training, reported separately for the two partitions.
print("The train accuracy score is {:0.3f}".format(
    accuracy_score(yTrain, model.predict(xTrain) > 0.5)))
print("The test accuracy score is {:0.3f}".format(
    accuracy_score(yTest, model.predict(xTest) > 0.5)))

#---------------------------------------------------------Cross Validation
def build_logistic_regression_model():
  """Build and compile a fresh single-unit sigmoid classifier.

  Returns a new compiled ``Sequential`` model on every call (one input
  feature, SGD with lr = 0.5, binary cross-entropy loss, accuracy metric).
  """
  model = Sequential()
  model.add(Dense(1, input_shape = (1,), activation = 'sigmoid'))
  model.compile(SGD(lr = 0.5), 'binary_crossentropy', metrics = ['accuracy'])
  return model

# Wrap the model factory in the scikit-learn style KerasClassifier.
classifier_options = dict(build_fn = build_logistic_regression_model,
                          epochs = 25,
                          verbose = 0)
model = KerasClassifier(**classifier_options)
print(model)

# Score the wrapped model with 3-fold shuffled cross validation.
cv = KFold(3, shuffle = True)
scores = cross_val_score(model, x, y, cv = cv)
print(cv, scores)

score_mean = scores.mean()
score_std = scores.std()
print("The cross validation accuracy is {:0.4f} + {:0.4f}".format(score_mean, score_std))
                                                                  
#---------------------------------------------------------Confusion Matrix
# A bare expression is only displayed when it is the last line of a notebook
# cell, so print the matrix explicitly.
print(confusion_matrix(y, y_class_pred))

def pretty_confusion_matrix(y_True, y_pred, labels = ["False", "True"]):
  """Return the confusion matrix of ``y_True`` vs ``y_pred`` as a DataFrame.

  Rows are indexed by ``labels``; columns carry the same labels prefixed
  with ``'Predicted '``. ``labels`` is only read, never modified.
  """
  # Use the function's own argument rather than a same-named global.
  cm = confusion_matrix(y_True, y_pred)
  pred_labels = ['Predicted ' + label for label in labels]
  return pd.DataFrame(cm, index = labels, columns = pred_labels)

# Labelled confusion matrix, printed so it is visible mid-cell.
print(pretty_confusion_matrix(y, y_class_pred, ['Not Buy', 'Buy']))

# Headline metrics with three decimals.
print("Precision:\t{:0.3f}".format(precision_score(y, y_class_pred)))
print("Recall    \t{:0.3f}".format(recall_score(y, y_class_pred)))
print("F1 Score  \t{:0.3f}".format(f1_score(y, y_class_pred)))
print(classification_report(y, y_class_pred))

Deep Learning Intro

In [None]:
# Two interleaving half-moons: a data set that is not linearly separable.
x, y = make_moons(n_samples = 1000, noise = 0.1, random_state = 0)
plt.figure()
plt.plot(x[y == 0, 0], x[y == 0, 1], 'ob', alpha = 0.5)
plt.plot(x[y == 1, 0], x[y == 1, 1], 'xr', alpha = 0.5)
plt.legend(['0', '1'])

print(x.shape)

# Split the moons data so the models below train and evaluate on this data
# set rather than on partitions left over from an earlier section.
# NOTE(review): test_size and random_state are chosen here — adjust if needed.
xTrain, xTest, yTrain, yTest = train_test_split(x, y,
                                                test_size = 0.3,
                                                random_state = 42)

# Shallow model: a single sigmoid unit on the two input features.
model = Sequential()
model.add(Dense(1, input_shape = (2,), activation = 'sigmoid'))
model.compile(Adam(lr = 0.05), 'binary_crossentropy', metrics = ['accuracy'])
model.fit(xTrain, yTrain, epochs = 200, verbose = 0)

# evaluate() returns [loss, accuracy] because one metric was compiled in.
results = model.evaluate(xTest, yTest)
print(results)
print('The Accuracy score on the Test set is:\t{:0.3f}'.format(results[1]))

def plot_decision_boundary(model, x, y):
  """Draw ``model``'s predictions over a grid together with the data points.

  ``x`` must have exactly two columns (its per-column min/max are unpacked
  into two values each); ``y`` holds the labels 0 and 1 used to select the
  points of each class. ``model`` only needs a ``predict`` method.
  """
  margin = 0.1
  lower = x.min(axis = 0) - margin
  upper = x.max(axis = 0) + margin
  (amin, bmin), (amax, bmax) = lower, upper

  # 101 x 101 grid covering the data range plus the margin.
  aa, bb = np.meshgrid(np.linspace(amin, amax, 101),
                       np.linspace(bmin, bmax, 101))
  grid_points = np.column_stack((aa.ravel(), bb.ravel()))

  # Predict on every grid point and fold the result back into grid shape.
  cc = model.predict(grid_points).reshape(aa.shape)

  plt.figure(figsize = (12,8))
  plt.contourf(aa, bb, cc, cmap = 'bwr', alpha = 0.2)
  for label, marker in ((0, 'ob'), (1, 'xr')):
    selected = y == label
    plt.plot(x[selected, 0], x[selected, 1], marker, alpha = 0.5)
  plt.legend(['0', '1'])

plot_decision_boundary(model, x, y)

# Deeper model: two tanh layers followed by a sigmoid output unit.
model = Sequential()
model.add(Dense(1, input_shape = (2,), activation = 'tanh'))
model.add(Dense(1, activation = 'tanh'))
model.add(Dense(1, activation = 'sigmoid'))
model.compile(Adam(lr = 0.05), 'binary_crossentropy', metrics = ['accuracy'])

model.fit(xTrain, yTrain, epochs = 100, verbose = 0)
model.evaluate(xTest, yTest)

# Hard class predictions for each partition, scored against the true labels.
yTrain_pred = model.predict_classes(xTrain)
yTest_pred = model.predict_classes(xTest)

print('The Accuracy score on the Train set is:\t{:0.3f}'.format(
    accuracy_score(yTrain, yTrain_pred)))
print('The Accuracy score on the Test set is:\t{:0.3f}'.format(
    accuracy_score(yTest, yTest_pred)))
plot_decision_boundary(model, x, y)

df = pd.read_csv('../data/iris.csv')
sns.pairplot(df, hue = 'species')

print(df.head())

# Features: every column except the label.
x = df.drop('species', axis = 1)
print(x.head())

# Map each species name to an integer index in order of first appearance.
target_names = df['species'].unique()
print(target_names)

target_dict = {n: i for i, n in enumerate(target_names)}
print(target_dict)

y = df['species'].map(target_dict)
print(y.head())

# One-hot encode the integer labels.
y_cat = to_categorical(y)
print(y_cat[:10])



Gradient Descent

In [None]:
#-----------------------------------------Learning Rate
df = pd.read_csv('../data/banknotes.csv')
print(df.head())
print(df['class'].value_counts())

sns.pairplot(df, hue = 'class')
x = scale(df.drop('class', axis = 1).values)
y = df['class'].values

# Random forest cross-validation scores as a reference point.
model = RandomForestClassifier()
print(cross_val_score(model, x, y))

# Split the banknotes data so the Keras models below are trained and
# evaluated on this data set.
# NOTE(review): test_size and random_state are chosen here — adjust if needed.
xTrain, xTest, yTrain, yTest = train_test_split(x, y,
                                                test_size = 0.3,
                                                random_state = 42)

K.clear_session()

model = Sequential()
model.add(Dense(1, input_shape = (4,), activation = 'sigmoid'))
model.compile(loss = 'binary_crossentropy',
              optimizer = 'sgd',
              metrics = ['accuracy'])

history = model.fit(xTrain, yTrain)
result = model.evaluate(xTest, yTest)

historydf = pd.DataFrame(history.history, index = history.epoch)
historydf.plot(ylim = (0, 1))
# evaluate() returns [loss, accuracy]; the title reports the accuracy.
plt.title("Test accuracy: {:3.1f} %".format(result[1] * 100), fontsize = 15)

# One training history per learning rate.
dflist = []
rate = [0.01, 0.05, 0.1, 0.5]

for lr in rate:
  K.clear_session()
  model = Sequential()
  model.add(Dense(1, input_shape = (4,), activation = 'sigmoid'))
  model.compile(loss = 'binary_crossentropy',
                optimizer = SGD(lr = lr),
                metrics = ['accuracy'])
  h = model.fit(xTrain, yTrain, batch_size = 16, verbose = 0)
  dflist.append(pd.DataFrame(h.history, index = h.epoch))

# Side-by-side histories with a (rate, metric) column index.
hdf = pd.concat(dflist, axis = 1)
met = dflist[0].columns
idx = pd.MultiIndex.from_product([rate, met],
                                 names = ['rate', 'metric'])
hdf.columns = idx

# The accuracy history key is 'acc' or 'accuracy' depending on the Keras
# version; use whichever is present.
acc_key = 'acc' if 'acc' in met else 'accuracy'

plt.figure()
ax = plt.subplot(211)
hdf.xs('loss', axis = 1, level = 'metric').plot(ylim = (0, 1), ax = ax)
plt.title('Loss')

ax = plt.subplot(212)
hdf.xs(acc_key, axis = 1, level = 'metric').plot(ylim = (0, 1), ax = ax)
plt.title("Accuracy")
plt.xlabel("Epochs")

plt.tight_layout()

# One training history per batch size, collected in a list of its own so the
# results are not mixed with those of another experiment.
batch_sizes = [16, 32, 64, 128]
dflist = []

for batch_size in batch_sizes:
  K.clear_session()
  model = Sequential()
  model.add(Dense(1, input_shape = (4,), activation = 'sigmoid'))
  model.compile(loss = 'binary_crossentropy',
                optimizer = 'sgd',
                metrics = ['accuracy'])
  h = model.fit(xTrain, yTrain, batch_size = batch_size, verbose = 0)
  dflist.append(pd.DataFrame(h.history, index = h.epoch))

# Side-by-side histories with a (batch_size, metric) column index.
hdf = pd.concat(dflist, axis = 1)
met = dflist[0].columns
hdf.columns = pd.MultiIndex.from_product([batch_sizes, met],
                                         names = ['batch_size', 'metric'])

# (display name, factory) pairs. A factory builds a fresh optimizer for each
# run, and avoids calling eval() on hand-written strings.
optimizer_factories = [
    ('SGD(lr = 0.01)',
     lambda: SGD(lr = 0.01)),
    ('SGD(lr = 0.01, momentum = 0.3)',
     lambda: SGD(lr = 0.01, momentum = 0.3)),
    ('SGD(lr = 0.01, momentum = 0.3, nesterov = True)',
     lambda: SGD(lr = 0.01, momentum = 0.3, nesterov = True)),
    ('Adam(lr = 0.01)',
     lambda: Adam(lr = 0.01)),
    ('Adagrad(lr = 0.01)',
     lambda: Adagrad(lr = 0.01)),
    ('RMSprop(lr = 0.01)',
     lambda: RMSprop(lr = 0.01)),
]
optim = [name for name, _ in optimizer_factories]

# One training history per optimizer, collected in a list of its own so the
# number of frames matches the number of optimizers.
dflist = []
for opt_name, make_optimizer in optimizer_factories:
  K.clear_session()
  model = Sequential()
  model.add(Dense(1, input_shape = (4,), activation = 'sigmoid'))
  model.compile(loss = 'binary_crossentropy',
                optimizer = make_optimizer(),
                metrics = ['accuracy'])
  h = model.fit(xTrain, yTrain, batch_size = 16, verbose = 0)
  dflist.append(pd.DataFrame(h.history, index = h.epoch))

# Side-by-side histories with an (optimizer, metric) column index.
hdf = pd.concat(dflist, axis = 1)
met = dflist[0].columns
idx = pd.MultiIndex.from_product([optim, met],
                                 names = ['optimizer', 'metric'])
hdf.columns = idx

Convolutional Neural Networks

In [None]:
import numpy as np

# Random integer tensors of order 4 and order 2.
a = np.random.randint(10, size = (2, 3, 5, 5))
b = np.random.randint(10, size = (2, 3))
print(a, b)

# Random 4 x 4 colour image. randint's upper bound is exclusive, so 256 is
# needed for the value 255 to be possible.
img = np.random.randint(256, size = (4, 4, 3), dtype = 'uint8')
print(img)

plt.figure(figsize = (10, 10))
plt.subplot(221)
plt.imshow(img)
plt.title('All Channels combined')

# One panel per colour channel (last axis of img).
channel_panels = [(222, 0, 'Reds', 'Red channel'),
                  (223, 1, 'Greens', 'Green channel'),
                  (224, 2, 'Blues', 'Blue channel')]
for position, channel, cmap, title in channel_panels:
  plt.subplot(position)
  plt.imshow(img[:, :, channel], cmap = cmap)
  plt.title(title)

# Contract a with b over the first two axes, then over the first axis only;
# printed so the results are visible mid-cell.
print(np.tensordot(a, b, axes = ([0, 1], [0, 1])))
print(np.tensordot(a, b, axes = ([0], [0])).shape)

img = misc.ascent()
print(img.shape)

# Each image gets its own figure; successive imshow calls on the same axes
# would leave only the last one visible.
plt.figure()
plt.imshow(img, cmap = 'gray')

# 3 x 3 kernel: positive top row, zero middle row, negative bottom row.
h_kernel = np.array([[1, 2, 1],
                     [0, 0, 0],
                     [-1, -2, -1]])
plt.figure()
plt.imshow(h_kernel, cmap = 'gray')

res = convolve2d(img, h_kernel)
plt.figure()
plt.imshow(res, cmap = 'gray')

Recurrent Neural Networks

In [None]:
# NOTE(review): the file name has no extension — confirm it matches the file
# on disk.
df = pd.read_csv('../data/cansim-0800020-eng-6674700030567901031',
                 skiprows = 6, skipfooter = 9,
                 engine = 'python')
print(df.head())

# Parse the date column, shift each date to its month end, and index by it.
df['Adjustments'] = pd.to_datetime(df['Adjustments']) + MonthEnd(1)
df = df.set_index('Adjustments')
print(df.head())
df.plot()

# Chronological split at the start of 2011.
split_date = pd.Timestamp('01-01-2011')
train = df.loc[:split_date, ['Unadjusted']]
test = df.loc[split_date:, ['Unadjusted']]

ax = train.plot()
test.plot(ax = ax)
plt.legend(['train', 'test'])

# Fit the scaler on the training data only and reuse it for the test data.
sc = MinMaxScaler()
trsc = sc.fit_transform(train)
tesc = sc.transform(test)

print(trsc[:4])

# Predict each value from the one before it.
xTrain = trsc[:-1]
yTrain = trsc[1:]

xTest = tesc[:-1]
yTest = tesc[1:]

print(xTrain.shape,
      xTrain[:, None].shape)

# Insert an axis so the inputs match the LSTM's input_shape of (1, 1).
xTrainx = xTrain[:, None]
xTestx = xTest[:, None]

# NOTE(review): early_stop was referenced without being defined; monitoring
# the training loss with patience 1 is an assumption — confirm.
early_stop = EarlyStopping(monitor = 'loss', patience = 1, verbose = 1)

K.clear_session()
model = Sequential()

model.add(LSTM(6, input_shape = (1, 1)))
model.add(Dense(1))
model.compile(loss = 'mean_squared_error', optimizer = 'adam')
model.fit(xTrainx, yTrain,
          epochs = 100, batch_size = 1,
          verbose = 1, callbacks = [early_stop])

ypred = model.predict(xTestx)
plt.figure()
plt.plot(yTest)
plt.plot(ypred)

Improving performance

In [None]:
# Load the digits data set: flattened images and their labels.
digits = load_digits()
x = digits.data
y = digits.target

print(x[0])

# Show the first eight samples as 8 x 8 grayscale images in one row.
images = x.reshape(-1, 8, 8)
for position, image in enumerate(images[:8], start = 1):
  plt.subplot(1, 8, position)
  plt.imshow(image, cmap = 'gray')

# NOTE(review): the label encoding, split, model and train_sizes used by this
# loop were not defined anywhere in the cell. The setup below is a minimal
# reconstruction (small dense softmax classifier, four training sizes) —
# confirm against the intended experiment.
y_cat = to_categorical(y, 10)
xTrain, xTest, yTrain, yTest = train_test_split(x, y_cat, test_size = 0.3)

K.clear_session()
model = Sequential()
model.add(Dense(16, input_shape = (x.shape[1],), activation = 'relu'))
model.add(Dense(10, activation = 'softmax'))
model.compile('adam', 'categorical_crossentropy', metrics = ['accuracy'])

# Remember the untrained weights so every training size starts from the
# same state.
initial_weights = model.get_weights()

train_sizes = (len(xTrain) * np.linspace(0.1, 0.999, 4)).astype(int)

train_scores = []
test_scores = []

for train_size in train_sizes:
  # train_test_split returns four arrays; keep only the training fraction.
  xtf, _, ytf, _ = train_test_split(xTrain, yTrain,
                                    train_size = int(train_size))
  model.set_weights(initial_weights)
  model.fit(xtf, ytf, verbose = 0, epochs = 300,
            callbacks = [EarlyStopping(monitor = 'loss', patience = 1)])

  # evaluate() returns [loss, accuracy]; the last entry is the accuracy.
  r = model.evaluate(xtf, ytf, verbose = 0)
  train_scores.append(r[-1])

  e = model.evaluate(xTest, yTest, verbose = 0)
  test_scores.append(e[-1])

  print("Done size: ", train_size)

plt.figure()
plt.plot(train_sizes, train_scores, 'o-', label = "Training scores")
plt.plot(train_sizes, test_scores, 'o-', label = 'Cross-validation score')
plt.legend(loc = 'best')

def repeated_training(xTrain, yTrain, xTest, yTest,
                      units = 512, activation = 'sigmoid',
                      optimizer = 'sgd', do_bn = False,
                      epochs = 10, repeats = 3):
  """Train the same network ``repeats`` times and summarise its accuracy.

  The network has three hidden Dense layers of ``units`` units each,
  optionally followed by BatchNormalization when ``do_bn`` is true, and a
  10-unit softmax output trained with categorical cross-entropy.

  Returns a 4-tuple ``(mean_acc, std_acc, mean_val_acc, std_val_acc)``; each
  element is a per-epoch array taken across the repeats, the first pair for
  the training data and the second for ``(xTest, yTest)``.

  Raises ``ValueError`` if ``repeats`` is smaller than 1.
  """
  if repeats < 1:
    raise ValueError("repeats must be at least 1")

  histories = []
  for repeat in range(repeats):
    K.clear_session()
    model = Sequential()

    # First hidden layer declares the input shape (all axes but the first).
    model.add(Dense(units,
                    input_shape = xTrain.shape[1:],
                    kernel_initializer = 'normal',
                    activation = activation))
    if do_bn:
      model.add(BatchNormalization())

    # Two further hidden layers with the same configuration.
    for _ in range(2):
      model.add(Dense(units,
                      kernel_initializer = 'normal',
                      activation = activation))
      if do_bn:
        model.add(BatchNormalization())

    model.add(Dense(10, activation = 'softmax'))
    model.compile(optimizer,
                  'categorical_crossentropy',
                  metrics = ['accuracy'])
    h = model.fit(xTrain, yTrain, validation_data = (xTest, yTest),
                  epochs = epochs, verbose = 0)

    # The accuracy history key is 'acc' or 'accuracy' depending on the Keras
    # version; use whichever is present.
    acc_key = 'acc' if 'acc' in h.history else 'accuracy'
    histories.append([h.history[acc_key], h.history['val_' + acc_key]])
    print(repeat, end = ' ')

  # Shape (repeats, 2, epochs): aggregate over the repeats axis.
  histories = np.array(histories)
  mean_acc = histories.mean(axis = 0)
  std_acc = histories.std(axis = 0)
  print()

  return mean_acc[0], std_acc[0], mean_acc[1], std_acc[1]

# Run with the default settings and unpack the four per-epoch summaries.
baseline_stats = repeated_training(xTrain, yTrain, xTest, yTest)
mean_acc, std_acc, mean_acc_val, std_acc_val = baseline_stats

def plot_mean_std(m, s):
  """Plot the curve ``m`` with a shaded band from ``m - s`` to ``m + s``.

  ``m`` and ``s`` must be equal-length arrays supporting element-wise
  arithmetic (e.g. NumPy arrays).
  """
  plt.plot(m)
  plt.fill_between(range(len(m)), m - s, m + s, alpha = 0.1)

# The legend names four curves, so the batch-normalized run is needed as
# well as the plain one.
(mean_acc_bn, std_acc_bn,
 mean_acc_val_bn, std_acc_val_bn) = repeated_training(xTrain, yTrain,
                                                      xTest, yTest,
                                                      do_bn = True)

# Curves are drawn in the same order as the legend entries.
plot_mean_std(mean_acc, std_acc)
plot_mean_std(mean_acc_val, std_acc_val)
plot_mean_std(mean_acc_bn, std_acc_bn)
plot_mean_std(mean_acc_val_bn, std_acc_val_bn)
plt.ylim(0, 1.01)
plt.title("Batch Normalization Accuracy")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Test', 'Train with Batch Normalization', 'Test with Batch Normalization'])