In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Average brain and body weights for 27 species of land animals.
# Source: http://mste.illinois.edu/malcz/DATA/BIOLOGY/Animals.html
#
# One row per animal: (name, body weight, brain weight). The plots in this
# notebook label body weight in kg and brain weight in g.
# NOTE(review): the mountain beaver brain weight (465) equals the cow's body
# weight and looks implausible for a 1.35 kg animal -- verify against the source.
_ANIMAL_TABLE = [
    (u'Mountain beaver', 1.35, 465),
    (u'Cow', 465, 423),
    (u'Grey wolf', 36.33, 119.5),
    (u'Goat', 27.66, 115),
    (u'Guinea pig', 1.04, 5.5),
    (u'Dipliodocus', 11700, 50),
    (u'Asian elephant', 2547, 4603),
    (u'Donkey', 187.1, 419),
    (u'Horse', 521, 655),
    (u'Potar monkey', 10, 115),
    (u'Cat', 3.3, 25.6),
    (u'Giraffe', 529, 680),
    (u'Gorilla', 207, 406),
    (u'Human', 62, 1320),
    (u'African elephant', 6654, 5712),
    (u'Triceratops', 9400, 70),
    (u'Rhesus monkey', 6.8, 179),
    (u'Kangaroo', 35, 56),
    (u'Golden hamster', 0.12, 1),
    (u'Mouse', 0.023, 0.4),
    (u'Rabbit', 2.5, 12.1),
    (u'Sheep', 55.5, 175),
    (u'Jaguar', 100, 157),
    (u'Chimpanzee', 52.16, 440),
    (u'Brachiosaurus', 87000, 154.5),
    (u'Mole', 0.122, 3),
    (u'Pig', 192, 180),
]

# Split the table into the three parallel lists used by the rest of the notebook.
animalname = [row[0] for row in _ANIMAL_TABLE]
bodyweight = [row[1] for row in _ANIMAL_TABLE]
brainweight = [row[2] for row in _ANIMAL_TABLE]


In [None]:
# Plot brain weight against body weight on linear axes. Body weights span
# from grams to tens of tonnes, so most points crowd into one corner here.
plt.plot(bodyweight,brainweight,'.',ms=10)
# Label the axes (with units) so the figure can be read on its own.
plt.xlabel('Body weight in kg',fontsize=14)
plt.ylabel('Brain weight in g',fontsize=14);

In [None]:
# Scatter of brain weight against body weight with both axes logarithmic.
plt.loglog(
    bodyweight,
    brainweight,
    marker='.',
    linestyle='None',   # markers only, no connecting line
    markersize=10,
)
plt.xlabel('Body weight in kg', fontsize=14)
plt.ylabel('Brain weight in g', fontsize=14);

In [None]:
# Fit a straight line to the log-log data twice: once with ordinary least
# squares on every point, and once with RANSAC, which treats points whose
# residual exceeds the threshold as outliers and ignores them in the fit.
from sklearn import linear_model

# Work in log10 space. scikit-learn expects a 2-D feature matrix, so the body
# weights are reshaped into a single column (without mutating .shape in place).
X = np.log10(bodyweight).reshape(-1, 1)
y = np.log10(brainweight)

# Fit line using all data
lr = linear_model.LinearRegression()
lr.fit(X, y)

# Robustly fit linear model with the RANSAC algorithm. RANSAC samples random
# subsets of the data, so the seed is fixed to make the inlier/outlier split
# (and therefore every figure below) reproducible across runs.
# residual_threshold is in log10 units of brain weight.
ransac = linear_model.RANSACRegressor(residual_threshold=0.6, random_state=0)
ransac.fit(X, y)
inlier = ransac.inlier_mask_
outlier = np.logical_not(inlier)

# Predict data for the estimated models. Two end points are enough to draw a
# straight line, and linspace guarantees the line spans the full data range
# from the lightest to the heaviest animal.
line_X = np.linspace(X.min(), X.max(), 2)[:, np.newaxis]
line_y = lr.predict(line_X)
line_y_ransac = ransac.predict(line_X)

plt.figure(figsize=(10,10))
plt.plot(X[inlier], y[inlier], '.', color='black', label='Inliers', ms=10)
plt.plot(X[outlier], y[outlier], '.', color='red', label='Outliers', ms=10)

plt.plot(line_X, line_y, color='blue', linewidth=3, label='Regression')
plt.plot(line_X, line_y_ransac, color='navy', linewidth=3, label='Robust regression')

plt.legend(loc='lower right')
plt.xlabel('Logarithm of Body weight in kg',fontsize=14)
plt.ylabel('Logarithm of Brain weight in g',fontsize=14)
plt.show()

In [None]:
# Plot the names of the animals instead of dots. Animals flagged as outliers
# by the robust fit are drawn in gold, all others in yellow-green.
plt.figure(figsize=(10,10))
plt.xlabel('Logarithm of Body weight in kg',fontsize=20)
plt.ylabel('Logarithm of Brain weight in g',fontsize=20)

# X is a single-column matrix; take column 0 so matplotlib receives plain
# scalars rather than 1-element arrays (implicit array-to-scalar conversion
# is deprecated in recent NumPy releases).
for name, log_body, log_brain, is_outlier in zip(animalname, X[:, 0], y, outlier):
    plt.text(
        log_body,
        log_brain,
        name,
        color='gold' if is_outlier else 'yellowgreen',
        horizontalalignment='center',
        verticalalignment='center',
        fontsize=15,
    )

# plt.text does not autoscale the axes, so set the limits explicitly.
plt.xlim(-2,6)
plt.ylim(-2,6)

# First save the figure with the names only ...
plt.savefig('BrainvsBodysize.png')
plt.plot(line_X, line_y_ransac, color='navy', linewidth=3,label='Robust regression')

# ... then save it again with the robust regression line drawn on top.
plt.savefig('BrainvsBodysizeFIT.png')
plt.show()

In [None]:
# Loading and plotting some example data for my other demo (used in the slides)

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
# Load the MNIST digits through Keras and rescale the pixel values by 1/255.
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

numsamples,nx,ny=np.shape(x_train)

# Quick look at a single training image.
plt.imshow(x_train[5,:,:],cmap='gray')

# Pick nine distinct training images. A fixed seed makes the exported slide
# images identical on every run, and replace=False rules out duplicates.
rng = np.random.RandomState(0)
sample_indices = rng.choice(numsamples, size=9, replace=False)

# Reuse one figure: clear it, draw the next digit without any axes
# decoration, and write it to Mnist<i>.png.
plt.figure(figsize=(10,10))
for i, index in enumerate(sample_indices):
    plt.clf()
    plt.imshow(x_train[index,:,:],cmap='binary')
    plt.yticks([])
    plt.xticks([])
    plt.axis('off')
    plt.savefig('Mnist'+str(i)+'.png')