In [1]:
%pylab inline 
import pandas as pd
import seaborn as sns
sns.set(style='ticks', font_scale=2)

Populating the interactive namespace from numpy and matplotlib


In [2]:
import scipy.stats as stats
from ipywidgets import interact, interactive, fixed

# Distribution parameters demo

In [32]:
import scipy.stats as stats


# Fixed standard-normal sample drawn once for the distribution demo below.
# The draw is seeded so that Restart & Run All reproduces the same points.
n = 10
xs = stats.norm().rvs(n, random_state=0)

@interact(location=(-5,5), scale=(0.1,2, 0.05), adjust_axis=False)
def plot_dist(location, scale, adjust_axis):
    """Plot a normal pdf together with the shifted/scaled sample `xs`.

    Parameters
    ----------
    location : number
        Passed as the first argument (loc) of ``stats.norm``.
    scale : number
        Passed as the second argument (scale) of ``stats.norm``.
    adjust_axis : bool
        If True the x-axis is set to ``location +/- 10*scale`` and the upper
        y-limit is left as autoscaled from the curve; otherwise the view is
        fixed at x in [-10, 10] and y in [0, 1.5].
    """
    if adjust_axis:
        x_lo, x_hi = location - 10*scale, location + 10*scale
    else:
        x_lo, x_hi = -10, 10

    dist = stats.norm(location, scale)
    # Evaluate the pdf over exactly the displayed x-range. A fixed [-10, 10]
    # grid leaves the curve cut off inside the view when adjust_axis widens
    # the window (e.g. location=5, scale=2 shows [-15, 25]).
    x = linspace(x_lo, x_hi, 1000)
    p = dist.pdf(x)

    fig, ax = subplots(figsize=(8,4))
    ax.plot(x, p, lw=3)
    ax.set_xlabel('x')
    ax.set_ylabel('pdf')

    # Limits are fixed before the sample dots are drawn, so the dots at y=0
    # do not influence the autoscaled top of the y-axis.
    if adjust_axis:
        ax.set_ylim(0)
    else:
        ax.set_ylim(0, 1.5)
    ax.set_xlim(x_lo, x_hi)

    # Map the notebook-level sample `xs` through the same location/scale and
    # draw it along the x-axis.
    xs_scaled = xs*scale + location
    ax.plot(xs_scaled, 0*xs, 'ro', ms=15)

interactive(children=(IntSlider(value=0, description='location', max=5, min=-5), FloatSlider(value=1.05, descr…

## Rotation demo

In [17]:
import scipy.stats as sts

In [28]:
# Synthetic 2-D point cloud for the rotation demo: y = a*x + Gaussian noise.
# Both draws are seeded so Restart & Run All reproduces the same cloud. The
# seeds differ so that the noise is not a rescaled copy of x.
n = 20
x = sts.norm.rvs(size=n, random_state=1)
a = 1  # slope of the linear relation between x and y
y = a*x + sts.norm(scale=.25).rvs(n, random_state=2)

# One row per point, columns are (x, y).
X = c_[x,y]

In [29]:
def get_percent_var(X):
    """Return each column's variance as a fraction of the summed column variances.

    `X` must provide ``.var(axis=0)``; the result of that call is divided by
    its own ``.sum()``.
    """
    column_variance = X.var(axis=0)
    total_variance = column_variance.sum()
    return column_variance / total_variance

from scipy.spatial.distance import pdist, squareform

In [31]:
# Marker colours used by plot_rot: c1 for the scaled points, c2 for the rotated points.
c1, c2 = 'black', 'blue'

## add scaling

def _rotation_matrix(angle):
    """Return the 2x2 rotation matrix for `angle` given in radians."""
    return array([[cos(angle), -sin(angle)],
                  [sin(angle), cos(angle)]])


@interact(theta=(-90,90,5), rotate_axis=False, project_on_x=False, scale=(1,3,0.5))
def plot_rot(theta, rotate_axis, project_on_x, scale):
    """Interactive rotation demo on the notebook-level point cloud `X`.

    Parameters
    ----------
    theta : number
        Rotation angle in degrees.
    rotate_axis : bool
        If True, draw a pair of axis arrows rotated by ``-theta`` over the
        scaled points; if False, draw the rotated points themselves.
    project_on_x : bool
        Only used when `rotate_axis` is False: additionally mark every rotated
        point's projection onto y=0 and connect it to the point.
    scale : number
        Factor applied to the first column of a copy of `X` before rotating.

    The left panel shows the points, labelled with the share of variance in
    each rotated coordinate. The right panel plots the pairwise distances
    along the first rotated coordinate against the pairwise distances of the
    scaled 2-D points.
    """
    theta = theta * pi/180  # degrees -> radians

    X_scaled = X.copy()
    X_scaled[:,0] *= scale

    X_rot = _rotation_matrix(theta).dot(X_scaled.T).T
    v_rot = get_percent_var(X_rot)
    var_labels = ['{:.0f}%'.format(100*v) for v in v_rot]

    fig, axs = subplots(1,2, figsize=(12,6))

    ax = axs[0]
    ax.plot(X_scaled[:,0], X_scaled[:,1], 'o', color=c1)

    # Tips of the two axes (length 2); rows are the first and second axis.
    axis_tips = array([[2,0],
                       [0,2]])

    if rotate_axis:
        # Rotate the axes by the opposite angle instead of rotating the points.
        axis_tips = _rotation_matrix(-theta).dot(axis_tips.T).T
        for tip_x, tip_y in axis_tips:
            ax.arrow(0, 0, tip_x, tip_y, width=0.05, color='gray')
    else:
        ax.plot(X_rot[:,0], X_rot[:,1], 'o', c=c2)

        if project_on_x:
            X_proj = X_rot.copy()
            X_proj[:,1] = 0
            ax.plot(X_proj[:,0], X_proj[:,1], 'kx', ms=10)
            # Iterate over the rows actually plotted rather than the notebook
            # global `n`, which the distribution demo also assigns (n = 10)
            # and which would silently drop half of the connector lines.
            for px, py in X_rot:
                ax.plot([px, px], [py, 0], '-', color='gray')

    # Variance share of each rotated coordinate, written at the axis tips.
    for (tip_x, tip_y), label, h_align in zip(axis_tips, var_labels, ('left', 'center')):
        ax.text(tip_x, tip_y, label, ha=h_align, va='bottom')

    # Cross-hair style axes through the centre of the panel, without ticks.
    ax.spines['left'].set_position('center')
    ax.spines['bottom'].set_position('center')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_xticks([])
    ax.set_yticks([])

    lm = 5
    ax.set_xlim(-lm, lm)
    ax.set_ylim(-lm, lm)

    # Pairwise distances: first rotated coordinate only vs. full 2-D points.
    # (The original computed a rotate_axis-dependent column index and then
    # overwrote it with 0; column 0 is what was actually used.)
    d_1D = pdist(X_rot[:,[0]])
    d_2D = pdist(X_scaled)

    ax = axs[1]
    ax.plot([0,4], [0,4], c='gray')  # identity line for reference
    ax.plot(d_2D, d_1D, 'o')
    ax.set_xlabel('pairwise distances (x,y)')
    ax.set_ylabel('pairwise distances x-only')
    ax.set_xlim(0,4)
    ax.set_ylim(0,4)
    ax.set_xticks([])
    ax.set_yticks([])

    fig.tight_layout()

interactive(children=(IntSlider(value=0, description='theta', max=90, min=-90, step=5), Checkbox(value=False, …

## PCA - wine data

In [8]:
from sklearn.decomposition import PCA


In [9]:
# Read the wine table from the lecture_2 folder (path is relative to this notebook).
all_data = pd.read_csv('../lecture_2/wine.data.csv')
# Keep every column except the first as the feature matrix.
# NOTE(review): presumably the first column is the `Class` label read below — confirm against the CSV.
data = all_data.iloc[:,1:]

In [10]:
def _standardize(column):
    """Subtract the column's mean and divide by its standard deviation."""
    return (column - column.mean()) / column.std()


# Column-wise standardised copy of `data`.
z = data.apply(_standardize, axis=0)

In [11]:
# The `Class` column of the full table, used as the hue variable in the PCA plot.
cultivar = all_data['Class']

In [12]:
@interact(normalize=False, color=['none', 'cultivar'], loadings=False, n_loadings=(2,8,1))
def plot_wine_PCA(normalize, color, loadings, n_loadings):
    """Scatter the wine samples on their first two principal components.

    Parameters
    ----------
    normalize : bool
        Fit the PCA on the standardised table `z` instead of the raw `data`.
    color : str
        'cultivar' colours the points by `cultivar`; any other value leaves
        them uncoloured.
    loadings : bool
        If True, overlay loading arrows for the strongest features.
    n_loadings : int
        Number of features (ranked by summed absolute loading on PC1 and PC2)
        to draw arrows for; capped at the number of features.
    """
    X = z if normalize else data
    pca = PCA(n_components=2).fit(X)
    X_trans = pca.transform(X)

    # Single expression so `colors` is always bound, whatever `color` holds.
    colors = cultivar if color == 'cultivar' else None

    fig, ax = subplots(figsize=(12,8))
    sns.scatterplot(x=X_trans[:,0], y=X_trans[:,1], hue=colors,
                    # a palette without a hue variable is meaningless
                    palette='deep' if colors is not None else None,
                    ax=ax)

    ax.set_xlabel('PC1 ({:.2f}%)'.format(100*pca.explained_variance_ratio_[0]))
    ax.set_ylabel('PC2 ({:.2f}%)'.format(100*pca.explained_variance_ratio_[1]))

    if loadings:
        # Features ordered by total absolute loading on the two components.
        feature_loading_size_sorted = argsort(abs(pca.components_).sum(axis=0))[::-1]

        # The arrows share the axes with the scores, so the loadings (at most 1
        # in magnitude) are stretched by the half-range of the scores on each
        # component. The previous `1/max - min` parsed as `(1/max) - min`,
        # adding a reciprocal score to a score.
        scale = (X_trans.max(axis=0) - X_trans.min(axis=0)) / 2

        # Slicing caps n_loadings at the number of available features.
        for i in feature_loading_size_sorted[:n_loadings]:
            tip_x = pca.components_[0,i]*scale[0]
            tip_y = pca.components_[1,i]*scale[1]
            ax.arrow(0, 0, tip_x, tip_y, color='r', alpha=0.5)
            ax.text(tip_x, tip_y, data.columns[i],
                    color='k', ha='center', va='center', size=16)

interactive(children=(Checkbox(value=False, description='normalize'), Dropdown(description='color', options=('…