In [None]:
#%matplotlib inline
# To edit this:
# code $(jupyter --data-dir)/nbextensions/snippets/snippets.json

# imports a library 'pandas', names it as 'pd'
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
%matplotlib inline

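# `%matplotlib inline` above enables inline plots (without it plots don't show up in the notebook);
# the settings below choose the inline image format, resolution and default figure size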
# %config InlineBackend.figure_format = 'svg'
%config InlineBackend.figure_format = 'png'
mpl.rcParams['figure.dpi']= 300
mpl.rcParams["figure.figsize"] = (6,4.5)

In [None]:


# Exponential relationship: y = exp(x + noise), with x sorted for plotting.
np.random.seed(42)
x = np.sort(np.random.randn(100))
y = np.exp(x + np.random.randn(100) / 3)

plt.plot(x, y, '.')
plt.xlabel('$x$')
plt.ylabel('$y$');

In [None]:
# Same sample with y on a log scale.
plt.plot(x,np.log(y),'.')
plt.xlabel('$x$')
# Raw string: '\l' is not a valid Python escape sequence and triggers a
# warning on recent interpreters; the rendered label is unchanged.
plt.ylabel(r'$\log y$');

In [None]:
# Log-scale view again, with an explicit LaTeX space in the axis label.
plt.plot(x,np.log(y),'.')
plt.xlabel('$x$')
# Raw string: '\l' and '\ ' are not valid Python escape sequences; the
# rendered label is unchanged.
plt.ylabel(r'$\log\ y$');

In [None]:

from scipy.special import logit, expit,logsumexp
# Binary outcomes whose success probability p = expit((15 - x) / 5) falls as x grows.
np.random.seed(43)
x = np.arange(30)
p = expit((15-x)/5)
y = np.random.binomial(1,p)

plt.plot(x,y,'.')
# Raw string so the LaTeX backslash is not read as an (invalid) Python escape.
plt.plot(x,p,label=r"$\mathbb{E}(y|x)$")
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.yticks([0,1]);
plt.legend();
# Mark the point where p = 0.75: inverting p = expit((15 - x) / 5)
# gives x = 15 - 5 * logit(p).
plt.scatter(15 - 5*logit(0.75),0.75,c='C1');

In [None]:

# Noisy linear relationship with slope -1, drawn through the Axes interface.
np.random.seed(42)
x = np.sort(np.random.randn(100))
y = -x + np.random.randn(100) / 3

fig, ax = plt.subplots()
ax.plot(x, y, '.')
ax.set(xlabel='$x$', ylabel='$y$');

In [None]:
import scipy.stats as st

# "Regression" is about computing _conditional expectations_

In [None]:
# Re-create the seeded linear sample: sorted x, y = -x + noise.
np.random.seed(42)
x = np.sort(np.random.randn(100))
y = -x + np.random.randn(100) / 3

def linplot(xs=None, ys=None):
    """Scatter-plot a sample as dots on a fresh figure with ``$x$``/``$y$`` labels.

    Parameters
    ----------
    xs, ys : array-like, optional
        Coordinates to plot. When omitted, the notebook-level ``x`` and ``y``
        are used, which keeps the zero-argument call working as before while
        letting callers avoid depending on whatever those globals hold.
    """
    xs = x if xs is None else xs
    ys = y if ys is None else ys
    fig, ax = plt.subplots()
    ax.plot(xs, ys, '.')
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

In [None]:
# Noisy linear sample together with the line y = -x, labelled E(y|x).
np.random.seed(42)
x = np.random.randn(100)
x = np.sort(x)
y = -x+np.random.randn(100)/3

plt.plot(x,y,'.')
plt.xlabel('$x$')
plt.ylabel('$y$');

# yt, x0 and xt are only consumed by the commented-out density overlay below.
yt = np.linspace(np.min(y),np.max(y),100)

x0 = 1.5
xt = x0 - 0.2 * st.norm.pdf(yt,-x0,1/3)
# plt.plot(x0+np.zeros_like(yt),yt,c='C1',alpha=0.2)
# plt.plot(xt,yt);

# xpts = np.array([-1.5,-0.5,0.5,1.5])
# plt.scatter(xpts,-xpts,c='C1');
# Remember the y-limits chosen for the scatter and restore them after the
# line is drawn, so the line does not rescale the axes.
ylimits = plt.ylim()
# Raw string: "\m" is not a valid Python escape sequence; the rendered label
# is unchanged.
plt.plot(x,-x,c='C1',label=r"$\mathbb{E}(y|x)$");
plt.ylim(*ylimits)
plt.legend();

In [None]:
# This figure belongs to the binary (logistic) example, but on a top-to-bottom
# run the notebook-level x and y hold the 100-point linear sample while p
# still has 30 entries, so plotting them together fails. Rebuild the logistic
# sample under local names; a private RandomState reproduces the seed-43 draw
# without disturbing the global random stream.
x_bin = np.arange(30)
p_bin = expit((15 - x_bin) / 5)
y_bin = np.random.RandomState(43).binomial(1, p_bin)

plt.plot(x_bin, y_bin, '.')
plt.plot(x_bin, p_bin, label="$P(y=1)$")
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend();

In [None]:
# Binary (logistic) example with the p = 0.75 point highlighted. The sample is
# rebuilt under local names because, on a top-to-bottom run, the notebook-level
# x and y hold the 100-point linear sample while p has 30 entries. A private
# RandomState reproduces the seed-43 draw without touching the global stream.
x_bin = np.arange(30)
p_bin = expit((15 - x_bin) / 5)
y_bin = np.random.RandomState(43).binomial(1, p_bin)

plt.plot(x_bin, y_bin, '.')
plt.plot(x_bin, p_bin)
plt.xlabel('$x$')
plt.ylabel('$y$')
# Inverting p = expit((15 - x) / 5) gives x = 15 - 5 * logit(p).
plt.scatter(15 - 5*logit(0.75),0.75,c='C1');

In [None]:
# Logistic (expit) curve sampled on [-4, 4].
x = np.linspace(-4, 4)
p = expit(x)
plt.figure(figsize=(6.4, 4.8))
plt.plot(x, p);

In [None]:
# The same x and p with the axes swapped, on a portrait-shaped canvas.
plt.figure(figsize=(4.8, 6.4))
plt.plot(p, x)
plt.xlabel('$p$')
plt.ylabel('$x = $logit$(p)$');

TODO: use a diverging colormap such as `bwr` or `seismic`.

# Multiclass

In [None]:
from sklearn import datasets, svm, metrics
digits = datasets.load_digits(n_class=10)

# `digits.images` holds the 8x8 digit images and `digits.target` the digit each
# one represents. Preview the first four, titled with their labels. (Images
# read from files, e.g. with matplotlib.pyplot.imread, would all need the same
# size.)
images_and_labels = list(zip(digits.images, digits.target))
for position, (image, label) in enumerate(images_and_labels[:4], start=1):
    plt.subplot(2, 4, position)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Training: %i' % label)
plt.show()

# Classifiers expect a (samples, features) matrix, so flatten every image
# into a single row.
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))

In [None]:
# Restrict the data set to the digits 1, 3, 8 and 9.
mask = np.isin(digits.target, [1, 3, 8, 9])
X = digits.data[mask, :]
y = digits.target[mask]

n_samples, n_features = X.shape


In [None]:
# modified from http://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html
def plot_embedding(X, title=None, labels=None):
    """Draw a 2-D embedding with every sample rendered as its class label.

    Parameters
    ----------
    X : ndarray of shape (n_samples, >=2)
        Embedded coordinates; only the first two columns are drawn. Each
        column is rescaled to [0, 1] before plotting.
    title : str, optional
        Figure title. Previously accepted but silently ignored.
    labels : sequence of int, optional
        Class label per row of ``X``; used both as the text drawn and as the
        index into the ``tab10`` colormap. Defaults to the notebook-level
        ``y``, which is what the function always used.
    """
    if labels is None:
        labels = y
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    span = x_max - x_min
    # A constant column would otherwise divide by zero and yield NaN positions.
    span = np.where(span == 0, 1, span)
    X = (X - x_min) / span

    plt.figure()
    plt.subplot(111)
    for i in range(X.shape[0]):
        plt.text(X[i, 0], X[i, 1], str(labels[i]),
                 color=plt.cm.tab10(labels[i]),
                 fontdict={'weight': 'bold', 'size': 9})

    plt.xticks([])
    plt.yticks([])
    if title is not None:
        plt.title(title)


from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X2 = X.copy()
X2.flat[::X.shape[1] + 1] += 0.01  # Make X invertible
# With four classes LDA offers at most n_classes - 1 = 3 components, so the
# previous request for 5 is rejected by current scikit-learn. Two is what the
# rest of the notebook needs: the embedding plot and the 2-D decision grid
# both assume exactly two columns.
X_lda = LinearDiscriminantAnalysis(n_components=2).fit_transform(X2, y)
plot_embedding(X_lda)


In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# One-vs-rest logistic regression fitted on the LDA embedding.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument — confirm against the version this notebook is pinned to.
lr = LogisticRegression(multi_class='ovr',solver='lbfgs')
lr.fit(X_lda, y)

In [None]:
# Bounding box of the embedding. zip pairs each column's minimum with its
# maximum, so this two-name unpacking only works when X_lda has two columns.
xlim,ylim = zip(np.min(X_lda,0),np.max(X_lda,0))

# Evaluate the classifier on a 500 x 500 grid covering that box.
xs = np.linspace(*xlim,500)
ys = np.linspace(*ylim,500)
xx,yy = np.meshgrid(xs,ys)
# decision_function returns per-class scores, not probabilities, despite the name.
probs = lr.decision_function(np.vstack([xx.ravel(),yy.ravel()]).T)
# argmax yields column positions (0..n_classes-1).
# NOTE(review): these are positions in lr.classes_, not digit labels — confirm
# that `zz==1` below is meant to select the second class rather than digit 1.
preds = np.argmax(probs, 1)
zz = preds.reshape(xx.shape)
#plt.contourf(xx,yy,zz,alpha=0.3)
# Note: this rebinds the notebook-level name `p`.
p  = plt.contourf(xx,yy,zz,cmap=plt.cm.tab10,alpha=0.3)
# Mean grid coordinates of the region assigned to column index 1.
(np.mean(xx[zz==1]),np.mean(yy[zz==1]))


In [None]:
# Inspect the fitted coefficients of the one-vs-rest model.
lr.coef_

In [None]:
# Inspect the fitted intercepts of the one-vs-rest model.
lr.intercept_

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Size check: `preds` was last assigned as the flattened decision-grid predictions.
preds.shape

In [None]:
# Size check: samples x pixel features after the digit filter.
X.shape

In [None]:
# Size check: one label per retained sample.
y.shape

In [None]:
# Heat map of the confusion matrix of `lr` evaluated on the same data it was
# fitted on. NOTE(review): the square root presumably compresses the dominant
# diagonal so off-diagonal errors stay visible — confirm.
sns.heatmap(np.sqrt(confusion_matrix(y,lr.predict(X_lda))))

In [None]:
# `pca` is not defined by any earlier cell, so this cell failed on a fresh
# kernel. Fit it here on the flattened images; two components are all the
# scatter plot uses.
# NOTE(review): assumes an ordinary PCA of `data` was intended — confirm.
from sklearn.decomposition import PCA

pca = PCA(n_components=2).fit(data)
princomps = pca.transform(data)
plt.scatter(princomps[:,0],princomps[:,1],c=digits.target)
digits.target

In [None]:
# Small frameless, axis-free thumbnail of the logistic curve.
x = np.linspace(-4, 4, 100)
plt.figure(figsize=(4, 1), frameon=False)
plt.plot(x, expit(x));
plt.axis('off');

In [None]:
np.random.seed(42)

# Repeatedly draw a three-class 1-D sample, fit a multinomial (softmax)
# logistic regression and overlay x times its coefficients (intercepts are
# not included in the plotted lines).
for j in range(100):
    # μ and σ are not used for the draws below; they are still sampled
    # because doing so advances the random stream.
    μ = np.random.normal(size=3)
    σ = np.abs(np.random.normal(size=3))

    x1 = np.random.normal(0, 1, size=100)
    x2 = np.random.normal(0, 0.4, size=100)
    x3 = np.random.normal(2, 1, size=100)

    # Sort the pooled sample and carry the class labels along with it.
    x = np.concatenate([x1, x2, x3])
    s = np.argsort(x)
    x = x[s].reshape(-1, 1)
    y = np.array([1] * 100 + [2] * 100 + [3] * 100)[s]

    softmax = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    softmax.fit(x, y)
    plt.plot(x, x @ softmax.coef_.T);
    # plt.ylim(0,1);

In [None]:
def truelogprobs(x,μ,σ):
    """Log of the normalised Gaussian class densities at each point.

    For every sample the normal log-density under each class is computed and
    shifted by the log-sum-exp over classes, so that the exponentiated values
    in a row sum to one (the class posterior under equal priors).

    Parameters
    ----------
    x : array-like
        Sample locations; flattened to one sample per row.
    μ, σ : array-like
        Per-class means and standard deviations; flattened to one class per
        column. Lists and tuples are accepted as well as arrays.

    Returns
    -------
    ndarray of shape (n_samples, n_classes)
    """
    x = np.asarray(x, dtype=float).reshape(-1, 1)
    μ = np.asarray(μ, dtype=float).reshape(1, -1)
    σ = np.asarray(σ, dtype=float).reshape(1, -1)
    logprobs = st.norm.logpdf(x, μ, σ)
    return logprobs - logsumexp(logprobs, axis=1, keepdims=True)

def trueprobs(x,μ,σ):
    """Normalised Gaussian class densities at each point (rows sum to one).

    The normalisation is done in log space: dividing raw densities yields
    0/0 = NaN whenever a point is so far out that every class density
    underflows to zero, whereas subtracting the log-sum-exp stays finite.

    Parameters
    ----------
    x : array-like
        Sample locations; flattened to one sample per row.
    μ, σ : array-like
        Per-class means and standard deviations; flattened to one class per
        column. Lists and tuples are accepted as well as arrays.

    Returns
    -------
    ndarray of shape (n_samples, n_classes)
    """
    x = np.asarray(x, dtype=float).reshape(-1, 1)
    μ = np.asarray(μ, dtype=float).reshape(1, -1)
    σ = np.asarray(σ, dtype=float).reshape(1, -1)
    logprobs = st.norm.logpdf(x, μ, σ)
    return np.exp(logprobs - logsumexp(logprobs, axis=1, keepdims=True))


In [None]:
from sklearn.linear_model import LogisticRegression

# Both models use C=1e100, i.e. essentially no regularisation (C is the
# inverse regularisation strength).
# multi_class='ovr' is spelled out because scikit-learn 0.22 changed the
# default to 'auto', under which this model would silently fit the same
# multinomial objective as `softmax` and the comparison would be vacuous.
ovr = LogisticRegression(C=1e100, multi_class='ovr')
softmax = LogisticRegression(C=1e100,solver='lbfgs',multi_class='multinomial')

datapts = 100     # samples drawn per class in each simulated data set
iters = 10000     # number of simulated data sets
gridsize = 1000   # probabilities are binned into gridsize+1 cells
# confusion[i, j] counts points whose true majority-class probability falls in
# bin i and whose estimated probability for that same class falls in bin j.
ovr_confusion = np.zeros([gridsize+1,gridsize+1])
softmax_confusion = np.zeros([gridsize+1,gridsize+1])
for j in range(iters):
    # Make some fake data: three Gaussian classes with random mean and spread,
    # stacked class by class into a single column.
    μ = np.random.normal(size=3)
    σ = np.abs(np.random.normal(size=3))
    x = np.random.normal(μ.reshape(1,-1),σ.reshape(1,-1),size=(datapts,3))
    x = np.hstack(x.T).reshape(-1,1)
    y = np.array([0]*datapts+[1]*datapts+[2]*datapts)

    # Determine ground truth: the most probable class per point and the bin
    # of its true probability.
    probs = trueprobs(x,μ,σ)
    preds = np.argmax(probs,1)
    rows = np.arange(probs.shape[0])
    maj = (gridsize*probs[rows,preds]).astype(int)

    # Fit both models
    ovr.fit(x,y)
    softmax.fit(x,y)

    # Increment confusion matrices. np.add.at accumulates every (row, column)
    # pair; `m[r, c] += 1` with index arrays counts a repeated pair only once,
    # which under-counts the crowded bins.
    ovr_maj = (gridsize*ovr.predict_proba(x)[rows,preds]).astype(int)
    np.add.at(ovr_confusion, (maj, ovr_maj), 1)

    softmax_maj = (gridsize*softmax.predict_proba(x)[rows,preds]).astype(int)
    np.add.at(softmax_confusion, (maj, softmax_maj), 1)

    
#     plt.plot(maj, ovr.predict_proba(x)[range(probs.shape[0]),preds],'.',c='C0',alpha=0.01,markersize=1)
#     plt.plot(maj, softmax.predict_proba(x)[range(probs.shape[0]),preds],'.',c='C1',alpha=0.01,markersize=1)
#     plt.plot(probs, ovr.predict_proba(x),c='C0',alpha=0.01)
#     plt.plot(probs, softmax.predict_proba(x),c='C1',alpha=0.01)
#     kl_ovr[j] = np.mean(kl_divergence(trueprobs(x,μ,σ).T,ovr.predict_proba(x).T,base=2))
#     kl_softmax[j] = np.mean(kl_divergence(trueprobs(x,μ,σ).T,softmax.predict_proba(x).T,base=2))

In [None]:
def findqindex(v,q=[0.05,0.5,0.95]):
    """Locate weighted quantiles of a histogram.

    ``v`` holds non-negative bin weights. For each fraction in ``q`` the
    result is the first index at which the running total of ``v`` reaches
    that fraction of the overall total.
    """
    running = np.cumsum(v)
    targets = np.array(q) * running[-1]
    return running.searchsorted(targets)

In [None]:
# Quick check of findqindex on a small weight vector with the default fractions.
findqindex([5,2,6,3,6,7,4])

In [None]:
# Quantile bins for every row of the softmax matrix, rescaled from grid index
# back to a probability by dividing by gridsize.
np.apply_along_axis(findqindex,1,softmax_confusion)/gridsize

In [None]:
# Calibration curves: median (solid line) and 5%-95% band of each model's
# estimated probability as a function of the true majority-class probability.
for (m,color,name) in [(softmax_confusion,'C0','softmax'),(ovr_confusion,'C1','one-vs-rest')]:
    qs = np.apply_along_axis(findqindex,1,m)/gridsize
    # Rows without any counts come back as all-zero quantiles; drop them.
    keep = np.sum(qs,1)>0
    qs = qs[keep]
    # Row i of the matrix is the bin for a true probability of i/gridsize, so
    # use the surviving row indices as x-coordinates instead of an evenly
    # spaced guess that drifts whenever a row is missing.
    xs = np.flatnonzero(keep)/gridsize
    plt.plot(xs,qs[:,1],c=color,label=name)
    plt.plot(xs,qs[:,0],c=color,lw=0.2)
    plt.plot(xs,qs[:,2],c=color,lw=0.2)
    plt.fill_between(xs,qs[:,0],qs[:,2],color=color,alpha=0.1)
plt.plot([0.33,1],[0.33,1],c='red',linestyle='--',lw=0.5,label='ground truth')
plt.legend()
plt.xlabel("True majority-class probability")
plt.ylabel('Estimated probability');

In [None]:
# Unnormalised normal densities and, per row, the index of the largest one,
# using whatever x, μ and σ were last assigned.
# assumes x is a column vector so broadcasting gives one column per class — TODO confirm
probs = st.norm.pdf(x,μ,σ)
preds = np.argmax(probs,1)

In [None]:
# Scatter of the two models' KL scores against each other, with a red
# identity line for reference.
# NOTE(review): kl_softmax and kl_ovr are only assigned in commented-out lines
# of the simulation loop, so this cell raises NameError on a fresh run —
# restore that computation or drop this cell.
plt.plot(kl_softmax, kl_ovr,'.',alpha=0.2)
plt.xlabel('Softmax')
plt.ylabel('One-vs-rest')
m=np.max(kl_softmax)
plt.plot([0,m],[0,m],c='red');

In [None]:
# One three-class 1-D sample fitted with one-vs-rest logistic regression.
np.random.seed(42)
x1 = np.random.normal(0,1,size=100)
x2 = np.random.normal(0,0.4,size=100)
x3 = np.random.normal(2,1,size=100)

x = np.hstack([x1,x2,x3])
s = np.argsort(x)
x = x[s].reshape(-1,1)
# Build the labels in draw order and apply the same permutation as x.
# Previously a stale `y` was permuted and then overwritten by unsorted
# labels, leaving the labels misaligned with the sorted x.
y = np.array([1]*100 + [2]*100 + [3]*100)[s]

# Request one-vs-rest explicitly; the library default is version dependent.
ovr = LogisticRegression(multi_class='ovr')
ovr.fit(x,y)
# x times the per-class coefficients (intercepts are not included).
plt.plot(x,x.dot(ovr.coef_.T))
# plt.plot(x,ovr.predict_proba(x))
# plt.ylim(0,1);

In [None]:
# Inspect the fitted coefficients of the `ovr` model.
ovr.coef_