In [1]:
import pandas as pd
import numpy as np
import scipy
import plotly.graph_objects as go
from plotly.graph_objs import FigureWidget
from plotly.callbacks import Points, InputDeviceState
from ipywidgets import HBox, VBox, Button, interact
from functools import partial
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import PLSRegression
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

In [25]:
df_all = pd.read_csv('https://raw.githubusercontent.com/bevi-rosso/oleic/master/AcOleic.csv', sep=';')
df = df_all.drop(['DATA','GreenOleo'],axis=1)
X = np.array(df.index)
col = df_all.GreenOleo

In [26]:
@interact(variabile=df.columns)
def sel(variabile):
    fig=go.Figure()
    fig.add_trace(go.Scatter(x=df_all.DATA,y=df[variabile], 
                             mode='markers+lines', 
                             marker_color = col, marker_symbol = 200, marker_size=8,
                             line_color = 'green', line_width=2))
    fig.update_layout(title='Plot selected variable', xaxis_title='data')
    fig.show()

interactive(children=(Dropdown(description='variabile', options=('INSAP.', 'C12', 'C14', 'C14:1', 'C15', 'C16'…

## PCA ##

In [4]:
# PCA
pca = PCA()
xs = scale(df)
x_red = pca.fit_transform(xs)
V = pca.components_
nPC = np.arange(len(V)) + 1
dfV = pd.DataFrame(V, columns=[df.columns], index=[nPC])
EV_cum = pd.DataFrame(np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100),
                  columns =['% Explained Variance'], index=[nPC])

In [5]:
print ('PCs = ', len(V))

PCs =  22


In [6]:
EV_cum.T

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,13,14,15,16,17,18,19,20,21,22
% Explained Variance,16.85,31.56,40.48,48.7,55.68,61.21,66.11,70.81,75.36,79.33,...,89.16,92.01,94.42,96.31,98.01,99.53,99.99,99.99,99.99,99.99


In [7]:
# Calculate ellipse bounds and plot with scores
theta = np.concatenate((np.linspace(-np.pi, np.pi, 50), np.linspace(np.pi, -np.pi, 50)))
circle = np.array((np.cos(theta), np.sin(theta)))
sigma = np.cov(np.array((x_red[:, 0], x_red[:, 1])))
ed = np.sqrt(scipy.stats.chi2.ppf(0.95, 2))
ell = np.transpose(circle).dot(np.linalg.cholesky(sigma) * ed)
a, b = np.max(ell[: ,0]), np.max(ell[: ,1]) #95% ellipse bounds
t = np.linspace(0, 2 * np.pi, 100)

In [8]:
# Score plot
xtit = 'PC1 - explained variance = '+str(np.round(100*pca.explained_variance_ratio_[0],decimals=2))+'%'
ytit = 'PC2 - explained variance = '+str(np.round(100*pca.explained_variance_ratio_[1],decimals=2))+'%'
fig=go.Figure()
fig.add_trace(go.Scatter(x=x_red[:,0], y=x_red[:,1], mode='markers', marker=dict(symbol=[200], 
             color=col, line_width=2, size=10)))
fig.add_trace(go.Scatter(x=a * np.cos(t),y=b * np.sin(t),mode='lines',
                        line=dict(color='lightgreen', width=2, dash='dash')))
fig.update_layout(height=600, width=800, title='PCA - Score plot', 
                  xaxis_title=xtit,yaxis_title=ytit, showlegend=False, xaxis_zeroline=True, yaxis_zeroline=True, 
                  xaxis_zerolinecolor='blue', yaxis_zerolinecolor='blue')

In [9]:
score=pd.DataFrame(x_red)
index = np.linspace(1,len(V),num=len(V))
index = index.astype('int')
index = index.astype('str')
score.columns=index
@interact(PCa=score.columns, PCb=score.columns)
def sel(PCa, PCb):
    fig=go.Figure()
    fig.add_trace(go.Scatter(x=df_all.DATA,y=score[PCa], name=PCa, mode='lines', marker_color = 'green'))
    fig.add_trace(go.Scatter(x=df_all.DATA,y=score[PCb], name=PCb, mode='lines', marker_color = 'red'))
    fig.update_layout(height=600, width=800, title='PCA - components plot',
                     xaxis_title='data', yaxis_title='PCs',
                     xaxis_zeroline=True, yaxis_zerolinecolor='blue')
    fig.show()

interactive(children=(Dropdown(description='PCa', options=('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', …

In [10]:
# Loadings plot
arr = np.array(dfV)
label = np.array(df.columns)
plot = go.Scatter(x=arr[0,:], y=arr[1,:], mode='markers+text', marker=dict(symbol=[201],color='darkred', size=10), 
              text = label, textposition="top left")
fig=go.Figure()
fig.add_trace(plot)
fig.update_layout(height=600, width=800, title='PCA - Loadings plot', xaxis_title=xtit,yaxis_title=ytit, 
                  xaxis_zeroline=True, yaxis_zeroline=True, xaxis_zerolinecolor='blue', yaxis_zerolinecolor='blue')
fig.update_xaxes(range=[min(arr[0,:])-0.2, max(arr[0,:])+0.2])
fig.update_yaxes(range=[min(arr[1,:])-0.2, max(arr[1,:])+0.2])

In [11]:
N = len(df)
f1 = FigureWidget(**{
    'data': [{'marker': {'cmax': 2, 'cmin': 0, 'color': np.zeros(N), 'size': 8},
              'mode': 'markers', 'type': 'scatter', 'x': x_red[:,0], 'y': x_red[:,1]}],
    'layout': {'dragmode': 'lasso', 'xaxis': {'title': 'PC1'}, 'yaxis': {'title': 'PC2'}}})
scatt1 = f1.data[0]

In [12]:
f2 = FigureWidget(**{
    'data': [{'marker': {'cmax': 2, 'cmin': 0, 'color': np.zeros(N), 'size': 8},
              'mode': 'markers+lines', 'type': 'scatter', 'x': X, 'y': x_red[:,0]}],
    'layout': {'dragmode': 'lasso', 'xaxis': {'title': 'n°sample'}, 'yaxis': {'title': 'PC1'}}})
scatt2 = f2.data[0]

In [13]:
f3 = FigureWidget(**{
    'data': [{'marker': {'cmax': 2, 'cmin': 0, 'color': np.zeros(N), 'size': 8},
              'mode': 'markers+lines', 'type': 'scatter', 'x': X, 'y': x_red[:,1]}],
    'layout': {'dragmode': 'lasso', 'xaxis': {'title': 'n°sample'}, 'yaxis': {'title': 'PC2'}}})
scatt3 = f3.data[0]

In [14]:
# Configure brush on both plots to update both plots
def brush(trace, points, state):
    inds = np.array(points.point_inds)
    if inds.size:
        selected = scatt1.marker.color.copy()
        selected[inds] = 1
        scatt1.marker.color = selected
        scatt2.marker.color = selected
        scatt3.marker.color = selected
    
scatt1.on_selection(brush)
scatt2.on_selection(brush)
scatt3.on_selection(brush)

In [15]:
# Reset brush
def reset_brush(btn):
    selected = np.zeros(N)
    scatt1.marker.color = selected
    scatt2.marker.color = selected
    scatt3.marker.color = selected
    
# Create reset button
button = Button(description="RESET")
button.on_click(reset_brush)

In [16]:
upper=HBox([f1, button])
lower=HBox([f2,f3])
dashboard = VBox([upper, lower])
dashboard

VBox(children=(HBox(children=(FigureWidget({
    'data': [{'marker': {'cmax': 2,
                         'cmi…