## Scatter

- [`plotly.express.scatter()`](https://plotly.com/python-api-reference/generated/plotly.express.scatter.html#plotly.express.scatter)

### Scatter 3d

- [`plotly.express.scatter_3d()`](https://plotly.com/python-api-reference/generated/plotly.express.scatter_3d.html#plotly.express.scatter_3d)

Vamos gerar um gráfico de dispersão 3D com dados que baixamos da fonte [worldwide Protein Data Bank (PDB)](https://www.wwpdb.org/).

Lemos os dados do arquivo `../Data/265d.pdb` como uma lista de listas e extraímos as coordenadas $x$, $y$ e $z$ dos elementos de índices $0$, $1$ e $2$, respectivamente, de cada elemento da lista.

## `plotly.express`

Plotly Express é um "wrapper" de alto nível para a biblioteca Plotly; essencialmente, isso significa que ele executa muitas das tarefas que podemos fazer com Plotly usando uma sintaxe mais simples. É fácil de usar e não exige especificar se queremos trabalhar online ou offline.

Todas as funções do Plotly Express retornam um objeto do tipo `graph_objects.Figure` cujos dados e layout são definidos de acordo com os argumentos fornecidos.

No geral podemos usar
`import plotly.express as px`
no lugar de 
`import plotly.graph_objects as go`

E substituir `fig = go.Figure()` por qualquer função Plotly Express, por exemplo `px.bar(...)`.

Funções disponíveis em [plotly.express: high-level interface for data visualization](https://plotly.com/python-api-reference/plotly.express.html):

In [None]:
import numpy as np
import pandas as pd

import plotly
import plotly.express as px

import chart_studio
import chart_studio.plotly as py

In [None]:
def load(path = "../PointsCloud/1576-PoinP11.csv"):
    """Read a ';'-separated point-cloud CSV and return a random sample of it.

    The file must contain ``X``, ``Y`` and ``Z`` columns. Values written with
    a decimal comma (``"1,5"``) are converted to floats.

    Parameters
    ----------
    path : str
        Location of the CSV file.

    Returns
    -------
    seg : numpy.ndarray
        The sampled rows (every CSV column) as a 2-D array, with the X, Y and
        Z columns already converted to float.
    z_coord, y_coord, x_coord : numpy.ndarray
        Float coordinates of the sampled rows. NOTE: the order is Z, Y, X;
        callers unpack the result in that order.
    """
    max_points = 10000
    df = pd.read_csv(path, sep = ';')

    # DataFrame.sample raises ValueError when asked for more rows than exist,
    # so cap the sample size at the number of rows available.
    # .copy() gives an independent frame that can be modified safely.
    seg = df.sample(min(max_points, len(df))).copy()

    for axis in ("X", "Y", "Z"):
        # astype(str) keeps this working when pandas already parsed the
        # column as numeric (no decimal commas present in the file).
        seg[axis] = (seg[axis].astype(str)
                              .str.replace(",", ".", regex = False)
                              .astype(float))

    x_coord = seg["X"].to_numpy(dtype = float, copy = True)
    y_coord = seg["Y"].to_numpy(dtype = float, copy = True)
    z_coord = seg["Z"].to_numpy(dtype = float, copy = True)

    seg = np.array(seg)

    return seg,z_coord,y_coord,x_coord

def clear(x_coord,y_coord,z_coord):
    """Drop every point whose z value is not strictly below 4.

    The three arrays are filtered with the same boolean mask, so they stay
    aligned point by point.

    Returns
    -------
    tuple of numpy.ndarray
        The filtered ``(x_coord, y_coord, z_coord)``.
    """
    keep = z_coord < 4
    kept_x, kept_y, kept_z = (axis[keep] for axis in (x_coord, y_coord, z_coord))
    return kept_x, kept_y, kept_z


In [None]:
# load() returns its arrays in (seg, z, y, x) order, so unpack in that order.
seg,z_coord,y_coord,x_coord = load(path = "../PointsCloud/1576-PoinP11.csv")
# Keep only the points with z < 4.
x_coord, y_coord,z_coord    = clear(x_coord,y_coord,z_coord)

In [None]:
import plotly.graph_objects as go
import numpy as np

def plot(x_coord,y_coord,z_coord,online = True,username = None,api_key = None):
    """Show a 3-D scatter of the cloud and optionally upload it to Chart Studio.

    Parameters
    ----------
    x_coord, y_coord, z_coord : array-like
        Point coordinates; the markers are coloured by ``z_coord``.
    online : bool
        When true, the figure is also uploaded with ``chart_studio.plotly.plot``.
    username, api_key : str, optional
        Chart Studio credentials. When omitted they are read from the
        ``PLOTLY_USERNAME`` / ``PLOTLY_API_KEY`` environment variables, so no
        secret has to be stored in the notebook.
    """
    import os  # local import: only needed for the credential lookup

    marks_size = [2]*len(x_coord)

    fig = px.scatter_3d(x = x_coord, y = y_coord, z = z_coord,
                        color = z_coord,
                        # z is numeric, so the continuous colour scale is the
                        # argument that applies (not color_discrete_sequence)
                        color_continuous_scale = px.colors.sequential.Viridis,
                        opacity = 0.6,
                        size = marks_size
                       )
    fig.show()

    username = username or os.environ.get("PLOTLY_USERNAME")
    api_key  = api_key or os.environ.get("PLOTLY_API_KEY")
    have_credentials = bool(username and api_key)

    if have_credentials:
        # Stored in the Chart Studio credentials file, so later cells can
        # upload without having the key in scope.
        chart_studio.tools.set_credentials_file(username = username, api_key = api_key)

    if online:
        if have_credentials:
            py.plot(fig, filename = 'fig1_online.png', auto_open = True)
        else:
            print("Chart Studio credentials not provided; skipping online upload")
        
# Render the loaded cloud; online=True also asks plot() to upload the figure.
plot(x_coord,y_coord,z_coord,online = True)

In [None]:
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
def grid(x_coord,meters):
    """Return evenly spaced voxel edges covering the extent of ``x_coord``.

    Parameters
    ----------
    x_coord : array-like
        Coordinates along one axis.
    meters : float
        Requested voxel size along that axis.

    Returns
    -------
    numpy.ndarray
        ``int(extent / meters)`` edges from the minimum to the maximum of
        ``x_coord``, and never fewer than two. The original formula yields an
        empty or single-point grid when the extent is under two voxel sizes,
        which leaves no voxel to iterate over.
    """
    lowest  = np.min(x_coord)
    highest = np.max(x_coord)
    n_edges = max(int((highest - lowest) / meters), 2)
    return np.linspace(lowest, highest, n_edges)

def generate_grid(x_coord,y_coord,z_coord,voxel_size_x,voxel_size_y = None,voxel_size_z = None):
    """Build the voxel edges of the three axes with ``grid``.

    Parameters
    ----------
    x_coord, y_coord, z_coord : array-like
        Point coordinates per axis.
    voxel_size_x : float
        Voxel size along x.
    voxel_size_y, voxel_size_z : float, optional
        Voxel sizes along y and z; each defaults to ``voxel_size_x`` so a
        cubic voxel can be requested with a single size.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_grid, y_grid, z_grid)``.
    """
    if voxel_size_y is None:
        voxel_size_y = voxel_size_x
    if voxel_size_z is None:
        voxel_size_z = voxel_size_x

    x_grid = grid(x_coord,voxel_size_x)
    y_grid = grid(y_coord,voxel_size_y)
    z_grid = grid(z_coord,voxel_size_z)
    return x_grid,y_grid,z_grid

In [None]:
def voxel_filter(x_grid,y_grid,z_grid,n_points,cloud = None):
    """Replace each sufficiently populated voxel by the mean of its points.

    Parameters
    ----------
    x_grid, y_grid, z_grid : sequence of float
        Voxel edges per axis; consecutive pairs delimit one voxel.
    n_points : int
        A voxel is kept only when it holds strictly more than this many points.
    cloud : numpy.ndarray, optional
        2-D array whose first three columns are x, y, z. When omitted, the
        notebook-level global ``seg`` is used (the original behaviour).

    Returns
    -------
    points : numpy.ndarray
        Shape ``(n_kept, 3)``: the mean x, y, z of every kept voxel. The shape
        is ``(0, 3)`` when no voxel qualifies, so column indexing still works.
    bins : numpy.ndarray
        Point count of every visited voxel, in x-major, then y, then z order.

    Notes
    -----
    Comparisons are strict on both sides, so a point lying exactly on a voxel
    edge is not counted in any voxel.
    """
    if cloud is None:
        cloud = seg  # fall back to the notebook global

    xyz = np.asarray(cloud)[:, :3].astype(float)
    xs, ys, zs = xyz[:, 0], xyz[:, 1], xyz[:, 2]

    bins   = []
    points = []
    print("Voxel filtering")
    for i in tqdm(range(len(x_grid)-1)):
        x0,xf = x_grid[i:i+2]
        # the x mask only depends on i, so build it once per x slab
        in_x = (xs > x0) & (xs < xf)

        for j in range(len(y_grid)-1):
            y0,yf = y_grid[j:j+2]
            in_xy = in_x & (ys > y0) & (ys < yf)

            for k in range(len(z_grid)-1):
                z0,zf = z_grid[k:k+2]
                in_voxel = in_xy & (zs > z0) & (zs < zf)

                count = in_voxel.sum()
                bins.append(count)

                if count > n_points:
                    points.append((
                                  xs[in_voxel].mean(),
                                  ys[in_voxel].mean(),
                                  zs[in_voxel].mean()
                                  ))

    bins   = np.array(bins)
    # reshape keeps a well-formed (0, 3) array when nothing was kept
    points = np.array(points, dtype = float).reshape(-1, 3)
    return points,bins

In [None]:
def linReg(x_coord,z_coord):
    """Least-squares fit of ``z = m*x + c`` using ``numpy.linalg.lstsq``.

    Returns
    -------
    tuple
        ``(m, c)``: slope and intercept of the fitted line.
    """
    # design matrix: one column with x, one column of ones for the intercept
    design = np.column_stack((x_coord, np.ones(len(x_coord))))
    solution, *_ = np.linalg.lstsq(design, z_coord, rcond=None)
    slope, intercept = solution
    return slope, intercept

<img src="L.png">

In [None]:
def OLS_linear_regression(xi,yi):
    """Closed-form ordinary least squares for ``y = beta*x + alpha``.

    Both inputs are flattened to 1-D before the sums are taken.

    Returns
    -------
    tuple
        ``(beta, alpha)``: slope and intercept.
    """
    x_flat = xi.reshape(-1)
    y_flat = yi.reshape(-1)
    n = len(x_flat)

    sum_x = x_flat.sum()
    sum_y = y_flat.sum()

    numerator   = n*(x_flat*y_flat).sum() - sum_x*sum_y
    denominator = n*(x_flat**2).sum() - sum_x**2

    beta  = numerator / denominator
    alpha = y_flat.mean() - beta * x_flat.mean()

    return beta,alpha

In [None]:
from sklearn import linear_model

def RANSAC(X,y,max_trials = 130,min_samples = 0.8,stop_probability = 0.99,random_state = None):
    """Fit a line with scikit-learn's ``RANSACRegressor`` and sample it.

    Parameters
    ----------
    X, y : numpy.ndarray
        Abscissas and ordinates; both are reshaped to column vectors.
    max_trials, min_samples, stop_probability
        Passed unchanged to ``linear_model.RANSACRegressor``.
    random_state : int or None, optional
        Passed to ``RANSACRegressor``; set it to make the fit repeatable.

    Returns
    -------
    line_X : numpy.ndarray
        Column vector of sample abscissas, stepping by 1 from ``X.min()`` up
        to (not including) ``X.max()``. When that yields fewer than two
        samples, the two extremes ``[X.min(), X.max()]`` are used instead, so
        callers that index the first two samples keep working.
    line_y_ransac : numpy.ndarray
        Model prediction at ``line_X``.
    """
    X,y = X.reshape(-1,1),y.reshape(-1,1)
    # Robustly fit linear model with RANSAC algorithm
    ransac = linear_model.RANSACRegressor(max_trials       = max_trials,
                                          min_samples      = min_samples,
                                          stop_probability = stop_probability,
                                          random_state     = random_state)
    ransac.fit(X, y)

    # Predict data of estimated model
    x_min, x_max = X.min(), X.max()
    line_X = np.arange(x_min, x_max)[:, np.newaxis]
    if len(line_X) < 2:
        # extent of at most 1 unit: arange gives 0 or 1 samples
        line_X = np.array([[x_min], [x_max]])
    line_y_ransac = ransac.predict(line_X)

    return line_X, line_y_ransac

In [None]:
def generate_line(X,Y,Xi,Xf):
    """Sample the line through the first two ``(X, Y)`` points on ``[Xi, Xf]``.

    Returns
    -------
    tuple of numpy.ndarray
        100 evenly spaced abscissas from ``Xi`` to ``Xf`` and the line's
        ordinates at those abscissas.
    """
    slope     = (Y[1]-Y[0])/(X[1]-X[0])
    intercept = Y[0] - slope*X[0]
    abscissas = np.linspace(Xi,Xf,100)
    return abscissas, slope*abscissas + intercept

def generate_M_B(X,Y,Xi,Xf):
    """Return slope and intercept of the line through the first two ``(X, Y)`` points.

    Parameters
    ----------
    X, Y : sequence
        Abscissas and ordinates; only indices 0 and 1 are read.
    Xi, Xf
        Unused; kept so the signature matches ``generate_line``.

    Returns
    -------
    tuple
        ``(M, B)`` such that the line is ``y = M*x + B``.
    """
    M = (Y[1]-Y[0])/(X[1]-X[0])
    B = Y[0] - M*X[0]
    return M,B

def return_axis(points):
    """Split a 2-D point array into its first three columns.

    Returns
    -------
    tuple of numpy.ndarray
        Columns 0, 1 and 2 of ``points`` (x, y, z).
    """
    x_col, y_col, z_col = (points[:, axis] for axis in range(3))
    return x_col, y_col, z_col


In [None]:
# generate_grid needs one voxel size per axis; use 0.1 for x, y and z.
x_grid,y_grid,z_grid  = generate_grid(x_coord,y_coord,z_coord,0.1,0.1,0.1)
# Keep the voxels holding more than 30 points, each replaced by its mean point.
points,bins           = voxel_filter(x_grid,y_grid,z_grid,30)
x_coord_filtered, y_coord_filtered, z_coord_filtered  = return_axis(points)

In [None]:
def Li(x):
    """Lower plotting limit for the values in ``x``.

    Returns ``min(x) - 1`` when the minimum lies strictly between -2 and 2,
    otherwise ``min(x) * 1.2``.

    NOTE(review): for a minimum >= 2 the result is larger than the minimum
    itself, so the limit cuts into the data -- confirm this is intended.
    """
    lowest = min(x)
    if -2 < lowest < 2:
        return lowest - 1
    return lowest*1.2
    
def Ls(x):
    """Upper plotting limit for the values in ``x``: ``max(x) * 1.5``.

    NOTE(review): for a negative maximum the result is below the maximum
    itself -- confirm this is intended.
    """
    return max(x)*1.5

def setLims(x_coord,z_coord):
    """Compute the plot limits for the horizontal (x) and vertical (z) data.

    Returns
    -------
    tuple
        ``(Xi, Xf, Yi, Yf)``: ``Li``/``Ls`` of ``x_coord`` followed by
        ``Li``/``Ls`` of ``z_coord``.
    """
    return Li(x_coord), Ls(x_coord), Li(z_coord), Ls(z_coord)

In [None]:
# Plot limits: Xi/Xf come from the x values, Yi/Yf from the z values.
Xi,Xf,Yi,Yf          = setLims(x_coord,z_coord)

# Closed-form least squares on the voxel-filtered points.
beta,alpha           = OLS_linear_regression(x_coord_filtered,z_coord_filtered)

# numpy lstsq fits: m_f/c_f on the filtered points, m/c on the unfiltered ones.
m_f , c_f            = linReg(x_coord_filtered,z_coord_filtered)
m   , c              = linReg(x_coord,z_coord)

# RANSAC fits on the filtered and on the unfiltered points.
line_X,line_y_ransac               = RANSAC(x_coord_filtered,z_coord_filtered)
line_X_origin,line_y_ransac_origin = RANSAC(x_coord,z_coord)

# Extend the filtered RANSAC line over the whole [Xi, Xf] range.
x_plot,y_plot                      = generate_line(line_X,line_y_ransac,Xi,Xf)

In [None]:
def plot2D(x_coord_filtered,
           y_coord_filtered,
           z_coord_filtered,
           x_coord,
           y_coord,
           z_coord,
           Xi,
           Xf,
           Yi,
           Yf,
           name = "figure1.jpg",
           pts_per_voxel = "20",
           tam_voxel = "0.2",
           variancia = "4"):
    """Draw the x/z scatter with the fitted lines and save it under ``images/``.

    Parameters
    ----------
    x_coord_filtered, z_coord_filtered : array-like
        Voxel-filtered points, drawn as the "filtered" scatter.
    x_coord, z_coord : array-like
        Unfiltered points, drawn as the "raw" scatter and used as abscissas of
        the two linear-regression lines.
    y_coord_filtered, y_coord : array-like
        Accepted but not used by this function.
    Xi, Xf, Yi, Yf : float
        Axis limits (x range, then vertical range).
    name : str
        Used in the title and as the base name of the saved files
        (``images/<name>.png`` and ``images/<name>.svg``).
    pts_per_voxel, tam_voxel, variancia : str
        Shown in the title only; they must be strings because they are
        concatenated.

    Notes
    -----
    The fitted lines are NOT parameters: the function reads the notebook
    globals ``m``, ``c``, ``beta``, ``alpha``, ``line_X_origin``,
    ``line_y_ransac_origin``, ``x_plot`` and ``y_plot``, which must be set
    before the call.
    """
    import os  # local import: only needed to create the output folder

    plt.figure(figsize = (20,8))
    plt.scatter(x_coord,z_coord,label="raw")
    plt.scatter(x_coord_filtered,z_coord_filtered,label="filtered",alpha = 0.95)

    plt.plot(   x_coord, m*x_coord + c, 'r', label='Lin Reg origin data',alpha = 0.7)

    plt.plot(   x_coord, beta*x_coord + alpha, 'b', label='Lin Reg filtered',alpha = 0.7)

    plt.plot(
        line_X_origin,
        line_y_ransac_origin,
        color="green",
        linewidth=0.3,
        label="RANSAC regressor origin",
        alpha = 0.95
    )

    plt.plot(
        x_plot,
        y_plot,
        color="black",
        linewidth=5,
        label="RANSAC regressor filtered",
        alpha = 0.95
    )

    plt.xlim([Xi, Xf])
    plt.ylim([Yi, Yf])

    plt.legend(loc="upper right")
    plt.title(name+'Umbara measure '+" pts/voxel: "+pts_per_voxel+" tam_voxel: "+tam_voxel+" variancia: "+variancia )

    # savefig does not create missing folders; make sure the target exists
    os.makedirs("images", exist_ok = True)
    plt.savefig("images/"+name+".png",dpi = 200)
    plt.savefig("images/"+name+".svg")
   

In [None]:
# Draw and save the 2-D comparison with the default name and title labels.
# plot2D also reads the fit results computed above from the notebook globals.
plot2D(x_coord_filtered,
           y_coord_filtered,
           z_coord_filtered,
           x_coord,
           y_coord,
           z_coord,
           Xi,
           Xf,
           Yi,
           Yf)

In [None]:
def generate_line(X,Y,Xi,Xf):
    """Sample the line through the first two ``(X, Y)`` points on ``[Xi, Xf]``.

    Returns
    -------
    tuple of numpy.ndarray
        100 evenly spaced abscissas from ``Xi`` to ``Xf`` and the line's
        ordinates at those abscissas.
    """
    slope     = (Y[1]-Y[0])/(X[1]-X[0])
    intercept = Y[0] - slope*X[0]
    abscissas = np.linspace(Xi,Xf,100)
    return abscissas, slope*abscissas + intercept

def generate_M_B(X,Y,Xi,Xf):
    """Return slope and intercept of the line through the first two ``(X, Y)`` points.

    Parameters
    ----------
    X, Y : sequence
        Abscissas and ordinates; only indices 0 and 1 are read.
    Xi, Xf
        Unused; kept so the signature matches ``generate_line``.

    Returns
    -------
    tuple
        ``(M, B)`` such that the line is ``y = M*x + B``.
    """
    M = (Y[1]-Y[0])/(X[1]-X[0])
    B = Y[0] - M*X[0]
    return M,B

# Extend the filtered RANSAC line over the [Xi, Xf] plotting range.
x_plot,y_plot = generate_line(line_X,line_y_ransac,Xi,Xf)


In [None]:
import plotly.graph_objects as go
import numpy as np

# Slope and intercept of the RANSAC line fitted on the filtered points.
x_plot,y_plot = generate_line(line_X,line_y_ransac,Xi,Xf)
M,B = generate_M_B(line_X,line_y_ransac,Xi,Xf)

# 30x30 mesh for the plane z = M*x + B (constant along y).
# NOTE(review): the y mesh spans Yi..Yf, which setLims derived from the z
# values, not from y_coord -- confirm this is the intended extent.
x = np.outer(np.linspace(Xi,Xf, 30), np.ones(30))
y = np.outer(np.linspace(Yi, Yf, 30), np.ones(30)).T
z = M*x+B

marks_size = [2]*len(x_coord)

fig = px.scatter_3d(x = x_coord, y = y_coord, z = z_coord,
                    color = z_coord,
                    # z is numeric, so the continuous colour scale is the
                    # argument that applies (not color_discrete_sequence)
                    color_continuous_scale = px.colors.sequential.Viridis,
                    opacity = 0.6,
                    size = marks_size
                   )

fig.add_trace(go.Surface(x=x, y=y, z=z))
fig.show()

# `api_key` only exists inside plot(), so test the stored Chart Studio
# credentials instead of an undefined global.
if chart_studio.tools.get_credentials_file().get("api_key"):
    py.plot(fig, filename = 'fig1_online.png', auto_open = True)

In [None]:
# Plot the per-voxel point counts returned by voxel_filter, keeping only the
# voxels with more than 4 points (order is the voxel visiting order).
plt.plot(bins[bins>4])


## Plotly online (chart-studio)

Quando fazemos um gráfico online, o gráfico e os dados são salvos em nossa conta plotly na nuvem.

Usando o método `plot` de `chart_studio.plotly`, salvamos o gráfico gerado na nuvem; configurando `auto_open = True`, ele é exibido em uma nova aba do navegador. Ao defini-lo como `False`, ele só é salvo na nuvem.

Exemplo:

Definimos o nome de usuário e a chave de API que usaremos para conectar e, em seguida, mostramos o gráfico no notebook, salvamos na nuvem e mostramos em uma nova guia do navegador.

In [None]:
#fig = px.bar(y = [2, 3, 1])
#fig.show() # shows the chart in the cell below
# `api_key` only exists inside plot(), so test the stored Chart Studio
# credentials instead of an undefined global.
if chart_studio.tools.get_credentials_file().get("api_key"):
    py.plot(fig, filename = 'fig1_online.png', auto_open = True)

In [None]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn import linear_model, datasets


n_samples  = 1000
n_outliers = 50

# Synthetic regression data; only `coef` survives, because X and y are
# overwritten with the point-cloud coordinates right below.
X, y, coef = datasets.make_regression(
    n_samples=n_samples,
    n_features=1,
    n_informative=1,
    noise=10,
    coef=True,
    random_state=0,
)

# Replace the synthetic data with the cloud's x (input) and z (response).
X = x_coord.reshape(-1,1)
y = z_coord.reshape(-1,1)

# Add outlier data (disabled)
#np.random.seed(0)
#X[:n_outliers] = 3 + 0.5 * np.random.normal(size=(n_outliers, 1))
#y[:n_outliers] = -3 + 10 * np.random.normal(size=n_outliers)

# Fit line using all data
lr = linear_model.LinearRegression()
lr.fit(X, y)

# Robustly fit linear model with RANSAC algorithm
ransac = linear_model.RANSACRegressor(max_trials       = 30, 
                                      min_samples      = 0.5,
                                      stop_probability = 0.99 )
ransac.fit(X, y)
inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)

# Predict data of estimated models
line_X = np.arange(X.min(), X.max())[:, np.newaxis]
line_y = lr.predict(line_X)
line_y_ransac = ransac.predict(line_X)

# Compare estimated coefficients
# NOTE(review): `coef` belongs to the discarded synthetic data, so the value
# printed as "true" is unrelated to the data that was actually fitted.
print("Estimated coefficients (true, linear regression, RANSAC):")
print(coef, lr.coef_, ransac.estimator_.coef_)

lw = 2

# Points RANSAC rejected
plt.scatter(
    X[outlier_mask], y[outlier_mask], color="black", marker=".", label="Outliers"
)

# Points RANSAC accepted
plt.scatter(
    X[inlier_mask], y[inlier_mask], color="gold", marker=".", label="Inliers"
)

plt.plot(line_X, line_y, color="navy", linewidth=lw, label="Linear regressor")
plt.plot(
    line_X,
    line_y_ransac,
    color="cornflowerblue",
    linewidth=lw,
    label="RANSAC regressor",
)
plt.legend(loc="upper right")
plt.xlabel("Input")
plt.ylabel("Response")
plt.show()

In [None]:
def filterNoise(x_coord,y_coord,z_coord,variancia):
    """Keep the points that lie inside a per-axis band around the mean.

    On each axis the band is ``mean ± variancia * var`` (strict on both
    sides); a point is kept only when it is inside the band on all three
    axes. The variance of each axis is printed.

    NOTE(review): the band half-width is a multiple of the variance, not of
    the standard deviation -- confirm this is intended.

    Returns
    -------
    tuple of numpy.ndarray
        The filtered ``(x_coord, y_coord, z_coord)``.
    """
    def _inside_band(values):
        # boolean mask of the values strictly inside mean ± variancia*var
        centre = values.mean()
        spread = variancia*values.var()
        return (values > centre - spread)*(values < centre + spread)

    keep_x = _inside_band(x_coord)
    keep_y = _inside_band(y_coord)
    keep_z = _inside_band(z_coord)

    print("Var x: ",x_coord.var(), "Var y: ",y_coord.var(), "Var z: ",z_coord.var())

    keep = keep_x*keep_y*keep_z

    return x_coord[keep],y_coord[keep],z_coord[keep]


In [None]:
import os
# Show the entries of the folder that the batch cell below iterates over.
os.listdir("../../PointCloudsUmbara")

In [None]:
import os
# Batch settings; they are the same for every file, so define them once.
pts_per_voxel    = 20
voxel_size_x     = 0.2
voxel_size_y     = 0.2
voxel_size_z     = 0.2
variancia        = 2
max_trials       = 130
min_samples      = 0.8
stop_probability = 0.99

data_dir = "../../PointCloudsUmbara"

# List the folder once. As before, the last entry of the listing is skipped.
# NOTE(review): os.listdir order is arbitrary, so which entry is "last" (and
# which index maps to which file) is not guaranteed.
for i, umbara0 in enumerate(os.listdir(data_dir)[:-1]):
    # Best effort per file: a failure is reported and the batch continues.
    try:
        seg,z_coord,y_coord,x_coord = load(path = data_dir+"/"+umbara0)
        print("Umbara exp:",i)
        print("Total points",len(z_coord))

        x_coord,y_coord,z_coord = filterNoise(x_coord,y_coord,z_coord,variancia)

        x_grid,y_grid,z_grid        = generate_grid(x_coord,y_coord,z_coord,voxel_size_x,voxel_size_y,voxel_size_z)
        # NOTE(review): voxel_filter reads the global `seg`, which was not
        # passed through filterNoise -- confirm that is intended.
        points,bins                 = voxel_filter(x_grid,y_grid,z_grid,pts_per_voxel)
        x_coord_filtered, y_coord_filtered, z_coord_filtered = return_axis(points)
        Xi,Xf,Yi,Yf                 = setLims(x_coord_filtered,z_coord_filtered)
        beta,alpha                  = OLS_linear_regression(x_coord_filtered,z_coord_filtered)
        m_f , c_f                   = linReg(x_coord_filtered,z_coord_filtered)
        m   , c                     = linReg(x_coord,z_coord)
        line_X,line_y_ransac               = RANSAC(x_coord_filtered,z_coord_filtered,
                                                    max_trials       = max_trials,
                                                    min_samples      = min_samples,
                                                    stop_probability = stop_probability)
        line_X_origin,line_y_ransac_origin = RANSAC(x_coord,z_coord,
                                                    max_trials       = max_trials,
                                                    min_samples      = min_samples,
                                                    stop_probability = stop_probability)
        x_plot,y_plot                      = generate_line(line_X,line_y_ransac,Xi,Xf)

        plot2D(x_coord_filtered,
                   y_coord_filtered,
                   z_coord_filtered,
                   x_coord,
                   y_coord,
                   z_coord,
                   Xi-0.3,
                   Xf+0.3,
                   Yi-2,
                   Yf,
                   str(i),
                   str(pts_per_voxel),
                   # `tam_voxel` was never defined (NameError on every file);
                   # the voxel is cubic here, so label it with the x size
                   str(voxel_size_x),
                   str(variancia)
                  )
    except Exception as e:
        print("error measure:",i, e)


In [None]:
# Load the entry at index 18 of the folder listing for a quick look.
i = 18
umbara0 = os.listdir("../../PointCloudsUmbara")[i]
# load() returns its arrays in (seg, z, y, x) order.
seg,z_coord,y_coord,x_coord = load(path = "../../PointCloudsUmbara/"+umbara0)

# Scatter with z on the horizontal axis and x on the vertical axis.
plt.scatter(z_coord,x_coord)