In [None]:
# =========================================================
# Metadata extraction functions
# =========================================================

def getRelevantColumns(dataFrame):
    """Select the columns whose type makes them candidates for plotting.

    A column is kept when its dtype is ``int64`` or ``float64``, or when it
    has dtype ``object`` and is labelled exactly ``"date"``.

    Parameters:
        dataFrame: pandas DataFrame to inspect.

    Returns:
        A tuple ``(relevantColumnsIndex, relevantColumns)`` where the first
        item lists the positions of the selected columns and the second item
        lists the dtype of each selected column, in the same order.
    """
    relevantColumnsIndex = []
    relevantColumns = []

    # Pair labels and dtypes positionally. Indexing the dtypes Series with a
    # bare integer is a label lookup when the column labels are integers and a
    # deprecated positional fallback otherwise, so it is avoided here.
    for position, (label, columnType) in enumerate(zip(dataFrame.columns, dataFrame.dtypes)):
        isNumeric = columnType == "int64" or columnType == "float64"
        isDateText = columnType == "object" and label == "date"

        if isNumeric or isDateText:
            relevantColumnsIndex.append(position)
            relevantColumns.append(columnType)

    return (relevantColumnsIndex, relevantColumns)
        
def getDataFromRelevantColumns(relevantColumnsIndex, dataFrame):
    """Extract the values of the selected columns as plain lists.

    Parameters:
        relevantColumnsIndex: iterable of column positions to extract.
        dataFrame: pandas DataFrame holding the data.

    Returns:
        A list with one inner list per requested column, in the order the
        positions were given; each inner list holds that column's values in
        row order.
    """
    # Read each column positionally in one step. Going row by row with the
    # index labels as positions fails for any index other than 0..n-1 and
    # builds a temporary row Series for every single cell.
    return [dataFrame.iloc[:, position].tolist() for position in relevantColumnsIndex]

In [None]:
# =========================================================
# Plotting graphs
# =========================================================

# Scatter Plots, points in graph
def scatterplot(x_data, y_data, x_label="", y_label="", title="", color = "r", yscale_log=False):
    """Draw y_data against x_data as a scatter plot on a new figure.

    x_label, y_label and title annotate the axes, color is forwarded to
    matplotlib for the markers, and the y axis becomes logarithmic when
    yscale_log equals True.
    """
    # Only the axes object is needed; the figure handle is discarded
    axes = plt.subplots()[1]

    # Small, slightly transparent markers
    marker_style = {"s": 10, "color": color, "alpha": 0.75}
    axes.scatter(x_data, y_data, **marker_style)

    if yscale_log == True:
        axes.set_yscale('log')

    # Title and axis captions
    axes.set_title(title)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)

# Line Plot
def lineplot(x_data, y_data, x_label="", y_label="", title=""):
    """Draw y_data against x_data as a single line on a new figure.

    x_label, y_label and title annotate the axes.
    """
    # Only the axes object is needed; the figure handle is discarded
    axes = plt.subplots()[1]

    # Fully opaque line, two points wide, in the notebook's blue
    line_style = {"lw": 2, "color": '#539caf', "alpha": 1}
    axes.plot(x_data, y_data, **line_style)

    # Title and axis captions
    axes.set_title(title)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    
# Histogram
def histogram(data, n_bins, cumulative=False, x_label = "", y_label = "", title = ""):
    """Draw a histogram of data on a new figure.

    Parameters:
        data: values to bin.
        n_bins: forwarded to matplotlib as the ``bins`` argument.
        cumulative: forwarded to matplotlib's ``cumulative`` argument.
        x_label, y_label, title: axis captions and plot title.
    """
    _, ax = plt.subplots()
    # matplotlib's keyword is ``bins``; ``n_bins`` is not an accepted argument
    ax.hist(data, bins = n_bins, cumulative = cumulative, color = '#539caf')
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    
# Overlay 2 histograms to compare them
def overlaid_histogram(data1, data2, n_bins = 0, data1_name="", data1_color="#539caf", data2_name="", data2_color="#7663b0", x_label="", y_label="", title=""):
    """Draw two histograms on the same axes so they can be compared.

    Parameters:
        data1, data2: the two value collections to bin.
        n_bins: forwarded to matplotlib as ``bins`` when non-zero. When 0
            (the default) ten equal-width bins spanning the combined range of
            both data sets are used, so both are binned identically.
        data1_name, data2_name: legend labels.
        data1_color, data2_color: bar colours.
        x_label, y_label, title: axis captions and plot title.
    """
    max_nbins = 10

    if n_bins == 0:
        # Shared bin edges over the combined range of both data sets
        lower = min(min(data1), min(data2))
        upper = max(max(data1), max(data2))

        if upper > lower:
            # linspace always yields exactly max_nbins + 1 edges ending on
            # `upper`; stepping with a float bin width can overshoot by a bin
            bins = np.linspace(lower, upper, max_nbins + 1)
        else:
            # All values identical: a zero bin width cannot be stepped over,
            # so let matplotlib place the bins itself
            bins = max_nbins
    else:
        bins = n_bins

    # Create the plot; the second histogram is translucent so the first
    # remains visible underneath it
    _, ax = plt.subplots()
    ax.hist(data1, bins = bins, color = data1_color, alpha = 1, label = data1_name)
    ax.hist(data2, bins = bins, color = data2_color, alpha = 0.75, label = data2_name)
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    ax.legend(loc = 'best')
    
# Bar Plot
def barplot(x_data, y_data, error_data=None, x_label="", y_label="", title=""):
    """Draw a vertical bar chart on a new figure, optionally with error bars.

    Parameters:
        x_data: bar positions or categories.
        y_data: bar heights.
        error_data: per-bar error sizes drawn as vertical error bars. May be
            omitted (None), in which case no error bars are drawn.
        x_label, y_label, title: axis captions and plot title.
    """
    _, ax = plt.subplots()
    # Draw bars, position them in the center of the tick mark on the x-axis
    ax.bar(x_data, y_data, color = '#539caf', align = 'center')

    if error_data is not None:
        # ls='none' removes the connecting line between the error markers
        ax.errorbar(x_data, y_data, yerr = error_data, color = '#297083', ls = 'none', lw = 2, capthick = 2)

    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)



def stackedbarplot(x_data, y_data_list, colors, y_data_names="", x_label="", y_label="", title=""):
    """Draw several data series as bars stacked on top of each other.

    Parameters:
        x_data: bar positions or categories shared by every series.
        y_data_list: sequence of series; each series holds one height per bar.
        colors: one colour per series, indexed like y_data_list.
        y_data_names: legend labels, indexed like y_data_list. Series without
            a corresponding name are drawn without a legend entry.
        x_label, y_label, title: axis captions and plot title.
    """
    _, ax = plt.subplots()

    # Running height of the stack at each x position. Each new series must
    # start at the sum of ALL series below it, not just the previous one.
    stack_top = None

    for i, y_data in enumerate(y_data_list):
        heights = np.asarray(y_data, dtype=float)
        label = y_data_names[i] if i < len(y_data_names) else ""

        if stack_top is None:
            ax.bar(x_data, heights, color = colors[i], align = 'center', label = label)
            stack_top = heights.copy()
        else:
            ax.bar(x_data, heights, color = colors[i], bottom = stack_top, align = 'center', label = label)
            stack_top = stack_top + heights

    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    ax.legend(loc = 'upper right')



def groupedbarplot(x_data, y_data_list, colors, y_data_names="", x_label="", y_label="", title=""):
    """Draw several data series as side-by-side bars around each x position.

    Parameters:
        x_data: numeric x positions shared by every series.
        y_data_list: sequence of series; each series holds one height per
            x position.
        colors: one colour per series, indexed like y_data_list.
        y_data_names: legend labels, indexed like y_data_list. Series without
            a corresponding name are drawn without a legend entry.
        x_label, y_label, title: axis captions and plot title.
    """
    _, ax = plt.subplots()
    # Total width for all bars at one x location
    total_width = 0.8
    series_count = len(y_data_list)
    # Width of each individual bar (guarded so an empty list does not divide by zero)
    ind_width = total_width / series_count if series_count else total_width

    positions = np.asarray(x_data)
    # Bars are centre-aligned, so each offset is the centre of that series'
    # slot; this keeps the whole cluster centred on the tick mark
    offsets = (np.arange(series_count) + 0.5) * ind_width - total_width / 2

    # Draw bars, one category at a time
    for i, y_data in enumerate(y_data_list):
        label = y_data_names[i] if i < len(y_data_names) else ""
        ax.bar(positions + offsets[i], y_data, color = colors[i], label = label, width = ind_width)

    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    ax.legend(loc = 'upper right')
    
# Box plot (box-and-whisker)
def boxplot(x_data, y_data, base_color="#539caf", median_color="#297083", x_label="", y_label="", title=""):
    """Draw box plots of y_data on a new figure, labelled with x_data.

    The boxes, whiskers and caps use base_color; the median line uses
    median_color. x_label, y_label and title annotate the axes.
    """
    axes = plt.subplots()[1]

    box_style = {
        # patch_artist must be True for the box fill colour to take effect
        "patch_artist": True,
        "medianprops": {'color': median_color},
        "boxprops": {'color': base_color, 'facecolor': base_color},
        "whiskerprops": {'color': base_color},
        "capprops": {'color': base_color},
    }
    axes.boxplot(y_data, **box_style)

    # Replace the default 1, 2, 3, ... tick labels with the caller's labels
    axes.set_xticklabels(x_data)
    axes.set_ylabel(y_label)
    axes.set_xlabel(x_label)
    axes.set_title(title)
    
def violinplot(data, x_label="", y_label=""):
    """Draw a violin plot of data on a new figure and show it.

    x_label and y_label caption the axes.
    """
    figure = plt.figure()

    # Axes rectangle given as [left, bottom, width, height] in figure fractions
    axes = figure.add_axes([0, 0, 1, 1])

    axes.set_ylabel(y_label)
    axes.set_xlabel(x_label)

    # Draw the violins, then render the figure
    axes.violinplot(data)
    plt.show()

In [None]:
# =========================================================
# Heuristic functions
# =========================================================

from datetime import datetime
from collections import Counter

def _askPreferredGraph(recommendationLine):
    """Print the graph-selection prompts and return what the user typed.

    recommendationLine is printed verbatim between the two fixed prompt lines.
    """
    graphSelection()

    print("Choose the most confortable graph for the data you've inputted:")
    print(recommendationLine)
    print("Type the graph you would like to see:")
    return input()


def _drawTwoColumnGraph(preferredGraph, allowedGraphs, dataFrame, xName, yName):
    """Draw the requested graph for two columns, or report an unsupported choice."""
    if preferredGraph not in allowedGraphs:
        print("Wrong graph type.")
        return

    x_data = dataFrame[xName].tolist()
    y_data = dataFrame[yName].tolist()

    if preferredGraph == "scatter":
        scatterplot(x_data, y_data, x_label=xName, y_label=yName, title="")
    elif preferredGraph == "line":
        lineplot(x_data, y_data, x_label=xName, y_label=yName, title="")
    elif preferredGraph == "violin":
        violinplot([x_data, y_data], x_label=xName, y_label=yName)
    elif preferredGraph == "boxplot":
        # pandas draws one box per listed column
        dataFrame.boxplot(column=[xName, yName])


def _plotTwoColumns(dataFrame, xName, yName):
    """Recommend and draw a graph for a pair of columns.

    The recommendation depends on the two dtypes, on whether the data set has
    more than 500 rows, and on whether the x column is monotonically
    increasing. Dtype pairs other than int64/int64, int64/float64 and
    float64/float64 are not handled and produce no output.
    """
    isLarge = dataFrame.shape[0] > 500
    xType = dataFrame[xName].dtype
    yType = dataFrame[yName].dtype

    lineOnly = ("Recommended types are LINE plots.", ("line",))
    boxOrScatter = ("Recommended types are BOXPLOT or SCATTER plots.", ("scatter", "boxplot"))

    if xType == "int64" and yType == "int64":
        # `is_monotonic_increasing` replaces `Series.is_monotonic`, which no
        # longer exists in pandas 2.x
        if dataFrame[xName].is_monotonic_increasing:
            recommendation, allowedGraphs = lineOnly
        elif isLarge:
            # Message text (including its spacing) is kept exactly as before
            recommendation = "Recommended types are  VIOLIN or SCATTER plots."
            allowedGraphs = ("scatter", "violin")
        else:
            recommendation, allowedGraphs = boxOrScatter
    elif xType == "int64" and yType == "float64":
        if isLarge:
            recommendation = "Recommended types are VIOLIN or SCATTER plots."
            allowedGraphs = ("scatter", "violin")
        else:
            recommendation, allowedGraphs = boxOrScatter
    elif xType == "float64" and yType == "float64":
        if isLarge or dataFrame[xName].is_monotonic_increasing:
            recommendation, allowedGraphs = lineOnly
        else:
            recommendation = "Recommended types are SCATTER, BOXPLOT or LINE plots."
            allowedGraphs = ("line", "scatter", "boxplot")
    else:
        # TODO: other combinations (e.g. date strings against numbers) are
        # not supported yet
        return

    preferredGraph = _askPreferredGraph(recommendation)
    _drawTwoColumnGraph(preferredGraph, allowedGraphs, dataFrame, xName, yName)


def _plotSingleColumn(dataFrame, columnName):
    """Recommend and draw a graph of how often each value of one column occurs.

    Columns whose dtype is not int64, float64 or object produce no output.
    """
    columnType = dataFrame[columnName].dtype
    isNumeric = columnType == "int64" or columnType == "float64"

    if not isNumeric and columnType != "object":
        return

    # Column 0 holds the occurrence counts, column 1 the distinct values
    occurrences = getMostOcurrences(dataFrame, columnName)
    counts = occurrences[0]
    values = occurrences[1]

    # All-zero error sizes: barplot draws its error bars with no extent
    errorSet = buildErrorSet(len(counts))

    if isNumeric:
        preferredGraph = _askPreferredGraph("Recommended types are SCATTER, HISTOGRAM and BAR plots.")

        if preferredGraph == "scatter":
            scatterplot(values, counts, x_label=columnName, y_label="ocurrences", title="")
        elif preferredGraph == "bar":
            barplot(values, counts, errorSet, x_label=columnName, y_label="ocurrences", title="")
        elif preferredGraph == "histogram":
            pd.DataFrame(values).plot.hist(bins=12, alpha=0.5)
        else:
            print("Wrong graph type.")
    else:
        preferredGraph = _askPreferredGraph("Recommended types are SCATTER and BAR plots.")

        if preferredGraph == "scatter":
            # Axis orientation kept as before: counts on x, values on y
            scatterplot(counts, values)
        elif preferredGraph == "bar":
            barplot(values, counts, errorSet, x_label=columnName, y_label="ocurrences", title="")
        else:
            print("Wrong graph type.")


def heuristics(dataTypesArray, dataTypesIndex, data, relevantLabelsIndex, labels, ids, dataFrame):
    """Recommend graph types for the selected columns and draw the user's pick.

    With two selected columns the pair is plotted against each other; with one
    column the occurrence count of each distinct value is plotted. In both
    cases the user is prompted on standard input for the graph type.

    Parameters:
        dataTypesArray: dtypes of the plottable columns; nothing is done
            unless it has at least two entries.
        dataTypesIndex, data, relevantLabelsIndex, labels: accepted for
            compatibility with existing callers; not used.
        ids: list with the one or two selected column names. Only the first
            two entries are considered.
        dataFrame: pandas DataFrame holding the data.

    Returns:
        None.
    """
    if len(dataTypesArray) <= 1:
        return

    availableColumns = list(dataFrame.columns.values)

    # Resolve the selection by name so the dtype checks and the plotted data
    # always refer to the same column, in the order the user gave them
    if not ids or ids[0] not in availableColumns:
        print("Selected column was not found in the dataset.")
        return

    if len(ids) > 1 and ids[1] in availableColumns:
        _plotTwoColumns(dataFrame, ids[0], ids[1])
    else:
        _plotSingleColumn(dataFrame, ids[0])
                
    #profile = ProfileReport(dataFrame, minimal=True)
    #profile.to_file("output2.html")
            
def quantile(df):
    """Compute the 0.25, 0.5 and 0.75 quantiles of df along axis 1.

    Returns a list of three results, one per quantile in ascending order,
    each produced by ``df.quantile`` with linear interpolation over the
    numeric columns only.
    """
    quantileRate = 0.25
    numberOfQuantiles = 3

    # 0.25, 0.5, 0.75 — successive multiples of the base rate
    rates = [quantileRate * step for step in range(1, numberOfQuantiles + 1)]

    return [
        df.quantile(q=rate, axis=1, numeric_only=True, interpolation='linear')
        for rate in rates
    ]
            
def buildErrorSet(n):
    """Return a list of n zeros (an empty list when n is zero or negative)."""
    return [0 for _ in range(n)]
            
def getMostOcurrences(df, ids):
    """Count how often each distinct value occurs in one column.

    Parameters:
        df: pandas DataFrame holding the data.
        ids: label of the column to count.

    Returns:
        A DataFrame with one row per distinct value, in order of first
        appearance. Column 0 holds the number of occurrences and column 1 the
        value itself. Both columns exist even when the input column is empty.
    """
    occurrences = Counter(df[ids].tolist())
    rows = [[count, value] for value, count in occurrences.items()]

    # Name the columns explicitly: without this an empty input yields a frame
    # with no columns at all, and callers indexing [0] / [1] would fail
    return pd.DataFrame(rows, columns=[0, 1])
            
def sortDateTime(dates):
    """Sort a list of "%Y-%m-%d" date strings chronologically, in place.

    Example input: ['2018-11-05', '2017-03-25', '2018-11-01', '2017-03-07'].
    The same list object is returned after sorting.
    """
    def asDateTime(text):
        # Parse so that ordering follows the calendar, not the characters
        return datetime.strptime(text, "%Y-%m-%d")

    dates.sort(key=asDateTime)
    return dates

In [None]:
#profile.to_widgets()

#profile.to_notebook_iframe()

#profile.to_file("your_report.html")

#profile.to_file("your_report.json")

#Version 2.4 introduces minimal mode. This is a default 
#configuration that disables expensive computations (such as correlations and dynamic binning).
#profile = ProfileReport(large_dataset, minimal=True)
#profile.to_file("output.html")
from IPython.display import clear_output

def logo():
    """Print the "Data Viz" ASCII-art banner."""
    # Raw string: the art contains backslashes that would otherwise be read
    # as (invalid) escape sequences. The printed text is unchanged.
    print(r"""               
    ,------.            ,--.              ,--.   ,--.,--.        
    |  .-.  \  ,--,--.,-'  '-. ,--,--.     \  `.'  / `--',-----. 
    |  |  \  :' ,-.  |'-.  .-'' ,-.  |      \     /  ,--.`-.  /  
    |  '--'  /\ '-'  |  |  |  \ '-'  |       \   /   |  | /  `-. 
    `-------'  `--`--'  `--'   `--`--'        `-'    `--'`-----' 
    """)
    
def graphSelection():
    """Print the (currently blank) graph-selection banner.

    The output is two whitespace-only lines.
    """
    # Whitespace spelled out with escapes so it stays visible in the source
    banner = "  \n    "
    print(banner)
    
def spacing():
    """Print a block of whitespace-only lines as vertical spacing."""
    # One empty line followed by four lines of four spaces each
    indent = "    "
    blank_lines = ["", indent, indent, indent, indent]
    print("\n".join(blank_lines))
    
def clear():
    """Clear the notebook output by calling IPython's clear_output with wait=False."""
    clear_output(wait=False)

In [None]:
class ButtonHandler:
    """Click handler for the column-selection buttons built by columnOptions."""
    
    def __init__(self):
        # Initialised to zero; nothing in this class reads or updates it
        self.count = 0
        
    def buttonClicked(self, button):
        """React to a click on `button`, dispatching on its description.

        The "Finish selection" button clears the output and triggers the
        heuristics step; any other button records its description as a
        selected column and disables itself.
        """
        # Debug trace printed on every click
        print("teste1")
        if button.description == "Finish selection":
            clear()
            # NOTE(review): no function named `heuristic` is visible in this
            # file (only `heuristics`, which takes seven arguments) — this
            # call presumably raises NameError; confirm the intended target.
            heuristic()
        else:
            # NOTE(review): `ids` is not defined in this class or at module
            # level in the visible source (only as a local in start()) —
            # verify where it is meant to come from.
            ids.append(button.description)
            # Disable the button once it has been clicked
            button.disabled=True
            
def columnOptions(columns):
    """Display one clickable button per column plus a "Finish selection" button.

    Every button reports its clicks to a single shared ButtonHandler.

    Parameters:
        columns: iterable of column labels used as button descriptions.
    """
    handler = ButtonHandler()

    # One button per column, followed by the button that ends the selection
    descriptions = list(columns) + ["Finish selection"]

    buttons = []

    for description in descriptions:
        button = widgets.Button(
            description=description,
            disabled=False,
            button_style="danger",
            icon="check")

        button.on_click(handler.buttonClicked)
        buttons.append(button)

    display(widgets.HBox(buttons))

In [None]:
# =========================================================
# Handle CSV file
# =========================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv as csvManager
import pandas_profiling as pp

# Used to print data inside the python file
%matplotlib inline

from pandas_profiling import ProfileReport
import ipywidgets as widgets
from IPython.display import display

output = widgets.Output()

    
def start():
    """Interactive entry point: load a CSV file and plot the chosen columns.

    Prompts for the path of a CSV file, lists the columns considered
    plottable, prompts for one or two column names separated by a comma, and
    hands the selection to heuristics(). A warning is printed when a typed
    name is not one of the listed columns; processing continues regardless.
    """
    print("Enter dataset name:")
    dataSetPath = input()

    clear()

    # read_csv already returns a DataFrame
    dataFrame = pd.read_csv(dataSetPath)

    # Get relevant columns from CSV dataframe
    (relevantColumnsIndex, relevantColumns) = getRelevantColumns(dataFrame)

    # Get the values held by the relevant columns
    dataFromColumns = getDataFromRelevantColumns(relevantColumnsIndex, dataFrame)

    print("Input one or wo desired columns, divided by comma.")
    labels = dataFrame.columns.values

    relevantLabels = [labels[i] for i in relevantColumnsIndex]
    relevantLabelsIndex = list(relevantColumnsIndex)

    # Buttons only show the available column names; the actual selection is
    # typed in below
    columnButtons = [
        widgets.Button(
            description=column,
            disabled=False,
            button_style="danger",
            icon="check")
        for column in relevantLabels
    ]

    print(relevantLabels)
    display(widgets.HBox(columnButtons))
    typedColumns = input()

    clear()

    # Accept "a, b" as well as "a,b": a name is trimmed only when it does not
    # match a column as typed, so labels containing spaces still work
    knownLabels = list(labels)
    ids = [
        name if name in knownLabels else name.strip()
        for name in typedColumns.split(",")
    ]

    # Check each of the (up to two) names on its own
    if any(name not in relevantLabels for name in ids[:2]):
        print("You have miss spelled columns or bad format.")

    heuristics(relevantColumns, relevantColumnsIndex, dataFromColumns, relevantLabelsIndex, labels, ids, dataFrame)

    #histogram(dataFromColumns[0], 0)

    #profile = ProfileReport(dataFrame, title="Pandas Profiling Report", explorative=True)
    #profile.progress_bar(False)
    #profile.to_file("your_report.html")

    #profile = ProfileReport(dataFrame, minimal=True)
    #profile.to_file("output.html")




![datavisualization.jpg](attachment:datavisualization.jpg)

# **Triagem Assistida de Visualização de Dados**

Choose the data you want to plot in a chart
by choosing one or two columns.

In [None]:
start()

![datavisualization2.jpg](attachment:datavisualization2.jpg)