In [None]:
import pandas as pd
import pandas_profiling
import numpy as np
import json
from jinja2 import Environment, FileSystemLoader

In [None]:
%%javascript
require.config({
paths: {
d3: "https://d3js.org/d3.v5.min",
}
});

require(["d3"], function(d3) {
window.d3 = d3;
});

In [None]:
def build_data(df, max_categories=25):
    """Build a co-occurrence matrix between values of low-cardinality text columns.

    Missing-value markers ("na", "?", "missing", "not available", "n/a",
    "missing value") are treated as missing, every column that can be parsed
    as numeric is converted, and the remaining text columns with fewer than
    ``max_categories`` distinct values are kept. Each distinct value of each
    kept column gets one row/column in a square matrix; cell ``[i, j]`` counts
    the rows of ``df`` in which value ``i`` and value ``j`` (belonging to two
    different columns) appear together. Values of the same column never
    co-occur, so those cells stay 0.

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    max_categories : int, default 25
        A text column is used only if it has strictly fewer distinct values
        (the missing-value placeholder counts as one value).

    Returns
    -------
    keys : dict
        ``{column: {"values": [...], "start_i": int, "end_i": int}}`` where
        ``values`` lists the distinct values in order of first appearance and
        ``[start_i, end_i)`` is the column's index range in the matrix.
    matrix : list[list[float]]
        Symmetric co-occurrence counts as nested lists.
    """
    missing_tokens = ["na", "?", "missing", "not available", "n/a", "missing value"]
    placeholder = "missing value"

    # `replace` without `inplace` returns a new frame, so the caller's data
    # is left untouched.
    work = df.replace(to_replace=missing_tokens, value=np.nan)

    # Same effect as the deprecated `errors='ignore'`: keep the column as it
    # is when it cannot be parsed as numeric.
    for col in work.columns:
        try:
            work[col] = pd.to_numeric(work[col])
        except (ValueError, TypeError):
            pass

    keys = {}
    row_positions = []  # per kept column: matrix index of the value in each row
    offset = 0
    for col in work.columns:
        series = work[col]
        is_text = (pd.api.types.is_object_dtype(series.dtype)
                   or pd.api.types.is_string_dtype(series.dtype))
        if not is_text:
            continue

        series = series.fillna(placeholder)
        levels = series.unique().tolist()
        if len(levels) >= max_categories:
            continue

        keys[col] = {"values": levels, "start_i": offset,
                     "end_i": offset + len(levels)}
        slot = {value: offset + i for i, value in enumerate(levels)}
        row_positions.append(
            np.array([slot[value] for value in series], dtype=np.intp))
        offset += len(levels)

    matrix = np.zeros((offset, offset))
    # Count every unordered pair of kept columns once (upper triangle) ...
    for first, left in enumerate(row_positions):
        for right in row_positions[first + 1:]:
            # np.add.at accumulates correctly when an index pair repeats.
            np.add.at(matrix, (left, right), 1)

    # ... then mirror the counts to make the matrix symmetric.
    return keys, (matrix + matrix.T).tolist()

In [None]:
def table_data_format(df_orig):
    """Describe a DataFrame as per-column metadata plus a list of row values.

    Missing-value markers ("na", "?", "missing", "not available", "n/a",
    "missing value") are treated as missing and every column that can be
    parsed as numeric is converted before the columns are described.

    The input frame is not modified.

    Parameters
    ----------
    df_orig : pandas.DataFrame
        Source data.

    Returns
    -------
    metadata : list[dict]
        One dict per column, in column order, with ``"ascend"`` (always 0)
        and ``"name"`` (the label, cut to 15 characters when it is a string).
        Text columns add ``"datatype": "string"`` and ``"values"`` (distinct
        values, missing shown as "missing value"). Integer and float columns
        add ``"datatype": "int"`` / ``"float"`` with ``"min_val"`` and
        ``"max_val"``. Columns of any other dtype get no ``"datatype"`` entry.
    data_array : list[list]
        One list per row; missing cells are the string "missing value".
        Numeric cells are plain Python numbers rather than NumPy scalars.
    """
    missing_tokens = ["na", "?", "missing", "not available", "n/a", "missing value"]
    placeholder = "missing value"

    # `replace` without `inplace` returns a new frame, so the caller's data
    # is left untouched.
    df = df_orig.replace(to_replace=missing_tokens, value=np.nan)

    # Same effect as the deprecated `errors='ignore'`: keep the column as it
    # is when it cannot be parsed as numeric.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass

    def to_builtin(value):
        # NumPy scalars -> plain Python numbers (serialisable, clean repr).
        return value.item() if isinstance(value, np.generic) else value

    metadata = []
    for col in df.columns:
        series = df[col]
        dtype = series.dtype
        # Only string labels can be truncated; other labels are kept as-is.
        helper = {"ascend": 0,
                  "name": col[:15] if isinstance(col, str) else col}

        if (pd.api.types.is_object_dtype(dtype)
                or pd.api.types.is_string_dtype(dtype)):
            helper["datatype"] = "string"
            helper["values"] = series.fillna(placeholder).unique().tolist()
        elif pd.api.types.is_integer_dtype(dtype):
            helper["datatype"] = "int"
            helper["min_val"] = to_builtin(series.min())
            helper["max_val"] = to_builtin(series.max())
        elif pd.api.types.is_float_dtype(dtype):
            helper["datatype"] = "float"
            helper["min_val"] = to_builtin(series.min())
            helper["max_val"] = to_builtin(series.max())
        metadata.append(helper)

    # Extract whole columns at once instead of one `iloc` lookup per cell,
    # swapping missing cells for the placeholder on the way.
    column_values = []
    for pos in range(df.shape[1]):
        series = df.iloc[:, pos]
        column_values.append([
            placeholder if is_missing else value
            for value, is_missing in zip(series.tolist(), series.isna().tolist())
        ])

    if column_values:
        data_array = [list(row) for row in zip(*column_values)]
    else:
        # No columns: still one (empty) entry per row.
        data_array = [[] for _ in range(df.shape[0])]
    return metadata, data_array

In [None]:
# Column names for the automobile dataset, in file order.
fields = [
    'symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
    'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
    'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type',
    'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke',
    'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg',
    'price',
]

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
path = r'C:\Users\Luis_Dutra\3D Objects\datasets\automobile\automobile_data_without_id.csv'

# Pass the column names explicitly. `fields` was previously defined but never
# used, while the other cells reading this same file use `names=...`, which
# implies the file has no header row. Without `names`, pandas would take the
# first data row as the header.
df = pd.read_csv(path, names=fields)

print('completed')

In [None]:
# Renumber the rows 0..n-1. `drop=True` discards the old index directly
# instead of inserting it as a column and deleting it afterwards, which
# also keeps working when the index has a name.
df = df.reset_index(drop=True)

# Show a preview rather than the whole frame.
df.head()

In [None]:
# Raw string: the Windows path contains backslashes ("\p", "\e") that are not
# valid escape sequences in a normal string literal.
file_loader = FileSystemLoader(r"C:\pandas-profiling\examples")
env = Environment(loader=file_loader)

template = env.get_template("table.html")

# table_data_format returns (column metadata, row values); `keys` holds the
# column metadata.
keys, data_array = table_data_format(df)

output = template.render(metadata_table_viz=keys, dataarray_table_viz=data_array)

# File that contains the 'meta_data_table_viz' and the 'data_array_table_viz'.
# The encoding is set explicitly so the result does not depend on the
# platform's default and non-ASCII cell values cannot make the write fail.
with open("test.html", "w", encoding="utf-8") as result_file:
    result_file.write(output)

In [None]:
# Column names for the automobile dataset, in file order.
columns = ["symboling", "normalized-losses", "make", "fuel-type", "aspiration", "num-of-doors", "body-style", "drive-wheels", "engine-location", "wheel-base", "length", "width", "height",
           "curb-weight", "engine-type", "num-of-cylinders", "engine-size", "fuel-system", "bore", "stroke", "compression-ratio", "horsepower", "peak-rpm", "city-mpg", "highway-mpg", "price"]

path = r'C:\Users\Luis_Dutra\3D Objects\datasets\automobile\automobile_data_without_id.csv'

# Declare the missing-value markers to the parser itself. They become NaN
# *before* column types are inferred, so a numeric column that contains a
# marker such as "?" is still read as numeric. Converting to numeric first
# and replacing the markers afterwards left those columns as text, and
# relied on the deprecated `errors='ignore'` option of `pd.to_numeric`.
df = pd.read_csv(
    path,
    names=columns,
    na_values=["na", "?", "missing", "not available", "n/a", "missing value"],
)

In [None]:
# Build a pandas-profiling report for `df`; as the cell's last expression,
# the report object is what the cell outputs.
pandas_profiling.ProfileReport(df)

In [None]:
# Build a new pandas-profiling report for `df` and write it to an HTML file.
# NOTE(review): the target is an absolute, machine-specific Windows path.
pandas_profiling.ProfileReport(df).to_file(r'C:\pandas-profiling\reports\automobile_report1.html')

In [None]:
# Call table_data_format on `df`; its return value (the column metadata and
# the list of rows) is the cell's last expression and therefore its output.
table_data_format(df)

In [None]:
import numpy as np
import pandas as pd
import pandas_profiling

# Column names for the automobile dataset, in file order.
columns = [
    "symboling", "normalized-losses", "make", "fuel-type", "aspiration",
    "num-of-doors", "body-style", "drive-wheels", "engine-location",
    "wheel-base", "length", "width", "height", "curb-weight", "engine-type",
    "num-of-cylinders", "engine-size", "fuel-system", "bore", "stroke",
    "compression-ratio", "horsepower", "peak-rpm", "city-mpg", "highway-mpg",
    "price",
]

# Read the CSV, supplying the column names explicitly.
path = r'C:\Users\Luis_Dutra\3D Objects\datasets\automobile\automobile_data_without_id.csv'
df = pd.read_csv(path, names=columns)

# Profile the frame as loaded and save the report as an HTML file.
(
    pandas_profiling
    .ProfileReport(df)
    .to_file(r'C:\pandas-profiling\reports\automobile_pandas_profiling.html')
)