# Hierarchical Graphs

In [1]:
# Libraries used throughout the notebook
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Display at most 10 columns when a DataFrame is rendered
pd.set_option('display.max_columns', 10)
# Renderer used by every fig.show() call below
pio.renderers.default = "plotly_mimetype+notebook_connected"

In [2]:
import warnings
# Ignore every warning from this point on so the rendered cell outputs stay clean.
# NOTE(review): this is a blanket filter (no category or module given), so it also hides
# warnings that may point to real problems; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

## Automobile Dataset

In [3]:
# Column names, in file order, to attach to the data when it is loaded
headers = """
    symboling normalized_losses make fuel_type aspiration num_doors
    body_style drive_wheels engine_location wheel_base length width
    height curb_weight engine_type num_cylinders engine_size fuel_system
    bore stroke compression_ratio horsepower peak_rpm city_mpg
    highway_mpg price
""".split()

In [4]:
# Load the data file directly from its URL. No row is treated as a header, so the
# column names come from `headers`; "?" entries are parsed as missing values (NaN).
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data",
    names=headers,
    header=None,
    na_values="?",
)
df.head(3)

Unnamed: 0,symboling,normalized_losses,make,fuel_type,aspiration,...,horsepower,peak_rpm,city_mpg,highway_mpg,price
0,3,,alfa-romero,gas,std,...,111.0,5000.0,21,27,13495.0
1,3,,alfa-romero,gas,std,...,111.0,5000.0,21,27,16500.0
2,1,,alfa-romero,gas,std,...,154.0,5000.0,19,26,16500.0


### Analyzing Missing Values

Missing values can cause problems when `plotly` builds hierarchical charts, so we are going to remove every row that contains one.

In [5]:
# Count the missing entries in each column
df.isna().sum()

symboling             0
normalized_losses    41
make                  0
fuel_type             0
aspiration            0
num_doors             2
body_style            0
drive_wheels          0
engine_location       0
wheel_base            0
length                0
width                 0
height                0
curb_weight           0
engine_type           0
num_cylinders         0
engine_size           0
fuel_system           0
bore                  4
stroke                4
compression_ratio     0
horsepower            2
peak_rpm              2
city_mpg              0
highway_mpg           0
price                 4
dtype: int64

In [6]:
# Removing the missing values.
# dropna() returns a new frame with every row that contains at least one NaN removed;
# rebinding `df` to it (instead of using inplace=True) avoids mutating the object that
# the earlier cells displayed.
df = df.dropna()
# Every column should now report zero missing values
df.isnull().sum()

symboling            0
normalized_losses    0
make                 0
fuel_type            0
aspiration           0
num_doors            0
body_style           0
drive_wheels         0
engine_location      0
wheel_base           0
length               0
width                0
height               0
curb_weight          0
engine_type          0
num_cylinders        0
engine_size          0
fuel_system          0
bore                 0
stroke               0
compression_ratio    0
horsepower           0
peak_rpm             0
city_mpg             0
highway_mpg          0
price                0
dtype: int64

## Sunburst Plots

A sunburst diagram displays a hierarchical structure. The center of the circle represents the root of the hierarchy, and each additional ring represents one more level of it.

In [7]:
# Two-level sunburst: inner ring = body_style, outer ring = fuel_type
fig_s1 = px.sunburst(
    df,
    path=["body_style", "fuel_type"],
    title='Body Style & Fuel Type',
    width=600,
    height=600,
)
fig_s1.show()

If you hover the mouse over the sedan/gas sector, you will see the following information:
- labels = gas
- count = 67
- parent = sedan
- id = sedan/gas

In [8]:
# Keep only the rows whose body_style is "sedan" and whose fuel_type is "gas"
sedan_gas = df.loc[df["body_style"].eq("sedan") & df["fuel_type"].eq("gas")]
print(sedan_gas.shape)

(67, 26)


There are 67 sedan/gas cars.

### Variables order inside the path argument

In [9]:
# Same columns with the order swapped: inner ring = fuel_type, outer ring = body_style
fig_s2 = px.sunburst(
    df,
    path=["fuel_type", "body_style"],
    title='Fuel Type & Body Style',
    width=600,
    height=600,
)
fig_s2.show()

As you can see, the order in the path parameter is important.

### Adding `values`

In [10]:
# Pass values="price" so the price column is used as the sector values
fig_s3 = px.sunburst(
    df,
    path=["body_style", "fuel_type"],
    values="price",
    title='Body Style & Fuel Type with Price',
    width=600,
    height=600,
)
fig_s3.show()

The sunburst graphs `fig_s1` and `fig_s3` are a little bit different. `fig_s3` includes the parameter `values='price'`.

If you hover the mouse over the `sedan/gas` sector, you will see the following information:
- labels = gas
- price= 813,499
- parent = sedan
- id = sedan/gas

In [11]:
# Total price over the sedan/gas rows, to compare with the hover value
sedan_gas["price"].sum()

813499.0

### Adding `color`

In [12]:
# Let's use a different example: the tips sample dataset returned by
# plotly.express' `px.data.tips()`.
tip = px.data.tips()

In [13]:
# (rows, columns) of the tips data, followed by its first five rows
print(tip.shape)
tip.head(n=5)

(244, 7)


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [14]:
# Baseline sunburst: day (inner ring) and time (outer ring), with total_bill as values
fig_s4 = px.sunburst(
    tip,
    path=['day', 'time'],
    values='total_bill',
    title='Day & Time with Total Bill',
    width=600,
    height=600,
)
fig_s4.show()

In [15]:
# Same chart, now colored by the numeric `tip` column on the RdBu continuous scale
fig_s4 = px.sunburst(
    tip,
    path=['day', 'time'],
    values='total_bill',
    color='tip',
    color_continuous_scale='RdBu',
    title='Day & Time with Total Bill',
    width=600,
    height=600,
)
fig_s4.show()

In [16]:
# Same chart again, with the hover value of `tip` formatted to two decimals
fig_s4 = px.sunburst(
    tip,
    path=['day', 'time'],
    values='total_bill',
    color='tip',
    color_continuous_scale='RdBu',
    hover_data={'tip': ':.2f'},
    title='Day & Time with Total Bill',
    width=600,
    height=600,
)
fig_s4.show()

If you hover your mouse over the `Sat/Dinner` section, you will see:
- labels=Dinner
- total_bill=1778.4
- parent=Sat
- id=Sat/Dinner
- tip=3.52

In [17]:
# Keep the Saturday dinner rows and check their total_bill sum against the hover value
sat_din = tip.loc[tip['day'].eq('Sat') & tip['time'].eq('Dinner')]
print('The sum of total_bill for Sat/Dinner is: %.1f' % sat_din['total_bill'].sum())

The sum of total_bill for Sat/Dinner is: 1778.4


In [18]:
# The first element of the shape is the number of Saturday/Dinner rows
print(sat_din.shape)
sat_din.head(n=5)

(87, 7)


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
19,20.65,3.35,Male,No,Sat,Dinner,3
20,17.92,4.08,Male,No,Sat,Dinner,2
21,20.29,2.75,Female,No,Sat,Dinner,2
22,15.77,2.23,Female,No,Sat,Dinner,2
23,39.42,7.58,Male,No,Sat,Dinner,4


The value `tip=3.51924` is computed as the weighted average of the color values (`color='tip'`) using as weight `values='total_bill'`.

In [19]:
# Computing tip (color values) times total_bill (weight values).
# `sat_din` was obtained by filtering `tip`, so assigning a new column onto it in place
# raises pandas' SettingWithCopyWarning (only hidden here by the global warnings filter).
# `assign` returns a new DataFrame with the extra column, which avoids the ambiguity.
sat_din = sat_din.assign(total_bill_tip=sat_din['total_bill'] * sat_din['tip'])
sat_din.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,total_bill_tip
19,20.65,3.35,Male,No,Sat,Dinner,3,69.1775
20,17.92,4.08,Male,No,Sat,Dinner,2,73.1136
21,20.29,2.75,Female,No,Sat,Dinner,2,55.7975
22,15.77,2.23,Female,No,Sat,Dinner,2,35.1671
23,39.42,7.58,Male,No,Sat,Dinner,4,298.8036


In [20]:
# Weighted average of tip = sum(total_bill * tip) / sum(total_bill)
print(
    'Average of tip with total_bill as a weight: %.2f'
    % (sat_din['total_bill_tip'].sum() / sat_din['total_bill'].sum())
)

Average of tip with total_bill as a weight: 3.52


In [21]:
# Choosing color as non-numerical data: color is mapped to the categorical `sex` column.
# The hierarchy is sex -> time -> day, and the title lists the levels in that order.
fig_s4 = px.sunburst(
    tip,
    path=['sex', 'time', 'day'],
    values='total_bill',
    color='sex',
    width=600,
    height=600,
    title='Sex, Time & Day with Total Bill',
)
fig_s4.show()

In [22]:
# Using an explicit mapping for discrete colors: each `sex` value gets a fixed color.
# The hierarchy is sex -> time -> day, and the title lists the levels in that order.
fig_s4 = px.sunburst(
    tip,
    path=['sex', 'time', 'day'],
    values='total_bill',
    color='sex',
    color_discrete_map={'Male': 'dimgrey', 'Female': 'coral'},
    width=600,
    height=600,
    title='Sex, Time & Day with Total Bill',
)
fig_s4.show()

## Treemap Chart

Treemap graphs show hierarchical data using nested rectangles. The input data is the same as for Sunburst Charts. 

The hierarchy is defined by the `labels` and `parents` attributes. You can click on a sector to zoom in or out. The chart also shows a path bar in the upper-left corner of the treemap, which you can use to zoom out.

In [23]:
# Two-level treemap: body_style first, then fuel_type nested inside it
fig_t1 = px.treemap(
    df,
    path=["body_style", "fuel_type"],
    title='Body Style & Fuel Type',
    width=1000,
    height=500,
)
fig_t1.show()

If you hover the mouse over the sedan/gas rectangle, you will see the following information:
- labels = gas
- count = 67
- parent = sedan
- id = sedan/gas

In [24]:
# Reverse the path order: fuel_type first, then body_style nested inside it
fig_t2 = px.treemap(
    df,
    path=["fuel_type", "body_style"],
    title='Fuel Type & Body Style',
    width=1000,
    height=500,
)
fig_t2.show()

The order in the path parameter defines the graph.

### Adding `values`

In [25]:
# Pass values="price" so the price column is used as the rectangle values
fig_t3 = px.treemap(
    df,
    path=["body_style", "fuel_type"],
    values="price",
    title='Body Style & Fuel Type with Price',
    width=1000,
    height=500,
)
fig_t3.show()

The graphs of `fig_t1` and `fig_t3` are a little bit different. `fig_t3` includes the parameter `values='price'`.

If you hover the mouse over the `sedan/gas` rectangle, you will see the following information:
- labels = gas
- price= 813,499
- parent = sedan
- id = sedan/gas

In [26]:
# Including make in the path as a third level (body_style -> fuel_type -> make).
# The title lists all three levels so it matches the hierarchy that is drawn.
fig_t3 = px.treemap(
    df,
    path=["body_style", "fuel_type", "make"],
    values="price",
    width=1000,
    height=500,
    title='Body Style, Fuel Type & Make with Price',
)
fig_t3.show()

### Adding `color`

Let's use the tips example.

In [27]:
# Treemap of day -> time, colored by the numeric `tip` column on the RdBu scale
fig_t4 = px.treemap(
    tip,
    path=['day', 'time'],
    values='total_bill',
    color='tip',
    color_continuous_scale='RdBu',
    title='Day & Time with Total Bill',
    width=1000,
    height=500,
)
fig_t4.show()

In [28]:
# Same treemap, with the hover value of `tip` formatted to two decimals
fig_t4 = px.treemap(
    tip,
    path=['day', 'time'],
    values='total_bill',
    color='tip',
    color_continuous_scale='RdBu',
    hover_data={'tip': ':.2f'},
    title='Day & Time with Total Bill',
    width=1000,
    height=500,
)
fig_t4.show()

If you hover your mouse over the `Sat/Dinner` section, you will see:
- labels=Dinner
- total_bill=1778.4
- parent=Sat
- id=Sat/Dinner
- tip=3.52

In [29]:
# Choosing color as non-numerical data: color is mapped to the categorical `time` column.
# The hierarchy is sex -> time -> day, and the title lists the levels in that order.
fig_t4 = px.treemap(
    tip,
    path=['sex', 'time', 'day'],
    values='total_bill',
    color='time',
    width=1000,
    height=500,
    title='Sex, Time & Day with Total Bill',
)
fig_t4.show()

## Icicle Charts

Icicle graphs visualize hierarchical data using rectangular sectors that cascade from root to leaves in one of four directions: up, down, left, or right. The input data is the same as for Sunburst and Treemap charts. 

The hierarchy is defined by the `labels` and `parents` attributes. You can click on a sector to zoom in or out. The chart also shows a path bar in the upper-left corner of the icicle chart, which you can use to zoom out.

In [30]:
# Two-level icicle chart: body_style first, then fuel_type
fig_c1 = px.icicle(
    df,
    path=["body_style", "fuel_type"],
    title='Body Style & Fuel Type',
    width=800,
    height=500,
)
fig_c1.show()

If you hover the mouse over the sedan/gas rectangle, you will see the following information:
- labels = gas
- count = 67
- parent = sedan
- id = sedan/gas

In [31]:
# Reverse the path order: fuel_type first, then body_style
fig_c2 = px.icicle(
    df,
    path=["fuel_type", "body_style"],
    title='Fuel Type & Body Style',
    width=800,
    height=500,
)
fig_c2.show()

The order in the path parameter defines the graph.

### Adding `values`

In [32]:
# Pass values="price" so the price column is used as the sector values
fig_c3 = px.icicle(
    df,
    path=["body_style", "fuel_type"],
    values="price",
    title='Body Style & Fuel Type with Price',
    width=800,
    height=500,
)
fig_c3.show()

The graphs of `fig_c1` and `fig_c3` are a little bit different. `fig_c3` includes the parameter `values='price'`.

If you hover the mouse over the `sedan/gas` rectangle, you will see the following information:
- labels = gas
- price= 813,499
- parent = sedan
- id = sedan/gas

In [33]:
# Including make in the path, and starting the path with the constant "Cars"
# (px.Constant) ahead of body_style -> fuel_type -> make.
# The title lists all three data levels so it matches the hierarchy that is drawn.
fig_c3 = px.icicle(
    df,
    path=[px.Constant("Cars"), "body_style", "fuel_type", "make"],
    values="price",
    width=600,
    height=700,
    title='Body Style, Fuel Type & Make with Price',
)
fig_c3.show()

### Adding `color`

We will use the tips example.

In [34]:
# Icicle of day -> time, colored by `tip` (RdBu scale), hover value shown with two decimals
fig_c4 = px.icicle(
    tip,
    path=['day', 'time'],
    values='total_bill',
    color='tip',
    color_continuous_scale='RdBu',
    hover_data={'tip': ':.2f'},
    title='Day & Time with Total Bill',
    width=800,
    height=600,
)
fig_c4.show()

If you hover your mouse over the `Sat/Dinner` section, you will see:
- labels=Dinner
- total_bill=1778.4
- parent=Sat
- id=Sat/Dinner
- tip=3.52

In [35]:
# Choosing color as non-numerical data: color is mapped to the categorical `time` column.
# The hierarchy is sex -> time -> day, and the title lists the levels in that order.
fig_c4 = px.icicle(
    tip,
    path=['sex', 'time', 'day'],
    values='total_bill',
    color='time',
    width=800,
    height=600,
    title='Sex, Time & Day with Total Bill',
)
fig_c4.show()

## References

- https://plotly.com/python/sunburst-charts/
- https://plotly.com/python/treemaps/
- https://plotly.com/python/icicle-charts/