In [1]:
import altair as alt
import pandas as pd

In [2]:
# Read iris.data into a DataFrame. Column labels are supplied explicitly
# through `names`, so no line of the file is consumed as a header.
column_names = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'class',
]
df = pd.read_csv('iris.data', header=None, names=column_names)

In [37]:
# Summary statistics (count, mean, std, min, quartiles, max) for the four
# numeric measurement columns; the non-numeric 'class' column is not included.
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [4]:
# Scatter plot of sepal_length (x) against sepal_width (y), one circle per row,
# coloured by class. .interactive() makes the chart pannable and zoomable.
(
    alt.Chart(df)
    .mark_circle()
    .encode(
        alt.X('sepal_length'),
        alt.Y('sepal_width'),
        alt.Color('class'),
    )
    .interactive()
)



In [10]:
# Box plot of sepal_length for each class.
# The chart is interactive, 600 px wide, and its axis labels are drawn
# horizontally (labelAngle=0).
(
    alt.Chart(df)
    .mark_boxplot()
    .encode(
        alt.X('class'),
        alt.Y('sepal_length'),
    )
    .interactive()
    .properties(width=600)
    .configure_axis(labelAngle=0)
)


In [25]:
# Same box plot of sepal_length per class, with each box coloured by its class
# and the individual data points drawn on top as a scatter layer.
# Both layers share the DataFrame passed to alt.layer(data=...), so the
# encodings spell out their types (:N / :Q) instead of relying on inference.
alt.layer(
    # Layer 1: the box plot, coloured per class.
    alt.Chart().mark_boxplot().encode(
        x='class:N',
        y='sepal_length:Q',
        color='class:N'
    ),
    # Layer 2: the raw observations. Semi-transparent black keeps them visible
    # against the coloured boxes and shows where points pile up.
    # .interactive() is attached to this simple mark; the layers share their
    # scales, so panning/zooming moves the whole chart.
    alt.Chart().mark_circle(color='black', opacity=0.4, size=20).encode(
        x='class:N',
        y='sepal_length:Q'
    ).interactive(),
    data=df
).properties(
    width=600
).configure_axis(
    labelAngle=0  # draw the class labels horizontally
)

In [41]:
# Plot a violin plot: one facet (column) per class, petal_length on the y axis,
# and a density estimate of petal_length as the horizontal extent of each violin.
# The plot is interactive and wider; the class labels are shown horizontally
# underneath each facet.
alt.Chart(df).transform_density(
    'petal_length',
    as_=['petal_length', 'density'],
    extent=[0, 8],          # evaluate the density for petal_length in [0, 8]
    groupby=['class']       # one density curve per class
).mark_area().encode(
    y='petal_length:Q',
    color='class:N',
    x=alt.X(
        'density:Q',
        stack='center',     # centre the area so it mirrors into a violin shape
        impute=None,
        title=None,
        # The density axis carries no useful reading: keep only a tick at 0.
        axis=alt.Axis(values=[0], labels=False, ticks=True, grid=False)
    ),
    column=alt.Column(
        'class:N',
        header=alt.Header(titleOrient='bottom', labelOrient='bottom', labelPadding=0)
    )
).interactive(
    # Pan/zoom along petal_length only; the density axis stays fixed so the
    # violin widths remain comparable.
    bind_x=False
).properties(
    width=200,              # per-facet width (previously 100) to make the plot wider
    height=200
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
).configure_axis(
    labelAngle=0
)

In [43]:
# Create a line chart with x axis as sepal_length and y as the percentage of
# the whole dataset that has that sepal_length, one line per class.
alt.Chart(df).transform_aggregate(
    # Number of rows for each (sepal_length, class) pair.
    n_rows='count()',
    groupby=['sepal_length', 'class']
).transform_joinaggregate(
    # Total number of rows in the dataset, attached to every aggregated row.
    total_rows='sum(n_rows)'
).transform_calculate(
    # Fraction of the dataset; the axis format below renders it as a percentage.
    percent='datum.n_rows / datum.total_rows'
).mark_line().encode(
    x='sepal_length:Q',
    y=alt.Y('percent:Q', title='Percentage of dataset', axis=alt.Axis(format='.1%')),
    color='class:N'
).interactive()


In [52]:
# Area-chart version of the previous line chart: count of rows per
# sepal_length, coloured by class.
# The areas use a constant opacity of 0.8 and an explicit three-colour palette;
# the colour legend is hidden (legend=None).
# NOTE(review): the opacity here is a fixed value, not data-driven - confirm
# whether a dynamic opacity was intended.
(
    alt.Chart(df)
    .mark_area(opacity=0.8)
    .encode(
        alt.X('sepal_length'),
        alt.Y('count()'),
        alt.Color(
            'class',
            scale=alt.Scale(range=['#4c78a8', '#f58518', '#e45756']),
            legend=None,
        ),
    )
    .interactive()
)
