IRIS DATASET

In [11]:
import altair as alt
import pandas as pd
import numpy as np

In [12]:
from vega_datasets import data

In [13]:
# Load the iris table through the vega_datasets `data` loader imported above.
iris = data.iris()

In [14]:
# (rows, columns) of the iris frame.
iris.shape

(150, 5)

In [15]:
# Preview the first five rows.
iris.head()

Unnamed: 0,sepalLength,sepalWidth,petalLength,petalWidth,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [16]:
# Column names available for the chart encodings below.
iris.columns

Index(['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'species'], dtype='object')

# Scatter plot to show correlation between sepal dimensions

In [63]:
# Sepal length (x) against sepal width (y), one point per row, coloured by species.
alt.Chart(iris).mark_point().encode(
    alt.X('sepalLength'),
    alt.Y('sepalWidth'),
    alt.Color('species'),
).properties(
    title='Scatter plot showing correlation between sepal dimensions in iris species',
    width=250,
    height=250,
)


# Strip plot to show distinction between petal length of different species

In [49]:
# One rule per row positioned at its petal length, coloured by species.
alt.Chart(
    iris,
    title='Strip plot showing distinction between petal length in iris species',
).mark_rule().encode(
    alt.X('petalLength'),
    alt.Color('species'),
)

# Similar to above - strip (tick) plot to show distinction between petal width of different species

In [48]:
# mark_tick draws one short tick per row at its petal width, i.e. a strip plot;
# the title names it accordingly (it is not a candlestick chart).
alt.Chart(iris, title='Strip plot showing distinction between petal width in iris species').mark_tick().encode(
    x='petalWidth',
    color='species',
)

# Scatter plot to show correlation between petal dimensions

In [47]:
# Petal width (x) against petal length (y) as filled circles, coloured by species.
alt.Chart(iris).mark_circle().encode(
    alt.X('petalWidth'),
    alt.Y('petalLength'),
    alt.Color('species'),
).properties(
    title='Scatter plot showing correlation between petal dimensions in iris species',
)

# Box plot to show correlation between petal dimensions and variance in data / margin of error

In [54]:
# Box plot with petalWidth on x and petalLength on y (both quantitative),
# coloured by species; axis label/title fonts are set at the top level.
alt.Chart(iris).mark_boxplot().encode(
    alt.X('petalWidth:Q'),
    alt.Y('petalLength:Q'),
    alt.Color('species:N'),
).properties(
    title='Box plot showing correlation between petal dimensions in iris species',
    width=600,
).configure_axis(labelFontSize=12, titleFontSize=12)


# Bar chart showing petal width of different species

In [32]:
# Mean petal width per species.
# Without an aggregate, mark_bar stacks one segment per row, so each bar's
# length is the sum of petalWidth over every row of that species rather than
# a typical value; aggregating with mean() gives one comparable bar each.
alt.Chart(iris, title='Mean petal width of iris species').mark_bar().encode(
    x='mean(petalWidth):Q',
    y='species:N',
    color='species:N',
)

# Bar chart showing number of records for fixed ranges of petal lengths of different iris species to show estimated frequencies

In [64]:
# Histogram: petal length binned (at most 20 bins) on x, record count on y,
# bars coloured by species.
alt.Chart(iris).mark_bar().encode(
    alt.X('petalLength', bin=alt.Bin(maxbins=20)),
    alt.Y('count()'),
    alt.Color('species'),
)


# Density chart showing typical distribution of iris dimensions

In [None]:

# Fold the four measurement columns into (Measurement_type, value) pairs,
# estimate a density of `value` per measurement over [0, 8], and draw one
# area chart row per measurement.
alt.Chart(iris).transform_fold(
    ['petalWidth', 'petalLength', 'sepalWidth', 'sepalLength'],
    as_=['Measurement_type', 'value'],
).transform_density(
    density='value',
    groupby=['Measurement_type'],
    bandwidth=0.3,
    extent=[0, 8],
).mark_area().encode(
    x='value:Q',
    y='density:Q',
    row='Measurement_type:N',
).properties(width=300, height=50)

STUDENT PERFORMANCE DATASET

In [None]:
import numpy as np
import altair as alt
import pandas as pd



In [None]:
# Load the student performance table from math.csv (path is relative to the
# working directory) and show its (rows, columns).
# NOTE(review): this rebinds `data`, which earlier in the notebook is the
# vega_datasets loader used by `data.iris()`; re-running the iris load cell
# after this one would fail. A distinct name would avoid that, but every
# later cell reads `data`, so they would all need renaming together.
data = pd.read_csv("math.csv")
data.shape

(395, 33)

In [None]:
# Preview the first five rows of the student table.
data.head(5)

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10


In [None]:
# Column names available for the chart encodings below.
data.columns

Index(['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu',
       'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime', 'studytime',
       'failures', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery',
       'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc',
       'Walc', 'health', 'absences', 'G1', 'G2', 'G3'],
      dtype='object')

In [None]:
# G3 (x) against G1 (y), one point per row, coloured by sex.
alt.Chart(data).mark_point().encode(
    alt.X('G3'),
    alt.Y('G1'),
    alt.Color('sex'),
)

In [None]:
# One tick per row: G3 on x, split into rows by the nominal `activities`
# field, coloured by sex.
alt.Chart(data).mark_tick().encode(
    alt.X('G3'),
    alt.Y('activities:N'),
    alt.Color('sex:N'),
)

In [None]:
# Brushed scatter of G3 against G1: points inside the dragged interval keep
# their sex colour, points outside it are greyed out.
# NOTE(review): add_selection is kept to match the Altair version this
# notebook was written against; newer Altair releases prefer add_params.
brush = alt.selection_interval()
base = alt.Chart(data).add_selection(brush)
# mark_point has to be *called*: the bare attribute is a bound method, not a
# chart, so nothing could be encoded or displayed from it.
points = base.mark_point().encode(
    x='G3',
    y='G1',
    color=alt.condition(brush, 'sex:N', alt.value('lightgray')),
)
points

In [None]:
# Number of rows at each G3 value, split by sex, with pan/zoom enabled.
# Every un-aggregated field in an encoding acts as a group-by key for the
# aggregate, so a raw `absences` entry in the tooltip would split each count
# by absences value. The tooltip therefore carries only the grouping fields
# plus aggregated values (and lists each field once).
alt.Chart(data).mark_circle(size=60).encode(
    x='G3',
    y='count(G3)',
    color='sex',
    tooltip=['G3', 'sex', 'count(G3)', 'mean(absences)'],
).interactive()


In [None]:
# G3 (x) against absences (y), one point per row, coloured by sex.
alt.Chart(data).mark_point().encode(
    alt.X('G3'),
    alt.Y('absences'),
    alt.Color('sex'),
)

In [None]:
# Same G3-vs-absences scatter, restricted to rows where G3 equals 15.
alt.Chart(data).transform_filter(
    alt.datum.G3 == 15
).mark_point().encode(
    alt.X('G3'),
    alt.Y('absences'),
    alt.Color('sex'),
)

In [None]:
# Row count per address value among rows with G3 equal to 10, coloured by sex.
alt.Chart(data).transform_filter(
    alt.datum.G3 == 10
).mark_bar().encode(
    alt.X('address'),
    alt.Y('count(G3)'),
    alt.Color('sex:N'),
)

In [None]:

# Bars over binned absences (default binning) with G3 on y, coloured by sex.
# NOTE(review): G3 carries no aggregate here, so the bar segments presumably
# stack per row - confirm whether a mean/median of G3 was intended.
alt.Chart(data).mark_bar().encode(
    alt.X("absences:Q", bin=True),
    alt.Y('G3'),
    alt.Color('sex'),
)

In [None]:
# Row count at each G3 value, faceted into one column per sex and coloured
# by the same field.
alt.Chart(data).mark_bar().encode(
    alt.X("G3"),
    alt.Y("count(G3):Q"),
    alt.Color("sex:N"),
    alt.Column("sex"),
)

In [None]:
# Histogram of G3 (at most 20 bins) with bars coloured by sex.
alt.Chart(data).mark_bar().encode(
    alt.X('G3', bin=alt.Bin(maxbins=20)),
    alt.Y('count()'),
    alt.Color('sex'),
)

In [None]:
# Line of row counts at each G2 value, one line per sex.
alt.Chart(data).mark_line().encode(
    x='G2',
    y='count(G2)',
    color='sex:N',
)