### Altair is a Python Library designed for statistical visualization.
##### It is based on Vega and Vega-Lite, which are both visualization grammars.

In [2]:
import pandas as pd
import altair as alt

In [3]:
# Read the student performance data from the CSV file next to the notebook
# and preview its first five rows.
student = pd.read_csv('student performance.csv')
student.head(5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


# Bar chart 

In [None]:
# Bar chart: race/ethnicity on x, math score on y, with the bars coloured by
# the test preparation course. No aggregate is specified on the y channel.
(
    alt.Chart(student)
    .mark_bar(size=55)
    .encode(
        x='race/ethnicity',
        y='math score',
        color='test preparation course',
    )
    .properties(width=500, height=200)
)

##### Horizontal Bar Chart

In [5]:
# Horizontal bar chart: swapping the x and y channels turns the bars sideways.
# The tooltip shows the reading and writing scores when hovering over a bar.
(
    alt.Chart(student)
    .mark_bar(size=40)
    .encode(
        y='race/ethnicity',
        x='math score',
        color='race/ethnicity',
        tooltip=[
            alt.Tooltip('reading score'),
            alt.Tooltip('writing score'),
        ],
    )
    .properties(height=250, width=400)
)

##### Stacked Bar Chart

In [8]:
# Reshape the three score columns from wide to long format: the column name
# goes into `subjects` and the score into `marks`, keeping race/ethnicity
# as the identifier column.
stack = student.melt(
    id_vars=['race/ethnicity'],
    value_vars=['math score', 'reading score', 'writing score'],
    var_name='subjects',
    value_name='marks',
)
stack.head(5)

Unnamed: 0,race/ethnicity,subjects,marks
0,group B,math score,72
1,group C,math score,69
2,group B,math score,90
3,group A,math score,47
4,group C,math score,76


In [9]:
# Stacked bar chart: mean mark per race/ethnicity group, with one coloured
# segment per subject.
(
    alt.Chart(stack)
    .mark_bar()
    .encode(
        x='race/ethnicity',
        y='mean(marks)',
        color='subjects',
    )
    .properties(height=350, width=400, title='Average of each Subjects')
)


##### Grouped Bar Chart

In [10]:
# Grouped bar chart: one small panel (column facet) per race/ethnicity group,
# each showing the average mark per subject.
# The y channel uses the mean(...) aggregate so the bars match the chart title;
# a bare 'marks' field would draw the stacked total of all individual marks.
(
    alt.Chart(stack)
    .mark_bar()
    .encode(
        x=alt.X('subjects', axis=alt.Axis(titleFontSize=15)),
        y=alt.Y('mean(marks)', axis=alt.Axis(titleFontSize=15)),
        color='subjects',
        column='race/ethnicity',
    )
    .properties(height=200, width=140, title='Average marks per category of race')
    .configure_axis(labelFontSize=16)
    .configure_title(fontSize=18)
)

# Point

In [6]:
# Load the cars example dataset from the vega_datasets package
# and preview its first five rows.
from vega_datasets import data

df = data.cars()
df.head(5)

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [7]:
# Scatter plot of horsepower against fuel economy, coloured by origin.
# Hovering over a point shows the car's name and cylinder count.
(
    alt.Chart(df)
    .mark_point()
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
        color='Origin',
        tooltip=['Name', 'Cylinders'],
    )
    .properties(height=200, width=500)
)

# Histogram

In [11]:
# Read the diamonds data from the CSV file next to the notebook
# and preview its first five rows.
diamonds = pd.read_csv('diamonds.csv')
diamonds.head(5)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [12]:
# Histogram: bin the carat values on x and count the rows in each bin on y.
(
    alt.Chart(diamonds)
    .mark_bar(color='orange')
    .encode(
        x=alt.X('carat', bin=True),
        y='count()',
    )
    .properties(width=300)
)

In [13]:
# Data for the layered histogram: melt the `depth` and `table` columns into
# long format, with the column name in `type` and the measurement in `Value`.
layered = diamonds.melt(
    id_vars=['cut'],
    value_vars=['depth', 'table'],
    var_name='type',
    value_name='Value',
)

In [14]:
# Layered histogram of the `depth` and `table` measurements.
# stack=None draws the two distributions over each other instead of stacking
# them, and the reduced opacity keeps both visible where they overlap.
# The bar colour comes from the `type` colour encoding, so no mark-level colour
# is set: an encoding channel takes precedence over the mark property.
(
    alt.Chart(layered)
    .mark_bar(opacity=0.6, binSpacing=0)
    .encode(
        x=alt.X('Value', bin=alt.Bin(maxbins=120)),
        y=alt.Y('count()', stack=None),
        color='type:N',
    )
    .properties(width=700, height=300)
)

# Line plot

In [15]:
# Read the Apple price data from the CSV file next to the notebook
# and preview its first five rows.
apple = pd.read_csv('apple.csv')
apple.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-10-13,125.269997,125.389999,119.650002,121.099998,119.655579,262330500
1,2020-10-14,121.0,123.029999,119.620003,121.190002,119.744514,150712000
2,2020-10-15,118.720001,121.199997,118.150002,120.709999,119.270233,112559200
3,2020-10-16,121.279999,121.550003,118.809998,119.019997,117.60038,115393800
4,2020-10-19,119.959999,120.419998,115.660004,115.980003,114.596664,120639300


In [16]:
# Convert the Date column to pandas datetime values.
apple['Date'] = apple['Date'].pipe(pd.to_datetime)

In [17]:
# Line chart of the closing price over time.
# The date range in the title is read from the data itself, so it cannot drift
# out of sync with the file (the previous hard-coded title repeated the start
# date as the end date).
# pd.to_datetime makes this work whether or not Date has been converted yet.
price_dates = pd.to_datetime(apple['Date'])
close_title = 'Apple Close price from {:%Y-%m-%d} to {:%Y-%m-%d}'.format(
    price_dates.min(), price_dates.max()
)

(
    alt.Chart(apple)
    .mark_line(color='red')
    .encode(
        x='Date:T',
        # zero=False lets the y axis start near the data instead of at zero.
        y=alt.Y('Close:Q', scale=alt.Scale(zero=False)),
    )
    .properties(width=800, title=close_title)
    .configure_axis(labelFontSize=12)
)

Here we specify each column's data type with a one-character suffix, as in `'Date:T'`.

`'Date:T'` tells Altair that the Date column should be treated as temporal (date-time) data.

In the same way, `'Close:Q'` tells Altair to treat the Close column as quantitative data.

#### Why do we have to do it?
Because when Altair fails to infer the correct type of a column, we can specify the type explicitly like this.

##### Commonly Used Data Category in Altair:
    * T: Temporal (date-time)
    * Q: Quantitative
    * O: Ordinal
    * N: Nominal

##### Multiple Lines Chart

In [18]:
# Multiple Lines Chart
# Melt the four price columns into long format so each price type can be drawn
# as its own line: the column name goes into `Type` and the price into `Value`.
# No reset_index() is needed first: melt keeps only id_vars and value_vars,
# so an added index column would simply be discarded.
apple_multiple = apple.melt(
    id_vars=['Date'],
    value_vars=['Open', 'Close', 'High', 'Low'],
    var_name='Type',
    value_name='Value',
)
apple_multiple.head()

Unnamed: 0,Date,Type,Value
0,2020-10-13,Open,125.269997
1,2020-10-14,Open,121.0
2,2020-10-15,Open,118.720001
3,2020-10-16,Open,121.279999
4,2020-10-19,Open,119.959999


In [19]:
# One line per price type (the `Type` column), drawn over time.
(
    alt.Chart(apple_multiple)
    .mark_line()
    .encode(
        x='Date:T',
        # zero=False lets the y axis start near the data instead of at zero.
        y=alt.Y('Value:Q', scale=alt.Scale(zero=False)),
        color='Type',
    )
    .properties(width=750, title='Apple OHLC from 2020 to 2022')
    .configure_axis(labelFontSize=12)
)

# Area Chart

In [20]:
# Area chart of the closing price over time.
(
    alt.Chart(apple)
    .mark_area(color='tomato')
    .encode(
        x='Date',
        # zero=False lets the y axis start near the data instead of at zero.
        y=alt.Y('Close:Q', scale=alt.Scale(zero=False)),
    )
    .properties(width=500, title='Apple Area Chart from 2020 to 2022')
)

In [21]:
# Stacked Area Chart
# stack='zero' piles the areas for each price type on top of one another,
# starting from a zero baseline. The earlier stack=None turned stacking off,
# which made this cell identical to the non-stacked example.
(
    alt.Chart(apple_multiple)
    .mark_area()
    .encode(
        x='Date:T',
        y=alt.Y('Value:Q', stack='zero'),
        color='Type',
    )
    .properties(width=500)
)

In [22]:
# Area chart without stacking: stack=None turns stacking off, so the area for
# each price type is drawn from the same baseline and the areas overlap.
(
    alt.Chart(apple_multiple)
    .mark_area()
    .encode(
        x='Date:T',
        y=alt.Y('Value:Q', stack=None),
        color='Type',
    )
    .properties(width=500)
)

# Box Plot

In [23]:
# Box plot: distribution of math scores within each race/ethnicity group.
(
    alt.Chart(student)
    .mark_boxplot()
    .encode(
        x=alt.X('race/ethnicity'),
        # zero=False lets the y axis start near the data instead of at zero.
        y=alt.Y('math score:Q', scale=alt.Scale(zero=False)),
    )
    .properties(width=400, title='Distribution of Math Score per Race/Ethnicity')
)

In [25]:
# Box plot: distribution of marks for each subject, one box per subject.
# NOTE(review): the `subjects` colour encoding probably overrides the
# mark-level 'tomato' colour for the boxes - confirm whether it has any
# visible effect.
(
    alt.Chart(stack)
    .mark_boxplot(color='tomato')
    .encode(
        x=alt.X('subjects'),
        # zero=False lets the y axis start near the data instead of at zero.
        y=alt.Y('marks', scale=alt.Scale(zero=False)),
        color='subjects',
    )
    .properties(width=400, title='Distribution of Subject Marks')
)