### exploring diamond's dataset for statistical_variations

### Remarks-
- Disable the maximum rows with this command- "alt.data_transformers.disable_max_rows() if the number exceeds 5000"
- use .properties() instead of .save() to display the illustrations as outputs. The only problem though is that two outputs can not be executed in the single cell
- made boxplots with _mark.boxplots(). Next, a bar chart was colored either using the 'stroke aesthetic', or, more usefully, 'color'
- for bars, with variable 'clarity' they are automatically stacked with the help of _mark.bar().
    - stacked the bar graphs
    - compared the bar-graphs with the common baseline
    - gallery for bar charts (altair) https://altair-viz.github.io/gallery/index.html#bar-charts
- Interactive bar maps from a single data source
- Awesome resource for spacial data science studies from penn state https://www.e-education.psu.edu/geog489/node/2201 
- Altair's gallery for maps https://altair-viz.github.io/gallery/index.html#maps

In [None]:
# installing missing libraries such as geopandas, gpdvega

In [None]:
pip install geopandas

In [None]:
pip install gpdvega

In [None]:
pip install -e git+https://github.com/altair-viz/altair.git

In [None]:
pip install --upgrade pip

In [None]:
#importing dictionaries
import pandas as pd   
import altair as alt  

In [None]:
url = "https://github.com/byuidatascience/data4python4ds/raw/master/data-raw/diamonds/diamonds.csv"
diamonds = pd.read_csv(url)

diamonds['cut'] = pd.Categorical(diamonds.cut, 
  ordered = True, 
  categories =  ["Fair", "Good", "Very Good", "Premium", "Ideal" ])

diamonds['color'] = pd.Categorical(diamonds.color, 
  ordered = True, 
  categories =  ["D", "E", "F", "G", "H", "I", "J"])


diamonds['clarity'] = pd.Categorical(diamonds.clarity, 
  ordered = True, 
  categories =  ["I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"])

In [None]:
alt.data_transformers.disable_max_rows()

In [None]:
chart = (alt.Chart(diamonds)
  .encode(
    x = "cut",
    y = alt.Y("count():Q")
    )
  .mark_bar()
  .properties(width = 400))

chart.properties()

contains information about ~54,000 diamonds, including the price, carat, color, clarity, and cut of each diamond

In [None]:
chart = (alt.Chart(diamonds)
  .encode(
    x =alt.X("price", bin=True),
    y =alt.Y("count()")
    )
  .mark_bar())

chart.properties()

In [None]:
chart = (alt.Chart(diamonds)
  .encode(
    y ="price",
    x ="cut"
    )
  .mark_boxplot(size = 25)
  .properties(width = 300))

chart.properties()

In [None]:
chart_left = (alt.Chart(diamonds)
  .encode(
    x = "cut",
    y = alt.Y("count()"),
    stroke = "cut"
    )
  .mark_bar()
  .properties(width = 200))
  
chart_right = (alt.Chart(diamonds)
  .encode(
    x = "cut",
    y = alt.Y("count()"),
    color = "cut"
    )
  .mark_bar()
  .properties(width = 200)) 

chart_left.properties()

In [None]:
chart_right.properties()

In [None]:

chart = (alt.Chart(diamonds)
  .encode(
    x = "cut",
    y = alt.Y("count()"),
    color = "clarity"
    )
  .mark_bar()
  .properties(width = 200))

chart.properties()

In [None]:
# now, we normalize the bar graphs to see the stacked position of it. 

chart = (alt.Chart(diamonds)
  .mark_bar()
  .encode(
    x = "cut",
    y = alt.Y("count()", stack='normalize'),
    color = "clarity"
    )
  .mark_bar()
  .properties(width = 200))

chart.properties()

In [None]:
# comparing the bar-graphs with the commonbaseline

chart = (alt.Chart(diamonds)
  .encode(
    x='clarity',
    y=alt.Y('count()'),
    color='clarity',
    column='cut'
    )
  .mark_bar())

chart.properties()

In [None]:
#Interactive bar maps from a single data source
# the graph shows the population of an african continent

import altair as alt
import geopandas as gpd
import gpdvega

alt.renderers.enable('notebook') # render for Jupyter Notebook

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

brush = alt.selection_single(encodings=["y"],on="mouseover", empty='none')
color = alt.Color('pop_est', scale= alt.Scale(type='pow', exponent=0.4))

alt.hconcat(
    alt.Chart().mark_bar().encode(
        x=alt.X('pop_est', scale=alt.Scale(nice=False)),
        y=alt.Y('name', sort=alt.EncodingSortField(field='pop_est',
                                            op='sum', order='descending')),
        tooltip=['name','pop_est','gdp_md_est'],
        color=alt.condition(brush, alt.value('lightgray'), color)
        ).add_selection(
            brush
        ).properties(
            width=200,
    
            height=450
        ),
    alt.Chart().mark_geoshape().project().encode(
        color=alt.condition(
            brush,
            alt.value('lightgray'),
            color,
        ),
        tooltip=['name','pop_est','gdp_md_est'],
        ).properties(
            width=300,
            height=450,
            title='Africa population'
        ),
    data=world[world.continent == 'Africa']
)

In [1]:
import altair as alt
import geopandas as gpd
import gpdvega

alt.renderers.enable('notebook') # render for Jupyter Notebook
alt.data_transformers.enable(consolidate_datasets=False) # altair issue #1091

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

alt.Chart(
    data = gpdvega.geojson_feature( # converting to GeoJSON object
                world[world.continent=='Africa'],
                "features" # split collection of features into objects
        )
).mark_geoshape(
).project(
).encode(
    fill = alt.Color('id:N',legend=None),
    # data values are stored under nested `properties` object
    tooltip=['properties.name:O'],
).properties(
    width=500,
    height=300
)

ValueError: 
To use the 'notebook' renderer, you must install the vega package
and the associated Jupyter extension.
See https://altair-viz.github.io/getting_started/installation.html
for more information.


In [2]:
# world map view
import altair as alt
from vega_datasets import data

# Data generators for the background
sphere = alt.sphere()
graticule = alt.graticule()

# Source of land data
source = alt.topo_feature(data.world_110m.url, 'countries')

# Layering and configuring the components
alt.layer(
    alt.Chart(sphere).mark_geoshape(fill='lightblue'),
    alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5),
    alt.Chart(source).mark_geoshape(fill='ForestGreen', stroke='black')
).project(
    'naturalEarth1'
).properties(width=600, height=400).configure_view(stroke=None)

In [3]:
import altair as alt
from vega_datasets import data

source = alt.topo_feature(data.world_110m.url, 'countries')

base = alt.Chart(source).mark_geoshape(
    fill='#666666',
    stroke='white'
).properties(
    width=300,
    height=180
)

projections = ['equirectangular', 'mercator', 'orthographic', 'gnomonic']
charts = [base.project(proj).properties(title=proj)
          for proj in projections]

alt.concat(*charts, columns=2)