In [3]:
import altair as alt
import pandas as pd

In [4]:
# Switch Altair's active data transformer to 'csv' before any chart is built.
# The frame loaded below has 101,766 rows (see the data.info() output); presumably this
# avoids Altair's default row limit for inline data -- TODO confirm against the Altair docs.
alt.data_transformers.enable('csv')

DataTransformerRegistry.enable('csv')

In [17]:
# Location of the diabetes encounters CSV, relative to the notebook's working directory.
# Forward slashes are used throughout: the original literal mixed '\' and '/' and relied on
# '\D' and '\d' not being recognised escape sequences, which newer Python versions warn about
# and which only resolves on Windows.
url = 'Documents/DataAcademy/dataset_diabetes/diabetic_data.csv'

# Load the full dataset and print a column-by-column summary (non-null counts and dtypes).
data = pd.read_csv(url)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101766 entries, 0 to 101765
Data columns (total 50 columns):
 #   Column                    Non-Null Count   Dtype 
---  ------                    --------------   ----- 
 0   encounter_id              101766 non-null  int64 
 1   patient_nbr               101766 non-null  int64 
 2   race                      101766 non-null  object
 3   gender                    101766 non-null  object
 4   age                       101766 non-null  object
 5   weight                    101766 non-null  object
 6   admission_type_id         101766 non-null  int64 
 7   discharge_disposition_id  101766 non-null  int64 
 8   admission_source_id       101766 non-null  int64 
 9   time_in_hospital          101766 non-null  int64 
 10  payer_code                101766 non-null  object
 11  medical_specialty         101766 non-null  object
 12  num_lab_procedures        101766 non-null  int64 
 13  num_procedures            101766 non-null  int64 
 14  num_

## Dashboard creation using Altair
### To create a dashboard, we first create the component charts (bar charts, histograms, scatterplots, etc.) and then combine them all together into a single dashboard

In [14]:
# Bar chart with num_medications on the x axis and gender on the y axis.
chart = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        alt.X('num_medications'),
        alt.Y('gender'),
    )
)
chart

### Generating a Histogram

In [16]:
# Encoding shorthands for the two histogram axes.
x = 'num_medications:Q'   # quantitative field; binned in the encoding below
y = 'count()'             # count() aggregates to the number of records in each bin

# Both shorthands are used here; the original repeated the 'count()' literal and left `y` unused.
hist = alt.Chart(data).mark_bar().encode(alt.X(x, bin = True), y = y)
hist

#### We can make the graph more informative by passing a field to the `color=` encoding, which colours the bars by that field

In [18]:
# Derive a coloured variant of the histogram: bars are coloured by the nominal `race` field.
colour = 'race:N'
hist_colour = hist.encode(alt.Color(colour))
hist_colour


### Generating a scatterplot

In [20]:
# Scatterplot of num_medications (x) against num_lab_procedures (y),
# with each circle coloured by the nominal `insulin` field.
scatter = (
    alt.Chart(data)
    .mark_circle()
    .encode(
        x = 'num_medications:Q',
        y = 'num_lab_procedures:Q',
        color = alt.Color('insulin:N'),
    )
)
scatter

### To see the distribution within our plotted histogram, we can use the facet() function

In [22]:
# Step 1: build the base chart -- a 300x300 binned histogram of num_medications,
#         coloured by race with the legend switched off.
# Step 2 (next cell): turn it into a faceted chart with facet().

base = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x = alt.X('num_medications:Q', bin = True),
        y = 'count()',
        color = alt.Color('race:N', legend = None),
    )
    .properties(width = 300, height = 300)
)
base

In [23]:
# Field used to split the base histogram into separate panels.
column = 'race:N'

# Pass the variable instead of repeating the literal (the original left `column` unused).
# It is still given positionally, so the faceting behaviour is the same as before.
facet_hist = base.facet(column)
facet_hist

### Generating a heatmap

In [24]:
# First heatmap: one rectangle per age (x) / race (y) combination,
# coloured by the mean of number_diagnoses for that combination.
Heatmap = (
    alt.Chart(data)
    .mark_rect()
    .encode(
        alt.X('age'),
        alt.Y('race'),
        alt.Color('mean(number_diagnoses)'),
    )
)
Heatmap

In [35]:
# Customise the heatmap: swap in a different colour scheme, then resize it.

dim = 300

# Same colour field as before, now drawn with the 'goldred' scheme.
Customized_heatmap = Heatmap.encode(
    alt.Color('mean(number_diagnoses)', scale = alt.Scale(scheme = 'goldred'))
)

# Wide, short layout: the height is 40% of the width.
Heatmap1 = Customized_heatmap.properties(height = dim * 0.4, width = dim)
Heatmap1


### Dashboard

In [41]:
# Dashboards are built by composing charts into metacharts.
# Vertical concatenation (the '&' operator, i.e. alt.vconcat) stacks charts top to bottom;
# horizontal concatenation (the '|' operator, i.e. alt.hconcat) places them side by side.
# Here the coloured histogram sits above the scatterplot, and each keeps its own colour scale.

metachart = alt.vconcat(hist_colour, scatter).resolve_scale(color = 'independent')
metachart

In [42]:
# Final dashboard: the metachart on the left and the customised heatmap on the right.
Dashboard = alt.hconcat(metachart, Heatmap1)
Dashboard

## The design choice
#### The following charts were used due to the following reasons:
#### - Scatterplot: It was used to show the density of the distribution of patients that received insulin. Scatterplots also show the relationship between two variables.
#### - Heatmap: It was used to show how the mean number of diagnoses is concentrated across the different race/ethnicity and age groups
#### - Stacked Histogram: It was used to show the distribution of medications within the different races.

## Decision
#### The data at hand influenced our choice of design. Basically, using either a scatterplot or a heatmap can tell more of the story about the density or concentration of activity within a particular dataset.

## Insight
#### The heatmap showed that patients within the age group of 70-100 had more diagnoses for disease and Caucasians had more disease cases.
#### The histogram showed that Caucasians received more medications than the other ethnic groups. African Americans were the ethnic group that received the second-most medications.