In [2]:
"""
canonical imports
"""
import pandas as pd
import numpy as np
import xlsx
import bamboos
import altair as alt
import bquer
import bquer.magic
import veggy
from senddf import senddf, recvdf


# Section 0: Utils

## Disabling Max Rows

In [None]:
alt.data_transformers.disable_max_rows()

## Improving Quality of Visualizations of a scale factor

In [None]:
alt.renderers.enable("png", scale_factor=3.0)

## Sharing plots

In [None]:
# General
chart= alt.Chart()# etc...

#share
shared_plot = chart.vshare(filename="Filename", width=1000, height=600)

# get the url
print(shared_plot.url)

# show the embedded plot
shared_plot

# Section 1: General 

## The Axis

In [28]:
# The axis argument is used to pass specification on the axis. Mainly Lables Angle and Format
# Format can be specified using 0.x where x is the number of decimals plus % or f 

source=pd.DataFrame(data={'a':[1,2,3,4,5,6],'b':[6,5,4,3,2,1]})

(alt.Chart(source)
    .mark_line()
    .encode(
        x=alt.X('a:Q',axis=alt.Axis(
                                    labelAngle=10,
                                    format='0.4%'
                                    )
                ),
        y=alt.Y('b:Q',axis=alt.Axis(
                                    labelAngle=20,
                                    format='0.5f'
                                    )
               )
        )
)

#To Copy: axis=alt.Axis(labelAngle=,format='')

## The Scale

In [31]:
#Using the scale atribute we can specify the length of the axis

source=pd.DataFrame(data={'a':[1,2,3,4,5,6],'b':[6,5,4,3,2,1]})

(alt.Chart(source)
    .mark_line()
    .encode(
        x=alt.X('a:Q',scale=alt.Scale(domain=[-100,10])
                ),
        y=alt.Y('b:Q',scale=alt.Scale(domain=[0,100])
               )
        )
)

#To Copy: scale=alt.Scale(domain=[,])

## Sort

In [43]:
# Using Sort we can specify the order of the data. For now I only know how to order based on X or Y 

source=pd.DataFrame(data={'a':['cat_1','cat_2','cat_3','cat_4','cat_5','cat_6'],'b':[6,5,4,3,2,1]})

(alt.Chart(source)
    .mark_line()
    .encode(
        x=alt.X('a:N', axis=alt.Axis(labelAngle=0), sort='-x',
                
                ),
        y=alt.Y('b:Q'
               )
        )
    .properties(width=500)
)

# To Copy: sort=''

## The Title 

In [44]:
# Change Axis Title

source=pd.DataFrame(data={'a':['cat_1','cat_2','cat_3','cat_4','cat_5','cat_6'],'b':[6,5,4,3,2,1]})

(alt.Chart(source)
    .mark_line()
    .encode(
        x=alt.X('a:N', title='Bix X Title'
                
                ),
        y=alt.Y('b:Q', title='Bix Y Title'
               )
        )
    .properties(width=500)
)

In [74]:
#Change General Title

source=pd.DataFrame(data={'a':['cat_1','cat_2','cat_3','cat_4','cat_5','cat_6'],'b':[6,5,4,3,2,1]})

chart_title= alt.TitleParams(
             ['Main Title line 1','Main Title Line 2'],
             orient='bottom', #also top,left,right
             anchor='start', #also middle,end
             offset=50, #Makes it further or closer to the plot
             fontSize=40,
             color='green',
             subtitle='subtitle',
             subtitleFontSize=30,
             subtitleColor='red'
)

(alt.Chart(source, title= chart_title)
    .mark_line()
    .encode(
        x=alt.X('a:N', title=None), #Erease axis title
        y=alt.Y('b:Q', title=None)
        )
    .properties(width=500)
)



## Text

In [105]:
# Is possible to add text lable on top of the plots

source=pd.DataFrame(data={'a':[1,2,3,4,5,6],'b':[6,5,4,3,2,1]})

chart=(alt.Chart(source)
            .mark_point()
            .encode(
                x=alt.X('a:Q',axis=alt.Axis(
                                            labelAngle=0,
                                            format='0.1f'
                                            ),
                        scale=alt.Scale(domain=[0,10])
                        ),
                y=alt.Y('b:Q',axis=alt.Axis(
                                            labelAngle=0,
                                            format='0.0f'
                                            ),
                        scale=alt.Scale(domain=[0,10])
                       ),
                
                text=alt.Text('a:Q',format='0.0f') #specify column the text element will be taken from 
                )
        )

chart+chart.mark_text(align='center', dy=20,dx=-10) #Layer the simple plot and the Text 

## Frame Elements

In [129]:
# Is possible to remove specific elements of the plot frame like:
# the Grid, 
# the Domain (Solid Y and X axis lines), 
# the Axes (numbers on the axis), 
# the Stroke (border around the plot)


source=pd.DataFrame(data={'a':[1,2,3,4,5,6],'b':[6,5,4,3,2,1]})

(alt.Chart(source)
    .mark_line()
    .encode(
        x=alt.X('a:Q', axis=None),
        y=alt.Y('b:Q', axis= None)
            ) 
    .configure_axis(grid=False, domain=False)
    .configure_view(stroke=None)
)

## Colors

In [5]:
# It is possible to assign specific colors to different categories in a group 


source=pd.DataFrame(data={'a':['cat_1','cat_2','cat_3','cat_4','cat_5','cat_6'],'b':[6,5,4,3,2,1]})


domain = ['cat_1','cat_2','cat_3','cat_4','cat_5','cat_6']
range_ = ['Black','Orange','Purple','Blue','Green','Yellow']


(alt.Chart(source)
    .mark_bar()
    .encode(
        x=alt.X('a:N', axis=alt.Axis(labelAngle=0), sort='-x',
                
                ),
        y=alt.Y('b:Q'
               ),
        color=alt.Color('a',
                        scale=alt.Scale(domain=domain, range=range_))
        )
    .properties(width=500)
)

# Section 2: Basic Plots

## Dashed Line plot with Points

In [None]:
data={
    'time':[i for i in range(0,50)],
    'money':[i for i in range(1000,0,-20)]

}

source=pd.DataFrame(data=data)

source

In [100]:
chart=(
    alt.Chart(source)
    .mark_line(color='black',
               point=alt.OverlayMarkDef(color='red',
                                        size=50
                                       ),
               strokeWidth=1,
               strokeDash=[12, 6]
               
               
              )
    .encode(
        x=alt.X('time:Q'),
        y=alt.Y('money:Q')
    )
    
)

chart

## Bar Chart

In [74]:
data={
    'case':[0,1,2,4,5,6,7,8,9,10],
    'perc':[.12,.14,.11,.11,.3,.7,.4,.5,.3,.2],

}

source=pd.DataFrame(data=data)

source

Unnamed: 0,case,perc
0,0,0.12
1,1,0.14
2,2,0.11
3,4,0.11
4,5,0.3
5,6,0.7
6,7,0.4
7,8,0.5
8,9,0.3
9,10,0.2


In [78]:
bars=(alt.Chart(source)
    .mark_bar()
    .encode(
        x=alt.X('perc',
                title='Metric of Interest',
                axis=alt.Axis(format='0.0%'),
                scale=alt.Scale(domain=[0,1])
               ),
        y=alt.Y('case:N',
                title='Test Case',
                axis=alt.Axis(labelAngle=360)
               ),
        color='case:N',
        text=alt.Text('perc:Q', format='0.1%')
    )
)
bars

## Stacked Bar Chart

In [11]:
data={
    'country':['IT','IT','IT','IT','CA','CA','CA','CA','DE','DE','DE','DE'],
    'device':['5-High','4-Medium','3-Low','1-UltraLow','5-High','4-Medium','3-Low','1-UltraLow','5-High','4-Medium','3-Low','1-UltraLow'],
    'perc':[.2,.2,.3,.3,.4,.1,.3,.2,.5,.1,.1,.3]
}

source=pd.DataFrame(data=data)

source

Unnamed: 0,country,device,perc
0,IT,5-High,0.2
1,IT,4-Medium,0.2
2,IT,3-Low,0.3
3,IT,1-UltraLow,0.3
4,CA,5-High,0.4
5,CA,4-Medium,0.1
6,CA,3-Low,0.3
7,CA,1-UltraLow,0.2
8,DE,5-High,0.5
9,DE,4-Medium,0.1


In [16]:
chart=(alt.Chart(source)
       .mark_bar()
       .encode(
            y=alt.Y('country:N',
                    title=None, 
                    axis=alt.Axis(labelAngle=0)
                   ),
            x=alt.X('perc:Q',
                    title=None, 
                    axis=alt.Axis(format='.0%'),
                    sort='y'
                   ),
            color= alt.Color('device:N')
            )
       .properties(height=300,width=700)
      )

chart

## Scatter Plot

In [39]:
data={
    'height':np.random.binomial(n=100, p=.5, size=5000),
    'weight':np.random.binomial(n=1000, p=0.1, size=5000)
}

source=pd.DataFrame(data=data)

source

Unnamed: 0,height,weight
0,44,103
1,55,93
2,38,99
3,47,108
4,56,100
...,...,...
4995,55,121
4996,46,114
4997,57,90
4998,49,98


In [40]:
chart=(alt.Chart(source)
       .mark_circle(size=10)
       .encode(
            x='height:Q',
            y='weight:Q'
           )
      )

chart

## Reference Line

In [None]:
ref_line = alt.Chart(df).mark_rule(color="red",).encode(y=alt.datum(400))

In [None]:
ref_line_1 = (
    alt.Chart(df)
    .mark_rule(color="black", strokeWidth=0.15, strokeDash=[12, 6])
    .encode(x="days_since_install:O")
    .transform_calculate(days_since_install="30")  # Constant value for the line
)

## Error Bars

## Next

# Section 3: Complex Plots

## Horizontal Bar Chart with Lables and Adjusted Opacity

In [137]:
# Data
data= {
    'detected_tier':['1-UltraLow','3-Low','4-Medium','5-High'],
    '7 Day Retention': [.084,.095,.125,.15],
    'opacity':[1,0,0,1]
}

source= pd.DataFrame(data=data)

#Plot

chart= (
    alt.Chart(source)
          .mark_bar()
          .encode(
                y=alt.Y('detected_tier:N',
                        title=None, 
                        axis=alt.Axis(labelAngle=0),
                        sort='-y' ),

                x=alt.X('7 Day Retention:Q',
                        title='7 Day Retention', 
                        axis=None
                       ),

                text=alt.Text('7 Day Retention:Q', 
                              format='0.1%'),

                color= alt.Color('detected_tier:N',
                                 legend=None),
                opacity= alt.condition(
                            alt.datum.opacity==1,
                            alt.value(1),
                            alt.value(.4)
                )
              )
        )

chart_title= alt.TitleParams(
             ['Bar Chart With Lables and Adjusted Opacity'],
             orient='bottom', #also top,left,right
             anchor='start', #also middle,end
             offset=20, #Makes it further or closer to the plot
             fontSize=20,
             color='black',
             subtitle='No X axis, no grid no stroke no domain  ',
             subtitleFontSize=15,
             subtitleColor='black'
)


layer=(alt.layer(chart+chart.mark_text(align='center', dy=-0,dx=20),
                 title=chart_title)
         .configure_axis(grid=False, domain=False)
         .configure_view(stroke=None)
         .properties(height=300,width=500)
      )
        
layer


## Connected Line Plot

In [4]:
# Data

data= {
    'detected_tier':['1-UltraLow','1-UltraLow','1-UltraLow','1-UltraLow','1-UltraLow',
                     '5-High','5-High','5-High','5-High','5-High'],
    'install_country': ['CA','DE','MY','MX','ID','CA','DE','MY','MX','ID'],
    'perc':[.16,.15,.37,.50,.36,.12,.13,.23,.33,.14]
}
source= pd.DataFrame(data=data)
source

Unnamed: 0,detected_tier,install_country,perc
0,1-UltraLow,CA,0.16
1,1-UltraLow,DE,0.15
2,1-UltraLow,MY,0.37
3,1-UltraLow,MX,0.5
4,1-UltraLow,ID,0.36
5,5-High,CA,0.12
6,5-High,DE,0.13
7,5-High,MY,0.23
8,5-High,MX,0.33
9,5-High,ID,0.14


In [5]:
data_text= {
    'install_country':['CA','DE','MY','MX','ID'],
    'diff':[.04,.02,.15,.17,.36],
    'position':[.14,.14,.3,.42,.25]
}

source_text= pd.DataFrame(data=data_text)
source_text

Unnamed: 0,install_country,diff,position
0,CA,0.04,0.14
1,DE,0.02,0.14
2,MY,0.15,0.3
3,MX,0.17,0.42
4,ID,0.36,0.25


In [14]:
line = (alt.Chart(source)
        .mark_line(color='grey')
        .encode(
            x=alt.X('perc:Q',
                    title='', 
                    axis=alt.Axis(format='0.0%'),
                    scale=alt.Scale(domain=[0,1])
                   ),
            y=alt.Y('install_country:N',
                    title=None, 
                    axis=alt.Axis(labelAngle=0),
                    sort='-x'
                   ),
            detail='install_country:N'
                )
        )

domain = ['1-UltraLow', '5-High']
range_ = ['#FF6A00','#169FD9']


points = (alt.Chart(source)
          .mark_point(
            size=50,
            opacity=1,
            filled=True
          )
          .encode(
            x='perc:Q',
            y='install_country:N',
            color=alt.Color('detected_tier',scale=alt.Scale(domain=domain, range=range_))
          )
         )

text= (alt.Chart(source_text)
       .mark_text(align='center', dy=-10,dx=0,color='grey')
       .encode(
          y=alt.Y('install_country:N'),
          x=alt.X('position:Q'),
          text=alt.Text('diff:Q',format='0.0%')
              )
      )

chart_title= alt.TitleParams(
             ['Connected Line Plot'],
             orient='bottom', #also top,left,right
             anchor='start', #also middle,end
             offset=20, #Makes it further or closer to the plot
             fontSize=20,
             color='black',
             subtitle='Grid Domain No Stroke  ',
             subtitleFontSize=15,
             subtitleColor='black'
)


layer= (alt.layer(
            line+points+text,
            title=chart_title
        )
        .configure_axis(grid=True, domain=True)
        .configure_view(stroke=None)
        .properties(width=500,height=500)
       )

layer

## Correlation Matrix

In [179]:
#Data 

data={
    'variableDx':['var1','var1','var1','var2','var2','var2','var3','var3','var3'],
    'variableSx':['var1','var2','var3','var1','var2','var3','var1','var2','var3'],
    'correlation': [1,.5,.3,.5,1,.1,.3,.1,1],
    'correlation_label':[1,.5,.3,.5,1,.1,.3,.1,1] # here is the same but it can be used to make the text prettier
}

source=pd.DataFrame(data=data)

source

Unnamed: 0,variableDx,variableSx,correlation,correlation_label
0,var1,var1,1.0,1.0
1,var1,var2,0.5,0.5
2,var1,var3,0.3,0.3
3,var2,var1,0.5,0.5
4,var2,var2,1.0,1.0
5,var2,var3,0.1,0.1
6,var3,var1,0.3,0.3
7,var3,var2,0.1,0.1
8,var3,var3,1.0,1.0


In [180]:
base = (alt.Chart(source)
        .mark_rect()
        .encode(
            x=alt.X('variableDx',
                    title=None,
                    axis=alt.Axis(labelAngle=-30),
                    sort='x'),
            y=alt.Y('variableSx', 
                    title=None),
            color='correlation:Q'
            )
)

text = (base.mark_text()
        .encode(
            text='correlation_label',
            color=alt.condition(
                alt.datum.correlation > 0.5, 
                alt.value('white'),
                alt.value('black')
                )
            )
       )

layer= alt.layer(base+text).properties(width=500,height=500)

layer

## Faceted Distribution Plots With Reference Line

In [3]:
df = pd.DataFrame(data={'avg_sharks_2021':np.random.binomial(n=100, p=0.5, size=1000)})

months=['1-Jan','2-Feb','3-Mar','4-Apr','5-May','6-Jun','7-Lug','8-Aug','9-Sep','99-Oct','999-Nov','9999-Dec']
df['month']= np.random.randint(low=0,high=12,size=1000)
df['month']= df['month'].apply(lambda x: months[x])

df['conto']=0

print(df)


source=df.groupby(['month','avg_sharks_2021'],as_index=False).count()
source=df.groupby(['month','avg_sharks_2021'],as_index=False).count()

source

     avg_sharks_2021     month  conto
0                 48     4-Apr      0
1                 48  9999-Dec      0
2                 46     4-Apr      0
3                 50    99-Oct      0
4                 51     7-Lug      0
..               ...       ...    ...
995               48     3-Mar      0
996               50     8-Aug      0
997               51     9-Sep      0
998               47     1-Jan      0
999               46     5-May      0

[1000 rows x 3 columns]


Unnamed: 0,month,avg_sharks_2021,conto
0,1-Jan,35,1
1,1-Jan,41,1
2,1-Jan,42,1
3,1-Jan,43,2
4,1-Jan,44,2
...,...,...,...
251,9999-Dec,58,4
252,9999-Dec,60,1
253,9999-Dec,61,2
254,9999-Dec,62,2


In [8]:
chart =(alt.Chart()
        .mark_bar()
        .encode(
            x=alt.X('avg_sharks_2021:Q',
                    title=None),
            y=alt.Y('conto:Q', 
                    title=None,
                    axis=alt.Axis(format='0.0f')
                   ),
            color=alt.Color('month:N',title=None)
        
        ).properties(width=200,height=200)
       )

line = (alt.Chart(pd.DataFrame({'x': [50]}
                              )
                 )
        .mark_rule()
        .encode(x='x')
       )

layer= (alt.layer(chart+line,
                  data=source)
        .facet('month:N',
               columns=4
              )
       
       )

layer

## Bar Chart With Reference Lines (for A/B test Comparisons)

In [58]:
data={
    'case':[0,1,2],
    'perc':[.12,.14,.11],
    'ci_h':[.13,.16,.115],
    'ci_l':[.11,.10,.105]
}

source=pd.DataFrame(data=data)

source

Unnamed: 0,case,perc,ci_h,ci_l
0,0,0.12,0.13,0.11
1,1,0.14,0.16,0.1
2,2,0.11,0.115,0.105


In [72]:
# Plot

bars=(alt.Chart(source)
    .mark_bar()
    .encode(
        x=alt.X('case:N',
                title='Test Case',
                axis=alt.Axis(labelAngle=360)
               ),
        y=alt.Y('perc',
                title='Metric of Interest',
                axis=alt.Axis(format='0.0%'),
                scale=alt.Scale(domain=[0,.3])
               ),
        color='case:N',
        text=alt.Text('perc:Q', format='0.1%')
    )
)

line=(alt.Chart(source)
    .mark_line()
    .encode(
        x='case:N',
        y='ci_l:Q',
        y2='ci_h:Q'
    )
)

layer=(alt.layer(bars,
                 bars.mark_text(align='center', dy=-40),
                 line)
       .properties(height=400,width=200)
      )

layer

## Next