# Categorical Data Plot

## Imports

In [1]:
import math
import pickle
from math import pi

import numpy as np
import pandas as pd
from bokeh.layouts import column, row, grid, gridplot
from bokeh.models import ColumnDataSource
from bokeh.palettes import Set3
from bokeh.plotting import figure, show
from bokeh.transform import cumsum
from bokeh.transform import factor_cmap, dodge

In [2]:
from bokeh.io import output_notebook

In [3]:
output_notebook()

In [4]:
# Load the pickled test dataset used throughout this notebook.
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# open pickle files that come from a trusted source.
with open("2022testdata-qm20.pickle", "rb") as pickle_file:
    data = pickle.load(pickle_file)

In [5]:
# Pull the two tables used below out of the loaded object.
measures, matches = data['measures'], data['matches']

In [6]:
measures.head(5)

Unnamed: 0,match,team_number,phase,task,measure1,measure2,measure_type
0,qm1,frc4683,auto,start_pos,center,-1,categorical
1,qm1,frc4131,auto,start_pos,left,-1,categorical
2,qm1,frc4131,auto,taxi,true,-1,boolean
3,qm1,frc4683,auto,taxi,true,-1,boolean
4,qm1,frc4131,auto,start_cargo,true,-1,boolean


In [7]:
matches.head(5)

Unnamed: 0,match,match_time,alliance,station,team_number
0,f1m1,2020-03-01 23:20:22,blue,1,frc2930
1,f1m1,2020-03-01 23:20:22,blue,2,frc2976
2,f1m1,2020-03-01 23:20:22,blue,3,frc4918
3,f1m1,2020-03-01 23:20:22,red,1,frc4911
4,f1m1,2020-03-01 23:20:22,red,2,frc2910


## Filtering

In [8]:
# Match id stored in the last row of `measures` (e.g. 'qm20' -> 20).
# The column is read by label: integer keys on a label-indexed Series
# (`last_row[0]`) are deprecated positional access in pandas 2.x.
last_row = measures.iloc[-1]
lm = last_row['match']
last_match = int(lm.split('m')[1])

# Keep qualification ('qm...') matches only; na=False treats missing match ids
# as non-matching, which is what the former `== True` comparison achieved.
qmmatch = matches[matches.match.str.startswith('qm', na=False)].copy()
# Numeric part after the 'm' of the match id, as a number so it can be compared.
qmmatch['match_number'] = pd.to_numeric(qmmatch['match'].str.split('m').str[1])

In [9]:
# Keep only the qualification matches numbered strictly below `last_match`.
# NOTE(review): the strict `<` leaves the last match found in `measures` out of
# the played-match counts, while its start_pos rows are still counted further
# down, so per-team ratios can exceed 1 -- confirm whether `<=` is intended.
lmatches = qmmatch.loc[qmmatch['match_number'] < last_match]
lmatches

Unnamed: 0,match,match_time,alliance,station,team_number,match_number
66,qm1,2020-02-29 19:06:04,blue,1,frc1318,1
67,qm1,2020-02-29 19:06:04,blue,2,frc4089,1
68,qm1,2020-02-29 19:06:04,blue,3,frc8059,1
69,qm1,2020-02-29 19:06:04,red,1,frc4131,1
70,qm1,2020-02-29 19:06:04,red,2,frc4683,1
...,...,...,...,...,...,...
505,qm9,2020-02-29 20:32:08,blue,2,frc1318,9
506,qm9,2020-02-29 20:32:08,blue,3,frc4512,9
507,qm9,2020-02-29 20:32:08,red,1,frc2930,9
508,qm9,2020-02-29 20:32:08,red,2,frc3268,9


In [10]:
# Number of rows in `lmatches` per team, as a one-column frame indexed by team_number.
rows_per_team = lmatches.groupby('team_number').size()
matches_played = rows_per_team.to_frame('matches_played')

In [11]:
# Restrict `measures` to start_pos rows whose team_number starts with 'frc'.
# na=False makes missing team numbers count as non-matching.
is_frc_team = measures['team_number'].str.startswith('frc', na=False)
is_start_pos = measures['task'] == 'start_pos'
measures = measures[is_frc_team & is_start_pos]
measures

Unnamed: 0,match,team_number,phase,task,measure1,measure2,measure_type
0,qm1,frc4683,auto,start_pos,center,-1,categorical
1,qm1,frc4131,auto,start_pos,left,-1,categorical
7,qm1,frc2412,auto,start_pos,right,-1,categorical
28,qm1,frc1318,auto,start_pos,right,-1,categorical
43,qm1,frc4089,auto,start_pos,center,-1,categorical
...,...,...,...,...,...,...,...
1106,qm20,frc4180,auto,start_pos,left,-1,categorical
1108,qm20,frc1899,auto,start_pos,right,-1,categorical
1111,qm20,frc2412,auto,start_pos,right,-1,categorical
1112,qm20,frc492,auto,start_pos,center,-1,categorical


In [12]:
# Count rows per (team, measure1 value), spread the measure1 values into
# columns, then attach each team's matches_played total.
position_counts = measures.groupby(['team_number', 'measure1']).size()
position_table = position_counts.unstack("measure1")
pichart = position_table.merge(matches_played, on='team_number')
pichart.head(5)

Unnamed: 0_level_0,center,left,right,matches_played
team_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
frc1294,1.0,,2.0,3
frc1318,,1.0,2.0,3
frc1778,1.0,,1.0,3
frc1899,2.0,,2.0,3
frc2412,1.0,,3.0,3


In [13]:
startingpos = pichart.copy()

In [14]:
# Convert the per-position counts into a share of matches played.
# The ratios are always recomputed from `pichart`, so re-running this cell
# gives the same result instead of dividing the previous ratios again.
position_cols = ['center', 'left', 'right']
startingpos = pichart.copy()
startingpos[position_cols] = pichart[position_cols].div(pichart['matches_played'], axis=0)

In [15]:
startingpos.head(5)

Unnamed: 0_level_0,center,left,right,matches_played
team_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
frc1294,0.333333,,0.666667,3
frc1318,,0.333333,0.666667,3
frc1778,0.333333,,0.333333,3
frc1899,0.666667,,0.666667,3
frc2412,0.333333,,1.0,3


In [17]:
# Grouped bar chart: one cluster per team, one bar per starting position,
# bar height taken from the matching column of `startingpos`.
team_numbers = list(startingpos.index)
positions = ['center', 'left', 'right']
palette = ["#c9d9d3", "#718dbf", "#e84d60"]
offsets = [-0.2, 0, 0.2]  # horizontal shift of each position's bar inside a team cluster

source = ColumnDataSource(startingpos)
# `width` replaces `plot_width`, which was deprecated alongside the `height`
# spelling already used here and is no longer accepted by Bokeh 3.
p = figure(x_range=team_numbers, y_range=(0, 1.2), title="Positions by Team", width=1100,
           height=400, toolbar_location=None, tools="")

for position, color, offset in zip(positions, palette, offsets):
    p.vbar(x=dodge('team_number', offset, range=p.x_range), top=position, source=source,
           width=0.15, color=color, legend_label=position)

p.x_range.range_padding = 0.02
p.xgrid.grid_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"
p.xaxis.major_label_orientation = math.pi/4

show(p)

In [17]:
# Position counts only (matches_played column removed); the first row is kept
# separately as the sample team for the single-chart prototype.
pi2 = pichart.drop(columns='matches_played')
team1294 = pi2.iloc[0]

In [18]:
#for index, row in pi2.iterrows():
#   print(row)

In [25]:
# Self-contained version of the pipeline above, ending in a grid with one
# starting-position pie chart per team.

# Match id stored in the last row of `measures` (e.g. 'qm20' -> 20); read by
# label because integer keys on a label-indexed Series are deprecated.
last_row = measures.iloc[-1]
lm = last_row['match']
last_match = int(lm.split('m')[1])

# Qualification matches only, with the numeric part of the id as a number.
qmmatch = matches[matches.match.str.startswith('qm', na=False)].copy()
qmmatch['match_number'] = pd.to_numeric(qmmatch['match'].str.split('m').str[1])

# Matches numbered strictly below the last match seen in `measures`.
lmatches = qmmatch[qmmatch.match_number < last_match]

matches_played = lmatches.groupby('team_number').size().to_frame('matches_played')

measures = measures[measures.team_number.str.startswith('frc', na=False)]
measures = measures[measures.task == 'start_pos']

# The merge only keeps teams that also appear in `matches_played`.
pichart = (
    measures.groupby(['team_number', 'measure1'])
    .size()
    .unstack("measure1")
    .merge(matches_played, on='team_number')
)

pi2 = pichart.drop('matches_played', axis=1)

PIES_PER_ROW = 6
plist = []
# `team` stays bound to the last row after the loop; later cells inspect it.
for team_name, team in pi2.iterrows():
    # Local frame gets its own name so the loaded `data` object is not overwritten.
    wedge_data = team.reset_index(name='value').rename(columns={'index': 'position'}).fillna(0)

    wedge_data['angle'] = wedge_data['value']/wedge_data['value'].sum() * 2*pi
    # Set3 is looked up by number of positions, so that count must be a key of the palette.
    wedge_data['color'] = Set3[len(team)]

    p = figure(height=350, title=team.name, toolbar_location=None,
               tools="hover", tooltips="@position: @value", x_range=(-0.5, 1.0))

    p.wedge(x=0, y=1, radius=0.4, start_angle=cumsum('angle', include_zero=True),
            end_angle=cumsum('angle'), line_color="white", fill_color='color',
            legend_field='position', source=wedge_data)

    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None

    plist.append(p)

# Rows of at most PIES_PER_ROW charts; no empty trailing row is produced when
# the number of teams is an exact multiple of the row size.
pgrid = [plist[start:start + PIES_PER_ROW] for start in range(0, len(plist), PIES_PER_ROW)]
# Named `pie_grid` so the imported bokeh.layouts.grid is not shadowed.
pie_grid = gridplot(pgrid, width = 250, height = 250)
show(pie_grid)

In [None]:
pichart.head(5)

In [None]:
len(team)

In [20]:
from math import pi

import pandas as pd

from bokeh.palettes import Set3
from bokeh.plotting import figure, show
from bokeh.transform import cumsum

# Single-team prototype of the pie chart, drawn from the `team1294` row.
data = (
    team1294
    .reset_index(name='value')
    .rename(columns={'index': 'position'})
    .fillna(0)
)
# Wedge size: each position's share of the row total, as an angle in radians.
value_total = data['value'].sum()
data['angle'] = data['value']/value_total * 2*pi
data['color'] = Set3[len(team1294)]

p = figure(
    height=350,
    title=team1294.name,
    toolbar_location=None,
    tools="hover",
    tooltips="@position: @value",
    x_range=(-0.5, 1.0),
)

p.wedge(
    x=0,
    y=1,
    radius=0.4,
    start_angle=cumsum('angle', include_zero=True),
    end_angle=cumsum('angle'),
    line_color="white",
    fill_color='color',
    legend_field='position',
    source=data,
)

# Hide axes and grid lines.
p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

show(p)

In [21]:
team.name

'frc1294'

In [22]:
# Long-form (position, value) table for the row currently bound to `team`,
# with missing counts shown as 0.
dt = (
    team
    .reset_index(name='value')
    .rename(columns={'index': 'position'})
    .fillna(0)
)
dt

Unnamed: 0,position,value
0,center,1.0
1,left,0.0
2,right,2.0


In [23]:
# Sample country -> value mapping, reshaped into a two-column (country, value) frame.
x = {
    'United States': 157,
    'United Kingdom': 93,
    'Japan': 89,
    'China': 63,
    'Germany': 44,
    'India': 42,
    'Italy': 40,
    'Australia': 35,
    'Brazil': 32,
    'France': 31,
    'Taiwan': 31,
    'Spain': 29
}

country_series = pd.Series(x, name='value').rename_axis('country')
data = country_series.reset_index()
data

Unnamed: 0,country,value
0,United States,157
1,United Kingdom,93
2,Japan,89
3,China,63
4,Germany,44
5,India,42
6,Italy,40
7,Australia,35
8,Brazil,32
9,France,31


In [24]:
import pickle
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap, dodge
from math import pi
from bokeh.layouts import column, row, grid, gridplot
from bokeh.palettes import Set3
from bokeh.plotting import figure, show
from bokeh.transform import cumsum

import viewer.app.data as data 

class StartingPos:
    """Grid of per-team pie charts of the start_pos values recorded in `measures`."""

    # Number of pie charts placed on each row of the grid.
    PLOTS_PER_ROW = 6

    def __init__(self):
        # Bokeh layout produced by get_layout(); None until it has been built.
        self.layout = None

    def prepare_data(self, matches, measures):
        """Count how often each team used each starting position.

        Args:
            matches: DataFrame with at least 'match' and 'team_number' columns.
            measures: DataFrame with at least 'match', 'team_number', 'task'
                and 'measure1' columns. The match id of its last row defines
                the most recent match.

        Returns:
            DataFrame indexed by team_number with one column per distinct
            start_pos value; cells hold row counts, NaN where a team never
            used that position. Only teams that appear in a qualification
            match numbered below the most recent match are kept.
        """
        # Match id stored in the last row of `measures` (e.g. 'qm20' -> 20).
        # Read by label: integer keys on a label-indexed Series are deprecated
        # positional access in pandas 2.x.
        last_match_id = measures.iloc[-1]['match']
        last_match = int(last_match_id.split('m')[1])

        # Qualification matches only, with the numeric part of the id as a number.
        # na=False treats missing ids as non-matching (formerly `== True`).
        qmmatch = matches[matches.match.str.startswith('qm', na=False)].copy()
        qmmatch['match_number'] = pd.to_numeric(qmmatch['match'].str.split('m').str[1])

        # Matches numbered strictly below the last match found in `measures`.
        lmatches = qmmatch[qmmatch.match_number < last_match]

        matches_played = (
            lmatches.groupby('team_number')
            .size()
            .to_frame('matches_played')
        )

        start_rows = measures[measures.team_number.str.startswith('frc', na=False)]
        start_rows = start_rows[start_rows.task == 'start_pos']

        # The merge acts as a filter: only teams present in `matches_played`
        # survive; the helper column itself is dropped again before returning.
        pichart = (
            start_rows.groupby(['team_number', 'measure1'])
            .size()
            .unstack("measure1")
            .merge(matches_played, on='team_number')
        )

        return pichart.drop('matches_played', axis=1)

    def _pie_figure(self, team):
        """Build one pie chart from a row of position counts (row name is the title)."""
        # Named wedge_data rather than `data`: assigning to `data` inside a method
        # makes it a local and hides the imported `data` module.
        wedge_data = team.reset_index(name='value').rename(columns={'index': 'position'}).fillna(0)

        wedge_data['angle'] = wedge_data['value']/wedge_data['value'].sum() * 2*pi
        # Set3 is looked up by number of positions, so that count must be a key of the palette.
        wedge_data['color'] = Set3[len(team)]

        p = figure(height=350, title=team.name, toolbar_location=None,
                   tools="hover", tooltips="@position: @value", x_range=(-0.5, 1.0))

        p.wedge(x=0, y=1, radius=0.4, start_angle=cumsum('angle', include_zero=True),
                end_angle=cumsum('angle'), line_color="white", fill_color='color',
                legend_field='position', source=wedge_data)

        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None
        return p

    def graph_plot(self):
        """Return the pie charts as a list of rows, PLOTS_PER_ROW figures per row.

        The tables come from `data.get_data()`, read once: element 0 is used as
        `measures` and element 1 as `matches`.
        """
        dataset = data.get_data()
        measures = dataset[0]
        matches = dataset[1]
        pi2 = self.prepare_data(matches, measures)

        plist = [self._pie_figure(team) for _, team in pi2.iterrows()]

        # Slice into rows; no empty trailing row when the team count is an
        # exact multiple of the row size.
        per_row = self.PLOTS_PER_ROW
        pgrid = [plist[start:start + per_row] for start in range(0, len(plist), per_row)]
        return pgrid

    def get_layout(self):
        """Build the grid layout, store it on `self.layout` and return it."""
        pgrid = self.graph_plot()
        self.layout = gridplot(pgrid, width = 250, height = 250)
        return self.layout

ModuleNotFoundError: No module named 'viewer'