# Using the data to make predictions

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotnine as gg

from CurlingDB import CurlingDB
from utils.PlotnineElements import PlotnineElements as pe, blank

# Raise pandas' display limits so frames of up to 500 columns / 500 rows are
# rendered in full rather than being elided.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, 500)

# Create Database Connection

In [2]:
# Database file name handed to CurlingDB; `db` is the handle every query cell
# below goes through (db.execute_query / db.cursor).
DB_NAME = 'world_curling_ss.db'
db = CurlingDB(db_name=DB_NAME)

# Exploration

## Get the hammer of each end

Methods to get the hammer
- If mens or womens teams then can try to grab the colour of the rock in the first frame
- If mixed doubles then can try to grab the majority from the first frame
- If not the first frame then can take the colour that didn't score points on the last end
  - If no points then go further back
- 

In [91]:
# Match to inspect. The values are bound, in this order, to the four `?`
# placeholders of the single-match queries below.
# Alternative match: ('CU_WMCC2016P', 'SWE', 'JPN', 'Mens_Teams')
game = (
    'CUR_WWCC2017P',  # event abbreviation (Event.abbrev)
    'RUS',            # Match.team_1
    'DEN',            # Match.team_2
    'Womens_Teams',   # Match.type
)

In [92]:
# Hammer colour and direction as stored on the End table, for every end of the
# match selected by `game`.
# The inner join to Throw with throw_num = 1 restricts the result to ends that
# have a first throw recorded.
# ORDER BY makes the row order explicit: without it SQL leaves the order
# undefined, and this frame is read side by side with the predictions below.
hammer_query = """
SELECT
    e.end_id,
    e.num,
    e.hammer_colour,
    e.direction
FROM 
    Throw t
JOIN
    End e
ON
    t.end_id = e.end_id
JOIN
    Match m
ON
    e.match_id = m.match_id
JOIN
    Event e2
ON
    m.event_id = e2.event_id
WHERE
    t.throw_num = 1
    AND
    e2.abbrev = ?
    AND
    m.team_1 = ?
    AND
    m.team_2 = ?
    AND
    m.type = ?
ORDER BY
    e.end_id
"""
db.execute_query(hammer_query, game)
false_colours = pd.DataFrame(
    db.cursor.fetchall(),
    columns=['end_id', 'end_num', 'db_hammer_colour', 'direction'],
)
false_colours

Unnamed: 0,end_id,end_num,db_hammer_colour,direction
0,148,1,yellow,down
1,149,2,yellow,up
2,150,3,yellow,up
3,151,4,red,down
4,152,5,yellow,up
5,153,6,red,up
6,154,7,yellow,down
7,155,8,red,down
8,156,9,yellow,down
9,157,10,yellow,up


In [93]:
# Predict the hammer colour for each end of the match selected by `game`.
# Three columns are produced per end:
#   frame_based        - the opposite colour to the stone found in the end's first
#                        frame (Position.frame_num = 1); 'incon' when that frame has
#                        no red or yellow stone.
#   prev_based         - from the scores: 'yellow' when team 1's margin over team 2
#                        in the previous end is positive, 'red' when it is negative.
#                        A zero margin falls back to the end before that; 'incon'
#                        when that margin is zero as well.
#   pred_hammer_colour - frame_based when it is conclusive, otherwise prev_based.
#
# How the query is organised:
#   first_frame - one SELECT holding the joins, the filters and the six lagged scores.
#                 The window is PARTITION BY m.match_id ORDER BY e.end_id (end_id,
#                 not e.num); missing lags count as 0.
#   margins     - team 1 minus team 2 points for the previous end (margin_prev1) and
#                 the end before it (margin_prev2).
#   methods     - the two independent predictions.
# The window functions run after the WHERE filter, in the same SELECT as the joins,
# so they see exactly the filtered rows.
#
# NOTE(review): taking differences of lagged team_X_final_score presumes the column
# is a running total after each end, and the > 0 -> 'yellow' mapping presumes team 1
# throws red -- confirm both against the schema.
hammer_query2 = """
WITH first_frame AS (
    SELECT
        e.end_id AS end_id,
        e.num AS end_num,
        s.colour AS stone_colour,
        COALESCE(LAG(e.team_1_final_score, 1) OVER w, 0) AS t1_lag1,
        COALESCE(LAG(e.team_1_final_score, 2) OVER w, 0) AS t1_lag2,
        COALESCE(LAG(e.team_1_final_score, 3) OVER w, 0) AS t1_lag3,
        COALESCE(LAG(e.team_2_final_score, 1) OVER w, 0) AS t2_lag1,
        COALESCE(LAG(e.team_2_final_score, 2) OVER w, 0) AS t2_lag2,
        COALESCE(LAG(e.team_2_final_score, 3) OVER w, 0) AS t2_lag3
    FROM
        Stone s
    RIGHT JOIN
        Position p
    ON
        s.position_id = p.position_id
    JOIN
        End e
    ON
        p.end_id = e.end_id
    JOIN
        Match m
    ON
        e.match_id = m.match_id
    JOIN 
        Event e2
    ON
        m.event_id = e2.event_id
    WHERE
        m.type IN ('Mens_Teams', 'Womens_Teams')
        AND
        p.frame_num = 1
        AND
        e2.abbrev = ?
        AND
        m.team_1 = ?
        AND
        m.team_2 = ?
        AND
        m.type = ?
    WINDOW
        w AS (PARTITION BY m.match_id ORDER BY e.end_id)
),
margins AS (
    SELECT
        end_id,
        end_num,
        stone_colour,
        (t1_lag1 - t1_lag2) - (t2_lag1 - t2_lag2) AS margin_prev1,
        (t1_lag2 - t1_lag3) - (t2_lag2 - t2_lag3) AS margin_prev2
    FROM
        first_frame
),
methods AS (
    SELECT
        end_id,
        end_num,
        CASE
            stone_colour
            WHEN 'red' THEN 'yellow'
            WHEN 'yellow' THEN 'red'
            ELSE 'incon'
            END AS frame_based,
        CASE
            WHEN margin_prev1 > 0 THEN 'yellow'
            WHEN margin_prev1 < 0 THEN 'red'
            WHEN margin_prev2 > 0 THEN 'yellow'
            WHEN margin_prev2 < 0 THEN 'red'
            ELSE 'incon'
            END AS prev_based
    FROM
        margins
)
SELECT
    end_id,
    end_num,
    CASE
        WHEN frame_based <> 'incon' THEN frame_based
        ELSE prev_based
        END,
    frame_based,
    prev_based
FROM
    methods
ORDER BY
    end_id
"""
db.execute_query(hammer_query2, game)
pred_colours = pd.DataFrame(
    db.cursor.fetchall(),
    columns=['end_id', 'end_num', 'pred_hammer_colour', 'frame_based', 'prev_based'],
)
pred_colours

Unnamed: 0,end_id,end_num,pred_hammer_colour,frame_based,prev_based
0,148,1,yellow,yellow,incon
1,149,2,yellow,yellow,incon
2,150,3,yellow,yellow,incon
3,151,4,red,red,red
4,152,5,yellow,yellow,yellow
5,153,6,yellow,yellow,yellow
6,154,7,red,red,red
7,155,8,yellow,yellow,yellow
8,156,9,yellow,yellow,yellow
9,157,10,red,red,red


In [95]:
# Put the predicted hammer colour next to the colour stored in the database,
# matching rows on end_id. Only the combined prediction is kept from the left
# side; end_num and direction are dropped from the right side.
predicted_side = pred_colours.drop(columns=['frame_based', 'prev_based'])
database_side = false_colours.drop(columns=['end_num', 'direction'])
pd.merge(predicted_side, database_side, on='end_id')

Unnamed: 0,end_id,end_num,pred_hammer_colour,db_hammer_colour
0,148,1,yellow,yellow
1,149,2,yellow,yellow
2,150,3,yellow,yellow
3,151,4,red,red
4,152,5,yellow,yellow
5,153,6,yellow,red
6,154,7,red,yellow
7,155,8,yellow,red
8,156,9,yellow,yellow
9,157,10,red,yellow


In [96]:
# Hammer colour and direction as stored on the End table, for every end of every
# Mens_Teams / Womens_Teams match.
# The inner join to Throw with throw_num = 1 restricts the result to ends that
# have a first throw recorded.
# ORDER BY makes the row order explicit: without it SQL leaves the order
# undefined, and the displayed head/tail of this frame depends on it.
hammer_query = """
SELECT
    e.end_id,
    e.num,
    e.hammer_colour,
    e.direction
FROM 
    Throw t
JOIN
    End e
ON
    t.end_id = e.end_id
JOIN
    Match m
ON
    e.match_id = m.match_id
JOIN
    Event e2
ON
    m.event_id = e2.event_id
WHERE
    t.throw_num = 1
    AND
    m.type IN ('Mens_Teams', 'Womens_Teams')
ORDER BY
    e.end_id
"""
db.execute_query(hammer_query)
false_colours = pd.DataFrame(
    db.cursor.fetchall(),
    columns=['end_id', 'end_num', 'db_hammer_colour', 'direction'],
)
false_colours

Unnamed: 0,end_id,end_num,db_hammer_colour,direction
0,1,1,red,down
1,2,2,yellow,up
2,3,3,red,up
3,4,4,yellow,down
4,5,5,yellow,up
...,...,...,...,...
11467,18178,6,yellow,up
11468,18179,7,red,down
11469,18180,8,red,down
11470,18181,9,red,down


In [97]:
# Predict the hammer colour for each end of every Mens_Teams / Womens_Teams match.
# Three columns are produced per end:
#   frame_based        - the opposite colour to the stone found in the end's first
#                        frame (Position.frame_num = 1); 'incon' when that frame has
#                        no red or yellow stone.
#   prev_based         - from the scores: 'yellow' when team 1's margin over team 2
#                        in the previous end is positive, 'red' when it is negative.
#                        A zero margin falls back to the end before that; 'incon'
#                        when that margin is zero as well.
#   pred_hammer_colour - frame_based when it is conclusive, otherwise prev_based.
#
# How the query is organised:
#   first_frame - one SELECT holding the joins, the filters and the six lagged scores.
#                 The window is PARTITION BY m.match_id ORDER BY e.end_id (end_id,
#                 not e.num); missing lags count as 0.
#   margins     - team 1 minus team 2 points for the previous end (margin_prev1) and
#                 the end before it (margin_prev2).
#   methods     - the two independent predictions.
# The window functions run after the WHERE filter, in the same SELECT as the joins,
# so they see exactly the filtered rows.
#
# NOTE(review): taking differences of lagged team_X_final_score presumes the column
# is a running total after each end, and the > 0 -> 'yellow' mapping presumes team 1
# throws red -- confirm both against the schema.
hammer_query2 = """
WITH first_frame AS (
    SELECT
        e.end_id AS end_id,
        e.num AS end_num,
        s.colour AS stone_colour,
        COALESCE(LAG(e.team_1_final_score, 1) OVER w, 0) AS t1_lag1,
        COALESCE(LAG(e.team_1_final_score, 2) OVER w, 0) AS t1_lag2,
        COALESCE(LAG(e.team_1_final_score, 3) OVER w, 0) AS t1_lag3,
        COALESCE(LAG(e.team_2_final_score, 1) OVER w, 0) AS t2_lag1,
        COALESCE(LAG(e.team_2_final_score, 2) OVER w, 0) AS t2_lag2,
        COALESCE(LAG(e.team_2_final_score, 3) OVER w, 0) AS t2_lag3
    FROM
        Stone s
    RIGHT JOIN
        Position p
    ON
        s.position_id = p.position_id
    JOIN
        End e
    ON
        p.end_id = e.end_id
    JOIN
        Match m
    ON
        e.match_id = m.match_id
    JOIN 
        Event e2
    ON
        m.event_id = e2.event_id
    WHERE
        m.type IN ('Mens_Teams', 'Womens_Teams')
        AND
        p.frame_num = 1
    WINDOW
        w AS (PARTITION BY m.match_id ORDER BY e.end_id)
),
margins AS (
    SELECT
        end_id,
        end_num,
        stone_colour,
        (t1_lag1 - t1_lag2) - (t2_lag1 - t2_lag2) AS margin_prev1,
        (t1_lag2 - t1_lag3) - (t2_lag2 - t2_lag3) AS margin_prev2
    FROM
        first_frame
),
methods AS (
    SELECT
        end_id,
        end_num,
        CASE
            stone_colour
            WHEN 'red' THEN 'yellow'
            WHEN 'yellow' THEN 'red'
            ELSE 'incon'
            END AS frame_based,
        CASE
            WHEN margin_prev1 > 0 THEN 'yellow'
            WHEN margin_prev1 < 0 THEN 'red'
            WHEN margin_prev2 > 0 THEN 'yellow'
            WHEN margin_prev2 < 0 THEN 'red'
            ELSE 'incon'
            END AS prev_based
    FROM
        margins
)
SELECT
    end_id,
    end_num,
    CASE
        WHEN frame_based <> 'incon' THEN frame_based
        ELSE prev_based
        END,
    frame_based,
    prev_based
FROM
    methods
ORDER BY
    end_id
"""
db.execute_query(hammer_query2)
pred_colours = pd.DataFrame(
    db.cursor.fetchall(),
    columns=['end_id', 'end_num', 'pred_hammer_colour', 'frame_based', 'prev_based'],
)
pred_colours

Unnamed: 0,end_id,end_num,pred_hammer_colour,frame_based,prev_based
0,1,1,incon,incon,incon
1,2,2,red,red,red
2,3,3,yellow,yellow,yellow
3,4,4,yellow,yellow,yellow
4,5,5,yellow,yellow,yellow
...,...,...,...,...,...
11467,18178,6,red,incon,red
11468,18179,7,yellow,incon,yellow
11469,18180,8,red,red,red
11470,18181,9,yellow,incon,yellow


In [100]:
# Join predicted and database hammer colours on end_id, then count how often each
# (predicted, stored) colour pair occurs.
all_colours = pd.merge(
    left=pred_colours.drop(columns=['frame_based', 'prev_based']),
    right=false_colours.drop(columns=['end_num', 'direction']),
    on='end_id',
)
all_colours.value_counts(subset=['pred_hammer_colour', 'db_hammer_colour'])

pred_hammer_colour  db_hammer_colour
yellow              yellow              3259
red                 red                 3192
yellow              red                 2442
red                 yellow              2378
incon               yellow               104
                    red                   97
dtype: int64