In [131]:
import ast
import os
from pprint import pprint

import cv2
import lxml.html
import numpy as np
import pandas as pd

In [7]:
# All inputs for this run are read from score_prediction/<folder>.
folder = 0
filepath = f"score_prediction/{folder}"

# NOTE: cv2.imread does not raise on a missing/unreadable file; it returns None.
image = cv2.imread(f"{filepath}/sample.png")

# Both tables are whitespace-delimited. `sep=r"\s+"` is the documented
# equivalent of `delim_whitespace=True`, which is deprecated since pandas 2.2.
paths_df = pd.read_csv(f"{filepath}/paths.csv", sep=r"\s+")
matrix_df = pd.read_csv(f"{filepath}/matrix.csv", sep=r"\s+")

In [93]:
# Map every word in paths.csv to its parsed path.
# The "path" column stores the text of a Python literal. ast.literal_eval is
# used instead of eval() so that a malformed or hostile CSV cell raises
# ValueError/SyntaxError rather than executing arbitrary code.
# If a word occurs more than once, the last row wins (same as a plain loop).
selected_paths = {
    word: ast.literal_eval(path_text)
    for word, path_text in zip(paths_df["word"], paths_df["path"])
}

In [103]:
# Read the saved results page and parse it into an lxml element tree.
with open(f"{filepath}/results.html", "r") as f:
    results_html_string = f.read()
results_html = lxml.html.fromstring(results_html_string)

# Every child of the first "div.points-result" element is treated as one
# result row holding a word and the points awarded for it.
rows = results_html.cssselect("div.points-result")[0]

score_list = []
for row in rows:
    word_div = row.cssselect("div.word")[0]
    points_div = row.cssselect("div.points")[0].cssselect("div.left")[0]
    score_list.append((word_div.text_content(), int(points_div.text_content())))

# Same pairs as score_list, keyed by word for O(1) lookup.
scores = dict(score_list)

In [87]:
# Persist the scraped (word, points) pairs as a space-separated table.
actual_scores_path = f"{filepath}/actual_scores.csv"
score_df = pd.DataFrame(data=score_list, columns=["word", "points"])
score_df.to_csv(actual_scores_path, sep=" ")

In [79]:
class Cell:
    """A single grid entry holding a character, a value and a bonus tag.

    A fresh Cell starts with value 0 and empty strings for char and bonus.
    Its text form is "<bonus> <char> <value>".
    """

    def __init__(self):
        self.value = 0
        self.char = ""
        self.bonus = ""

    def __repr__(self):
        # repr and str are intentionally the same text.
        return str(self)

    def __str__(self):
        return "{} {} {}".format(self.bonus, self.char, self.value)

In [81]:
def construct_matrix(matrix_df, shape=(4, 4)):
    """Build a grid of Cell objects from a per-cell table.

    Parameters
    ----------
    matrix_df : pandas.DataFrame
        One row per cell with the columns 'index', 'character', 'value' and
        'bonus'. 'index' holds the text of a Python literal that is used to
        index the grid (presumably a "(row, col)" tuple -- confirm against
        matrix.csv).
    shape : tuple of int, optional
        Dimensions of the grid. Defaults to (4, 4), the size that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Object array of the requested shape whose entries are Cell instances.
        Positions that matrix_df does not mention keep the Cell defaults.

    Raises
    ------
    ValueError, SyntaxError
        If an 'index' entry is not a valid Python literal.
    """
    # Fill the grid with independent Cell instances (one object per position).
    cells = np.empty(shape, dtype=object)
    for position in np.ndindex(cells.shape):
        cells[position] = Cell()

    for _, row in matrix_df.iterrows():
        # literal_eval only accepts literals, so CSV content can never be
        # executed as code (the previous eval() call allowed that).
        index = ast.literal_eval(row['index'])

        cell = cells[index]
        cell.char = row['character']
        cell.value = row['value']
        cell.bonus = row['bonus']

    return cells

In [82]:
# Module-level grid: extract_metadata reads it directly and predict_scores
# hands it to the calculator.
matrix = construct_matrix(matrix_df=matrix_df)

In [95]:
def get_dictionary_fix(scores, selected_paths):
    """Compare the scored words with the selected words.

    Parameters
    ----------
    scores : mapping keyed by word (the words that actually received points).
    selected_paths : mapping keyed by word (the words that were selected).

    Returns
    -------
    (invalid_words, missed_words) : tuple of two lists
        invalid_words -- words in selected_paths that are absent from scores,
        in selected_paths order.
        missed_words -- words in scores that are absent from selected_paths,
        in scores order.
    """
    invalid_words = [word for word in selected_paths if word not in scores]
    missed_words = [word for word in scores if word not in selected_paths]
    return invalid_words, missed_words

In [99]:
def predict_scores(calculator):
    """Apply `calculator` to every selected word that also has a real score.

    Reads the module-level `selected_paths`, `scores` and `matrix`.

    Parameters
    ----------
    calculator : callable
        Called as calculator(path, matrix); its return value is stored as the
        predicted score for the word.

    Returns
    -------
    dict
        word -> predicted score, in selected_paths order. Words that do not
        appear in `scores` are treated as invalid and skipped.
    """
    return {
        word: calculator(path, matrix)
        for word, path in selected_paths.items()
        if word in scores
    }

In [138]:
def sze_calculator(path, matrix):
    """Predict a word's score as (sum of cell values) * (word bonuses) + length bonus.

    Parameters
    ----------
    path : sequence of grid indices, one per letter of the word.
    matrix : accepted so the function fits the calculator(path, matrix) call
        made by predict_scores; it is not used here. The cells are looked up
        by extract_metadata, which reads the module-level `matrix`.

    Returns
    -------
    The predicted score.

    Notes
    -----
    Letter multipliers are intentionally not applied. NOTE(review): the
    recorded comparison table shows error 0 with this formula, which suggests
    the stored cell values already include the letter bonus -- confirm.
    """
    # Length bonus for 2..7 letters; any other length falls back to 2 * length.
    length_bonus_by_size = {2: 3, 3: 4, 4: 6, 5: 9, 6: 11, 7: 14}
    size = len(path)
    length_bonus = length_bonus_by_size.get(size, 2 * size)

    _, values, _, word_factors = extract_metadata(path)

    # Word bonuses stack multiplicatively.
    combined_word_factor = 1
    for factor in word_factors:
        combined_word_factor *= factor

    return (sum(values) * combined_word_factor) + length_bonus

In [139]:
def extract_metadata(path):
    """Collect per-letter information for a path through the module-level `matrix`.

    Parameters
    ----------
    path : iterable of indices; each one is looked up as matrix[index].

    Returns
    -------
    (characters, values, letter_multipliers, word_multipliers) : four lists
        aligned with `path`. A cell's word multiplier is 3 for bonus '3W',
        2 for '2W' and 1 otherwise; its letter multiplier is 3 for '3L',
        2 for '2L' and 1 otherwise.
    """
    word_bonus = {'3W': 3, '2W': 2}
    letter_bonus = {'3L': 3, '2L': 2}

    cells = [matrix[index] for index in path]

    characters = [cell.char for cell in cells]
    values = [cell.value for cell in cells]
    # Any bonus tag that is not listed above leaves the multiplier at 1.
    letter_multipliers = [letter_bonus.get(cell.bonus, 1) for cell in cells]
    word_multipliers = [word_bonus.get(cell.bonus, 1) for cell in cells]

    return characters, values, letter_multipliers, word_multipliers

In [140]:
# Score every valid selected word with sze_calculator.
predicted_scores = predict_scores(sze_calculator)

# Same table that sze_calculator uses internally; not read in this cell.
length_mapping = {2: 3, 3: 4, 4: 6, 5: 9, 6: 11, 7: 14}

headers = ["word", "target", "predicted", "error", "length", "values", "letter_multipliers", "word_multipliers"]

# One row per predicted word: actual vs. predicted score plus the per-letter
# data that went into the prediction.
comparison_data = []
for word, predicted_score in predicted_scores.items():
    target_score = scores[word]
    _, values, letter_multipliers, word_multipliers = extract_metadata(selected_paths[word])
    comparison_data.append((
        word,
        target_score,
        predicted_score,
        target_score - predicted_score,  # error: positive when under-predicted
        len(word),
        values,
        letter_multipliers,
        word_multipliers,
    ))

df = pd.DataFrame(comparison_data, columns=headers)

In [141]:
# Display the comparison table (target vs. predicted score per word).
df

Unnamed: 0,word,target,predicted,error,length,values,letter_multipliers,word_multipliers
0,jatos,81,81,0,5,"[8, 1, 1, 1, 1]","[1, 1, 1, 1, 1]","[1, 1, 1, 3, 2]"
1,amatols,74,74,0,7,"[1, 3, 1, 1, 1, 2, 1]","[1, 1, 1, 1, 1, 2, 1]","[1, 2, 1, 1, 3, 1, 1]"
2,amatol,65,65,0,6,"[1, 3, 1, 1, 1, 2]","[1, 1, 1, 1, 1, 2]","[1, 2, 1, 1, 3, 1]"
3,james,65,65,0,5,"[8, 1, 3, 1, 1]","[1, 1, 1, 1, 1]","[1, 1, 2, 1, 2]"
4,losses,59,59,0,6,"[2, 1, 1, 1, 1, 2]","[2, 1, 1, 1, 1, 2]","[1, 3, 1, 2, 1, 1]"
5,santols,62,62,0,7,"[1, 1, 1, 1, 1, 2, 1]","[1, 1, 1, 1, 1, 2, 1]","[2, 1, 1, 1, 3, 1, 1]"
6,matlo,57,57,0,5,"[3, 1, 1, 2, 1]","[1, 1, 1, 2, 1]","[2, 1, 1, 1, 3]"
7,siamese,54,54,0,7,"[2, 1, 1, 3, 1, 1, 1]","[2, 1, 1, 1, 1, 1, 1]","[1, 1, 1, 2, 1, 2, 1]"
8,loses,51,51,0,5,"[2, 1, 1, 1, 2]","[2, 1, 1, 1, 2]","[1, 3, 2, 1, 1]"
9,messias,54,54,0,7,"[3, 1, 1, 1, 1, 1, 2]","[1, 1, 1, 1, 1, 1, 2]","[2, 1, 1, 2, 1, 1, 1]"


In [113]:
# Write the comparison table next to the inputs as a space-separated file.
predictions_path = f"{filepath}/sze_calculator_predictions.csv"
df.to_csv(predictions_path, sep=" ")