In [1]:
##pip install spacy
##python -m spacy download en_core_web_sm

### Search: rules for glossing in ASL 
ChatGPT: what are the rules for glossing in ASL

### To gloss American Sign Language (ASL) using Python and SpaCy, you would need to set up a system to process the input text according to the rules of ASL glossing. Below are the basic rules for ASL glossing followed by a Python code example using SpaCy.
***ASL Glossing Rules:***
1. Uppercase Letters: Write each ASL sign in uppercase letters. (DONE)\
2. Non-Manual Signals (NMS): Indicate non-manual signals such as facial expressions or body movements above the glossed sign.\
3. Fingerspelling: Represent fingerspelled words with dashes between each letter.\
4. Lexicalized Fingerspelling: Indicate lexicalized fingerspelling with a # symbol.\
5. Repetition: Show repeated signs with a plus sign (+) after the gloss.\
6. Role Shift: Indicate role shift with "rs" before the gloss.\
7. Indexing/Pointing: Use "ix" followed by a subscript letter or number for indexing.\
8. Directional Signs: Indicate the direction of the sign with arrows or other indicators.\
9. Classifiers: Use abbreviations for classifiers.\
10. Time Indicators: Place time indicators at the beginning of the sentence. (DONE)\
11. Topic-Comment Structure: Indicate the topic followed by the comment.\
12. English Words/Concepts: Use English gloss in quotation marks for concepts without direct ASL equivalents.


In [3]:
import spacy
from spacy.scorer import Scorer
import pandas as pd

from sklearn.metrics import precision_score, recall_score, f1_score
from spacy.training.example import Example


### Read CSV Dataset

In [5]:
"""
file_path = 'Input/ASLG_PC12_train.csv/train.csv'
df = pd.read_csv(file_path)

print(f'df.shape: {df.shape}')

df.columns
"""

"\nfile_path = 'Input/ASLG_PC12_train.csv/train.csv'\ndf = pd.read_csv(file_path)\n\nprint(f'df.shape: {df.shape}')\n\ndf.columns\n"

### Load Spacy instance

In [7]:
# Load SpaCy model
## Remember to run `python -m spacy download en_core_web_sm` first if necessary
nlp = spacy.load("en_core_web_sm")

### Example sentences and their corresponding reference glosses

In [8]:
# Example sentences and their corresponding reference glosses
## list of (sentence, reference gloss) tuples
examples = [
    ("Yesterday, I saw a car and a person.", "YESTERDAY I IX_1 SAW CL:3 AND CL:1."),
    ("I went to the store.", "I IX_1 WENT STORE."),
]

### ASL Gloss standard functions

In [10]:
# ASL glossing rules implemented in functions
def gloss_word(word):
    """Return the gloss of a single word: its text converted to uppercase."""
    uppercased = word.upper()
    return uppercased

def handle_fingerspelling(word):
    """Return the fingerspelled gloss: uppercase characters joined by dashes."""
    letters = word.upper()
    # A string is already an iterable of characters, so it can be joined directly.
    return "-".join(letters)

def handle_lexicalized_fingerspelling(word):
    """Return the lexicalized-fingerspelling gloss: ``#`` followed by the uppercase word."""
    return "#" + word.upper()

def handle_repetition(word, count):
    """Return the uppercase gloss of ``word`` marked for repetition.

    When ``count`` is greater than 1, ``count - 1`` plus signs are appended
    to the uppercase word; otherwise the uppercase word is returned alone.
    """
    if count > 1:
        base = word.upper()
        return base + "+" * (count - 1)
    return word.upper()

def handle_role_shift(sentence):
    """Return ``sentence`` prefixed with the role-shift marker ``rs``."""
    return "rs {}".format(sentence)

def handle_indexing(token, index):
    """Return the indexing gloss: ``ix_<index>`` followed by the uppercase token."""
    return "ix_{} {}".format(index, token.upper())

def gloss_sentence(doc):
    """Gloss every token of ``doc`` and return the glosses joined by single spaces.

    Each token's ``text`` is passed through ``gloss_word``.
    """
    return " ".join(gloss_word(token.text) for token in doc)

def add_time_indicator(doc):
    """Gloss ``doc`` with its first time indicator moved to the front.

    Every token's text is uppercased. The first token whose lowercase text is
    "yesterday", "today" or "tomorrow" becomes the leading gloss, and every
    token with that same uppercase text is dropped from the remainder.

    The removal works on whole tokens rather than on the joined string, so
    other words that merely contain the time word (e.g. "TODAYS") are left
    intact and no double or trailing spaces are produced.

    Args:
        doc: an iterable of tokens, each exposing a ``text`` attribute.

    Returns:
        The space-separated gloss string. If no time word is present, this is
        simply the uppercase tokens in their original order.
    """
    time_words = {"yesterday", "today", "tomorrow"}
    tokens = list(doc)

    for token in tokens:
        if token.text.lower() in time_words:
            time_gloss = token.text.upper()
            # Drop the time word at token level, then put it first.
            remainder = [t.text.upper() for t in tokens if t.text.upper() != time_gloss]
            return " ".join([time_gloss] + remainder)

    return " ".join(t.text.upper() for t in tokens)

def process_sentence(doc):
    """Convert a tokenized sentence into an ASL gloss string.

    Each token is lowercased and mapped as follows:
      * "i" / "me"  -> indexing gloss for "I" with index 1
      * "you"       -> indexing gloss for "YOU" with index 2
      * "yesterday" / "today" / "tomorrow" -> uppercase gloss, moved to the
        front of the sentence
      * "car" / "person" -> classifier glosses "CL:3" / "CL:1"
      * anything else -> uppercase gloss via ``gloss_word``

    Time words are collected separately and emitted first, each as a single
    gloss. Previously the whole glossed sentence was inserted in place of the
    time word, which duplicated the sentence inside the output.

    Args:
        doc: an iterable of tokens, each exposing a ``text`` attribute.

    Returns:
        The space-separated gloss string, time indicators first.
    """
    # Non-manual signal markers ("wh-q", "y/n-q") are not applied here yet.
    time_words = {"yesterday", "today", "tomorrow"}
    classifiers = {
        "car": "CL:3",
        "person": "CL:1"
    }

    time_glosses = []
    body_glosses = []
    for token in doc:
        word = token.text.lower()

        if word in time_words:
            # Kept out of the main sequence so it can lead the sentence.
            time_glosses.append(gloss_word(word))
        elif word in ("i", "me"):
            body_glosses.append(handle_indexing("I", 1))
        elif word == "you":
            body_glosses.append(handle_indexing("YOU", 2))
        elif word in classifiers:
            body_glosses.append(classifiers[word])
        else:
            body_glosses.append(gloss_word(word))

    return " ".join(time_glosses + body_glosses)


### Evaluation

NB: deterministic model = set of ASL-Gloss-rules functions

In [11]:
# Evaluation function
def evaluate_glossing(examples):
    """Gloss each example sentence and pair it with its reference gloss.

    Args:
        examples: iterable of ``(sentence, reference_gloss)`` pairs.

    Returns:
        A ``(y_true, y_pred)`` tuple. Both are lists holding one
        whitespace-split token list per example: ``y_true`` from the reference
        gloss, ``y_pred`` from the gloss generated by ``process_sentence``.
    """
    reference_token_lists = []
    predicted_token_lists = []

    for source_text, expected_gloss in examples:
        # Deterministic model = set of ASL-gloss-rule functions.
        predicted_gloss = process_sentence(nlp(source_text))

        # Whitespace tokenization of both glosses for comparison.
        reference_token_lists.append(expected_gloss.split())
        predicted_token_lists.append(predicted_gloss.split())

    return reference_token_lists, predicted_token_lists


### Flatten the lists for sklearn

### Compute Metrics

In [12]:
# Calculate metrics
def compute_metrics(y_true_, y_pred_):
    """Compute, print and return weighted precision, recall and F1.

    Args:
        y_true_: reference labels, passed unchanged to the sklearn scorers.
        y_pred_: predicted labels, passed unchanged to the sklearn scorers.

    Returns:
        A dict with the keys ``"precision"``, ``"recall"`` and ``"f1_score"``.
    """
    # Same averaging and zero-division settings for all three scorers.
    scorer_options = {"average": "weighted", "zero_division": 1}

    precision = precision_score(y_true_, y_pred_, **scorer_options)
    recall = recall_score(y_true_, y_pred_, **scorer_options)
    f1 = f1_score(y_true_, y_pred_, **scorer_options)

    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")

    return dict(precision=precision, recall=recall, f1_score=f1)


### example

In [13]:
# Example usage
# Disabled cell: the code below is wrapped in a string literal, so none of it
# is executed; the cell only evaluates to that string.
# NOTE(review): before re-enabling, check the last line. It passes y_pred as
# both arguments of compute_metrics, and it unpacks the returned dict into
# three names, which yields the dict keys (and rebinds f1_score) rather than
# the metric values.
"""
y_true, y_pred = evaluate_glossing(examples)

print(type(y_true))

print(f'y_pred sentence: {y_pred}')

for idx, sentence in enumerate(examples):
    print(f'sentence: {sentence[0]}')
    print(f'y_true sentence: {" ".join(y_true[idx])}')
    print(f'y_pred sentence: {" ".join(y_pred[idx])}')

precision, recall, f1_score = compute_metrics(y_pred, y_pred)
"""

'\ny_true, y_pred = evaluate_glossing(examples)\n\nprint(type(y_true))\n\nprint(f\'y_pred sentence: {y_pred}\')\n\nfor idx, sentence in enumerate(examples):\n    print(f\'sentence: {sentence[0]}\')\n    print(f\'y_true sentence: {" ".join(y_true[idx])}\')\n    print(f\'y_pred sentence: {" ".join(y_pred[idx])}\')\n\nprecision, recall, f1_score = compute_metrics(y_pred, y_pred)\n'