In this demo, we show how to use the MaxEnt_Reg script. We will go over the following:

1. Reading in the MaxEnt data and constraint information
2. Running the model using different gradient descent techniques (without regularization)
3. Adding regularization
    * Target prior `TGTPrior`
    * Difference prior `DIFPrior`

In [1]:
## Good old NumPy
import numpy as np

## Specialized classes
from utils.MaxEnt import MaxEnt, compute_probabilities
from utils.Regularization import TGTPrior, DIFPrior

## Utility function for reading in constraint and data information
from utils.OTSoft_file_reader import get_info


Below is a helper function that writes the model's predicted candidate probabilities, for both observed and held-out data, to a CSV file.

In [2]:
import csv

def prob_prediction_to_csv(file_name, underlying_forms, candidates, violations, weights):
    """Write the model's predicted candidate probabilities to a CSV file.

    The file starts with a ``UR/Candidate, Probability`` header row. For each
    entry of ``violations`` it then contains one row holding only the matching
    underlying form, followed by one row per candidate with that candidate's
    probability formatted as a percentage rounded to one decimal place.

    Args:
        file_name: Path of the CSV file to write (overwritten if it exists).
            Missing parent directories are created.
        underlying_forms: Sequence indexed in parallel with ``violations``.
        candidates: Sequence indexed in parallel with ``violations``; each
            item is the sequence of candidates for that underlying form.
        violations: Sequence of per-form violation data; each item is passed
            unchanged to ``compute_probabilities``.
        weights: Constraint weights; ``weights.squeeze()`` is what gets passed
            to ``compute_probabilities``.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # A fresh checkout may not contain the output directory (e.g. "results/"),
    # in which case open(..., "w") would raise FileNotFoundError.
    parent_dir = os.path.dirname(file_name)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # newline="" lets the csv module control line endings itself.
    with open(file_name, "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["UR/Candidate", "Probability"])
        for t, vio in enumerate(violations):
            P = compute_probabilities(weights.squeeze(), vio)
            writer.writerow([underlying_forms[t]])

            for c, candidate in enumerate(candidates[t]):
                writer.writerow([candidate, f"{round(P[c]*100, 1)}%"])


For this demonstration, we will be looking at the Hayes Pseudo-Korean data.

In [3]:
## Load constraint and data information from the OTSoft-format file
constraint_names, underlying_forms, candidates, violations, observed_probs = get_info(
    "toy_datasets/HayesPseudoKorean-RichBase.txt"
)

## Build a MaxEnt model over the constraints that were just read
me = MaxEnt(constraint_names)

## Show every constraint next to its starting weight (rounded to 2 decimals)
print("Constraint names and their initial weights:")
for name, weight in zip(constraint_names, me.cws):
    print(f'{name:<20} {round(weight[0], 2)}')
print()


Constraint names and their initial weights:
Ident (asp)          3.36
Ident (voice)        1.16
Ident (asp)/_V       3.77
Ident (voice)/_V     5.53
*[+v][-v][+v]        2.01
*dh                  9.85
*[-son/+voice]       0.9
*aspiration          2.12



In [4]:
## ======= WITHOUT ANY REGULARIZATION ============================================================
## Only the data is passed here: no prior object, and every other SGD_learn argument keeps its default
new_weights, learning_history = me.SGD_learn(violations=violations, observed_prob=observed_probs)

## Report the learned weight of each constraint (rounded to 2 decimals)
print("Weights after learning:")
for name, weight in zip(constraint_names, new_weights):
    print(f'{name:<20} {round(weight[0], 2)}')
print()

## Save the predicted candidate probabilities under the learned weights
file_name = "results/no_reg_pred.csv"
prob_prediction_to_csv(file_name, underlying_forms, candidates, violations, new_weights)
print(f"Predicted probabilities saved to ./{file_name}")

Weights after learning:
Ident (asp)          4.96
Ident (voice)        3.49
Ident (asp)/_V       4.71
Ident (voice)/_V     5.65
*[+v][-v][+v]        2.12
*dh                  9.85
*[-son/+voice]       3.02
*aspiration          1.86

Predicted probabilities saved to ./results/no_reg_pred.csv


In [5]:
## ======= M > F WITH TARGET PRIOR ============================================================
cns = constraint_names

## Two constraint groups: index 0 holds the Ident constraints, index 1 the starred constraints
grs = [
    ["Ident (asp)", "Ident (voice)", "Ident (asp)/_V", "Ident (voice)/_V"],
    ["*[+v][-v][+v]", "*dh", "*[-son/+voice]", "*aspiration"],
]

## One entry per group -- presumably the prior's target value and spread; confirm against TGTPrior
mus = [1, 10]
sms = [1, 1]
target_prior = TGTPrior(cns, grs, mus, sms)

## NOTE(review): 1, 10000, 0.05 are positional SGD_learn settings whose parameter names are not visible here
new_weights, learning_history = me.SGD_learn(violations, observed_probs, 1, 10000, 0.05, target_prior)

## Report the learned weight of each constraint (rounded to 2 decimals)
print("Weights after learning:")
for name, weight in zip(cns, new_weights):
    print(f'{name:<20} {round(weight[0], 2)}')
print()

## Save the predicted candidate probabilities under the learned weights
file_name = "results/TGT_pred.csv"
prob_prediction_to_csv(file_name, underlying_forms, candidates, violations, new_weights)
print(f"Predicted probabilities saved to ./{file_name}")

Weights after learning:
Ident (asp)          1.69
Ident (voice)        1.06
Ident (asp)/_V       1.69
Ident (voice)/_V     1.06
*[+v][-v][+v]        10.01
*dh                  10.0
*[-son/+voice]       9.99
*aspiration          9.31

Predicted probabilities saved to ./results/TGT_pred.csv


In [6]:
## ======= M > F WITH DIFFERENCE PRIOR ============================================================
cns = constraint_names

## Two constraint groups: index 0 holds the Ident constraints, index 1 the starred constraints
grs = [
    ["Ident (asp)", "Ident (voice)", "Ident (asp)/_V", "Ident (voice)/_V"],
    ["*[+v][-v][+v]", "*dh", "*[-son/+voice]", "*aspiration"],
]

## One entry per compared pair -- presumably the desired difference and its spread; confirm against DIFPrior
mus = [5]
sms = [3]

## Each pair holds two indices into grs (here: group 1 compared with group 0)
cmp = [[1, 0]]
diff_prior = DIFPrior(cns, grs, mus, sms, cmp)

## NOTE(review): 1, 10000, 0.05 are positional SGD_learn settings whose parameter names are not visible here
new_weights, learning_history = me.SGD_learn(violations, observed_probs, 1, 10000, 0.05, diff_prior)

## Report the learned weight of each constraint (rounded to 2 decimals)
print("Weights after learning:")
for name, weight in zip(cns, new_weights):
    print(f'{name:<20} {round(weight[0], 2)}')
print()

## Save the predicted candidate probabilities under the learned weights
file_name = "results/DIF_pred.csv"
prob_prediction_to_csv(file_name, underlying_forms, candidates, violations, new_weights)
print(f"Predicted probabilities saved to ./{file_name}")

Weights after learning:
Ident (asp)          3.53
Ident (voice)        1.52
Ident (asp)/_V       3.82
Ident (voice)/_V     4.76
*[+v][-v][+v]        2.89
*dh                  10.69
*[-son/+voice]       2.83
*aspiration          2.2

Predicted probabilities saved to ./results/DIF_pred.csv


In [7]:
## ======= M > F WITH DIFFERENCE PRIOR AND PRIOR TO MINIMIZE WEIGHTS ============================================================
cns = constraint_names

## DIF: the Ident constraints (group 0) versus the starred constraints (group 1)
dgrs = [
    ["Ident (asp)", "Ident (voice)", "Ident (asp)/_V", "Ident (voice)/_V"],
    ["*[+v][-v][+v]", "*dh", "*[-son/+voice]", "*aspiration"],
]
dmus = [5]
dsms = [5]
## Each pair holds two indices into dgrs (here: group 1 compared with group 0)
dcmp = [[1, 0]]
diff_prior = DIFPrior(cns, dgrs, dmus, dsms, dcmp)

## TGT: a single group containing all eight constraints, with mu = 0
tgrs = [
    [
        "Ident (asp)", "Ident (voice)", "Ident (asp)/_V", "Ident (voice)/_V",
        "*[+v][-v][+v]", "*dh", "*[-son/+voice]", "*aspiration",
    ]
]
tmus = [0]
tsms = [10]
target_prior = TGTPrior(cns, tgrs, tmus, tsms)

## Both priors are handed to the learner together, after the positional settings
## NOTE(review): 1, 10000, 0.05 are positional SGD_learn settings whose parameter names are not visible here
new_weights, learning_history = me.SGD_learn(violations, observed_probs, 1, 10000, 0.05, diff_prior, target_prior)

## Report the learned weight of each constraint (rounded to 2 decimals)
print("Weights after learning:")
for name, weight in zip(cns, new_weights):
    print(f'{name:<20} {round(weight[0], 2)}')
print()

## Save the predicted candidate probabilities under the learned weights
file_name = "results/DIF_TGT_pred.csv"
prob_prediction_to_csv(file_name, underlying_forms, candidates, violations, new_weights)
print(f"Predicted probabilities saved to ./{file_name}")

Weights after learning:
Ident (asp)          2.18
Ident (voice)        0.91
Ident (asp)/_V       1.71
Ident (voice)/_V     1.2
*[+v][-v][+v]        2.86
*dh                  4.43
*[-son/+voice]       2.25
*aspiration          0.86

Predicted probabilities saved to ./results/DIF_TGT_pred.csv


In [8]:
## ======= M > F WITH PAIRWISE DIFFERENCE PRIOR ============================================================
cns = constraint_names

## Group 0 holds all Ident constraints; groups 1-4 each hold a single starred constraint
grs = [
    ["Ident (asp)", "Ident (voice)", "Ident (asp)/_V", "Ident (voice)/_V"],
    ["*[+v][-v][+v]"],
    ["*dh"],
    ["*[-son/+voice]"],
    ["*aspiration"],
]

## One mu / sm entry per compared pair
mus = [3, 3, 3, 3]
sms = [3, 3, 3, 3]

## Every single-constraint group is compared with group 0
cmp = [[1, 0], [2, 0], [3, 0], [4, 0]]
diff_prior = DIFPrior(cns, grs, mus, sms, cmp)

## Learn with the difference prior; SGD_learn returns the final weights and the learning history
## NOTE(review): 1, 100000, 0.05 are positional SGD_learn settings whose parameter names are not visible here
new_weights, learning_history = me.SGD_learn(violations, observed_probs, 1, 100000, 0.05, diff_prior)

## Report the learned weight of each constraint (rounded to 2 decimals)
print("Weights after learning:")
for name, weight in zip(cns, new_weights):
    print(f'{name:<20} {round(weight[0], 2)}')
print()

## Save the predicted candidate probabilities under the learned weights
file_name = "results/DIF_pairwise_pred.csv"
prob_prediction_to_csv(file_name, underlying_forms, candidates, violations, new_weights)
print(f"Predicted probabilities saved to ./{file_name}")

Weights after learning:
Ident (asp)          2.81
Ident (voice)        0.0
Ident (asp)/_V       1.18
Ident (voice)/_V     0.0
*[+v][-v][+v]        6.86
*dh                  6.89
*[-son/+voice]       6.93
*aspiration          3.67

Predicted probabilities saved to ./results/DIF_pairwise_pred.csv


In [9]:
## ======= M > F WITH STEPWISE DIFFERENCE PRIOR ============================================================
cns = constraint_names

## Groups 0-3 each hold a single Ident constraint; group 4 holds all starred constraints
grs = [
    ["Ident (asp)"],
    ["Ident (voice)"],
    ["Ident (asp)/_V"],
    ["Ident (voice)/_V"],
    ["*[+v][-v][+v]", "*dh", "*[-son/+voice]", "*aspiration"],
]

## One mu / sm entry per compared pair
mus = [3, 3, 3, 3]
sms = [1, 1, 1, 3]

## Neighbouring groups are chained: 4 vs 3, 3 vs 2, 2 vs 1, 1 vs 0
cmp = [[4, 3], [3, 2], [2, 1], [1, 0]]
diff_prior = DIFPrior(cns, grs, mus, sms, cmp)

## Learn with the difference prior; SGD_learn returns the final weights and the learning history
## NOTE(review): 1, 100000, 0.05 are positional SGD_learn settings whose parameter names are not visible here
new_weights, learning_history = me.SGD_learn(violations, observed_probs, 1, 100000, 0.05, diff_prior)

## Report the learned weight of each constraint (rounded to 2 decimals)
print("Weights after learning:")
for name, weight in zip(cns, new_weights):
    print(f'{name:<20} {round(weight[0], 2)}')
print()

## Save the predicted candidate probabilities under the learned weights
file_name = "results/DIF_stepwise_pred.csv"
prob_prediction_to_csv(file_name, underlying_forms, candidates, violations, new_weights)
print(f"Predicted probabilities saved to ./{file_name}")

Weights after learning:
Ident (asp)          2.28
Ident (voice)        5.27
Ident (asp)/_V       8.27
Ident (voice)/_V     11.27
*[+v][-v][+v]        0.63
*dh                  8.47
*[-son/+voice]       1.66
*aspiration          3.51

Predicted probabilities saved to ./results/DIF_stepwise_pred.csv
