In [3]:
import sys, os

import numpy as np


import gridworld as W                       # basic grid-world MDPs
import trajectory as T                      # trajectory generation
import optimizer as O                       # stochastic gradient descent optimizer
import solver as S                          # MDP solver (value-iteration)
import plot as P

from maxent import irl

ModuleNotFoundError: ignored

In [26]:
# State label -> words that belong to that state.  Built once when the cell
# runs instead of on every call.  Insertion order mirrors the priority of the
# original if/elif chain.
_STATE_WORDS = {
    'Verb positive': ['bloom', 'soar', 'sparkle', 'thrive', 'illuminate'],
    'Verb negative': ['complain', 'argue', 'blame', 'grumble', 'snarl'],
    'Verb inactive': ['rest', 'pause', 'hover', 'laze', 'bask'],
    'Verb active': ['smash', 'grab', 'push', 'shout', 'storm'],
    'Adv Time': ['suddenly', 'gradually', 'eventually', 'shortly', 'instantly'],
    'Adv Place': ['here', 'there', 'everywhere', 'nowhere', 'somewhere'],
    'Adv Interrogative': ['curiously', 'wonderingly', 'questioningly', 'doubtfully', 'pensively'],
    'Adj Descriptive': ['bright', 'calm', 'colorful', 'peaceful', 'radiant'],
    'Adj Quantitative': ['little', 'few', 'some', 'many', 'much'],
    'Conj uncertain': ['maybe', 'possibly', 'likely', 'arguably', 'presumably'],
    'Conj certain': ['definitely', 'surely', 'clearly', 'obviously', 'undoubtedly'],
    'Conj explanatory': ['because', 'since', 'as', 'so', 'for'],
    'Noun educated': ['adventure', 'journey', 'discovery', 'odyssey', 'quest',
                      'serenity', 'jubilee', 'harmony', 'oasis', 'beacon'],
    'Noun basic': ['car', 'house', 'job', 'money', 'phone',
                   'discord', 'blight', 'accident', 'damage', 'pain'],
    'P first person': ['i', 'me', 'my', 'mine', 'we', 'us', 'our', 'ours'],
    'P second person': ['you', 'your', 'yours'],
    'P third person': ['he', 'him', 'his', 'she', 'her', 'hers', 'it', 'its',
                       'they', 'them', 'their', 'theirs'],
    'Preposition': ['in', 'on', 'under', 'over', 'with', 'without'],
    'Det': ['the', 'a', 'an', 'this', 'that', 'these', 'those'],
}

# Inverted index: lower-case word -> state label.  `setdefault` keeps the
# first label if a word is ever listed under two states, which is what the
# original first-match if/elif chain did.
_WORD_TO_STATE = {}
for _label, _words in _STATE_WORDS.items():
    for _w in _words:
        _WORD_TO_STATE.setdefault(_w, _label)


def word_to_state(word):
    """Return the state label (word category) assigned to ``word``.

    Matching is case-insensitive: the word is lower-cased before the lookup.

    Parameters
    ----------
    word : str
        A single word.

    Returns
    -------
    str
        One of the labels used as keys of ``_STATE_WORDS``,
        e.g. ``'Verb positive'`` or ``'Det'``.

    Raises
    ------
    AssertionError
        If the word is not in any of the word lists.  The original code
        signalled this with an always-false ``assert 1+1 == 5``; the same
        exception type is raised explicitly here so the failure names the
        offending word and is not stripped when Python runs with ``-O``.
    """
    # NOTE(review): the notebook's `states` list also contains
    # 'Conj defensive', which no word maps to here — confirm whether a word
    # list for it is missing.
    word_lower = word.lower()
    try:
        return _WORD_TO_STATE[word_lower]
    except KeyError:
        raise AssertionError(
            f"word_to_state: no state defined for word {word!r}"
        ) from None

# Example usage: the lookup lower-cases its input, so a mixed-case word
# still resolves to its state.
word = 'ThRIve'
result = word_to_state(word)
print(f"The word '{word}' corresponds to the state: {result}")


The word 'ThRIve' corresponds to the state: Verb positive


In [6]:
# All state labels; the position of a label in this list is its state index.
states = [
    'Verb positive', 'Verb negative', 'Verb active', 'Verb inactive',
    'Adv Time', 'Adv Place', 'Adv Interrogative',
    'Adj Descriptive', 'Adj Quantitative',
    'Conj certain', 'Conj uncertain', 'Conj defensive', 'Conj explanatory',
    'P first person', 'P second person', 'P third person',
    'Noun basic', 'Noun educated',
    'Preposition',
    'Det',
]

# Row i of the identity matrix is the one-hot vector of states[i].
states_one_hot = np.eye(len(states))

# The same encoding as plain Python lists of ints, keyed by state label.
one_hot_encoding_dic = {}
for i, state in enumerate(states):
    one_hot_encoding_dic[state] = [0] * i + [1] + [0] * (len(states) - i - 1)



In [10]:
# Preview the first five rows of the one-hot state matrix.
states_one_hot[:5]

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]])

In [13]:
# Display the state-label -> one-hot list mapping.
one_hot_encoding_dic

{'Verb positive': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Verb negative': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Verb active': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Verb inactive': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Adv Time': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Adv Place': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Adv Interrogative': [0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'Adj Descriptive': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'Adj Quantitative': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'Conj certain': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Conj uncertain': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
 

In [None]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Corpus provided by the user, written as two halves that are also used
# separately further down.
corpus_1 = [
    'I love basketball',
    'I love football',
    'I love soccer',
    'I love tennis',
    'I love volleyball',
]

corpus_2 = [
    'I love cake',
    'I love cookies',
    'I love pizza',
    'I love chocolate',
    'I love candies',
]

# Full corpus: the first half followed by the second half.
corpus = corpus_1 + corpus_2





In [None]:
# Split every sentence on whitespace and collect the distinct words.
# Tokenization is case-sensitive: 'I' stays upper-case.
unique_words = {word for sentence in corpus for word in sentence.split()}

# Sort the vocabulary so every run assigns the same index to each word.
sorted_unique_words = sorted(unique_words)

# Ask for a dense array.  scikit-learn 1.2 renamed the `sparse` keyword to
# `sparse_output` and 1.4 removed the old name, so try the new spelling first
# and fall back to the old one on older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)

# The encoder expects a 2-D column of samples, hence the reshape to (n_words, 1).
one_hot_encoded = encoder.fit_transform(np.array(sorted_unique_words).reshape(-1, 1))

# Map each word to its row of the encoded matrix.
word_to_one_hot = dict(zip(sorted_unique_words, one_hot_encoded))

# Print each word next to its one-hot vector.
for word, one_hot_vector in word_to_one_hot.items():
    print(f"Word: {word}, One-hot vector: {one_hot_vector}")




Word: I, One-hot vector: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Word: basketball, One-hot vector: [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Word: cake, One-hot vector: [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Word: candies, One-hot vector: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
Word: chocolate, One-hot vector: [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
Word: cookies, One-hot vector: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
Word: football, One-hot vector: [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
Word: love, One-hot vector: [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
Word: pizza, One-hot vector: [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Word: soccer, One-hot vector: [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
Word: tennis, One-hot vector: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
Word: volleyball, One-hot vector: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]




In [None]:
# Display the encoded matrix: one row per word of the sorted vocabulary.
one_hot_encoded

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [None]:
# Display the word -> one-hot vector mapping.
word_to_one_hot

{'I': array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'basketball': array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'cake': array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'candies': array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'chocolate': array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]),
 'cookies': array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]),
 'football': array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]),
 'love': array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]),
 'pizza': array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]),
 'soccer': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]),
 'tennis': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]),
 'volleyball': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])}

In [None]:
# Size every axis from the vocabulary instead of a hard-coded 12, so the
# table stays consistent if the corpus changes.
n_words = len(sorted_unique_words)

# 3-D table of zeros with one slot per (word, word, word) index triple.
# NOTE(review): the meaning of each axis is defined by maxent.irl, which is
# not visible here — confirm the expected axis order.
transition = np.zeros((n_words, n_words, n_words))

# For every index along the first axis, set the diagonal of the last two axes
# to 1, i.e. transition[k, i, i] = 1 for all k and i.
diag = np.arange(n_words)
transition[:, diag, diag] = 1



In [None]:
# Build one trajectory per sentence of corpus_1.
D1 = []

for sentence in corpus_1:
    tokens = sentence.split()

    # Index of each word = position of the 1 in its one-hot vector.
    word_ids = [np.argmax(word_to_one_hot[token]) for token in tokens]

    # One (current, next, next) triple per pair of consecutive words.
    episode = [(s, s_next, s_next) for s, s_next in zip(word_ids, word_ids[1:])]

    D1.append(T.Trajectory(episode))

In [None]:
# Display the encoded matrix again (unchanged since it was computed).
one_hot_encoded

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [None]:
# Display the trajectories built from corpus_1.
D1

[Trajectory([(0, 7, 7), (7, 1, 1)]),
 Trajectory([(0, 7, 7), (7, 6, 6)]),
 Trajectory([(0, 7, 7), (7, 9, 9)]),
 Trajectory([(0, 7, 7), (7, 10, 10)]),
 Trajectory([(0, 7, 7), (7, 11, 11)])]

In [None]:
# Distinct final entries of D1: the last element of the last triple of each
# trajectory, de-duplicated through a set.
terminal1 = list({traj._t[-1][-1] for traj in D1})

In [None]:
# Display the distinct terminal indices collected from D1.
terminal1

[1, 6, 9, 10, 11]

In [None]:
# Parameter initialization: the constant 1.0.
# NOTE(review): assumes O.Constant is a constant-value initializer — confirm in optimizer.py.
init = O.Constant(1.0)

# Optimization strategy: O.ExpSga with a linearly decaying learning rate that starts at 0.2.
# NOTE(review): the name `ExpSga` suggests exponentiated stochastic gradient *ascent*
# rather than descent — confirm in optimizer.py.
optim = O.ExpSga(lr=O.linear_decay(lr0=0.2))

# Run inverse reinforcement learning on the corpus_1 trajectories (D1) to recover a reward.
# NOTE(review): argument roles (transition table, per-state features, terminal states,
# demonstrations) are inferred from the variable names — confirm against maxent.irl.
reward_maxent1 = irl(transition, one_hot_encoded, terminal1, D1, optim, init)

In [None]:
# Display the reward recovered from D1.
reward_maxent1

array([0.7648935 , 1.0000095 , 0.7648935 , 0.7648935 , 0.7648935 ,
       0.7648935 , 1.0000095 , 2.34831335, 0.7648935 , 1.0000095 ,
       1.0000095 , 1.0000095 ])

In [None]:
# Display the word -> one-hot mapping again; the position of the 1 gives each word's index.
word_to_one_hot

{'I': array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'basketball': array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'cake': array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'candies': array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'chocolate': array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]),
 'cookies': array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]),
 'football': array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]),
 'love': array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]),
 'pizza': array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]),
 'soccer': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]),
 'tennis': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]),
 'volleyball': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])}

In [None]:
# Build one trajectory per sentence of corpus_2.
D2 = []

for sentence in corpus_2:
    tokens = sentence.split()

    # Index of each word = position of the 1 in its one-hot vector.
    word_ids = [np.argmax(word_to_one_hot[token]) for token in tokens]

    # One (current, next, next) triple per pair of consecutive words.
    episode = [(s, s_next, s_next) for s, s_next in zip(word_ids, word_ids[1:])]

    D2.append(T.Trajectory(episode))

In [None]:
# Distinct final entries of D2: the last element of the last triple of each
# trajectory, de-duplicated through a set.
terminal2 = list({traj._t[-1][-1] for traj in D2})

In [None]:
# Parameter initialization: the constant 1.0.
# NOTE(review): assumes O.Constant is a constant-value initializer — confirm in optimizer.py.
init = O.Constant(1.0)

# Optimization strategy: O.ExpSga with a linearly decaying learning rate that starts at 0.2.
# NOTE(review): the name `ExpSga` suggests exponentiated stochastic gradient *ascent*
# rather than descent — confirm in optimizer.py.
optim = O.ExpSga(lr=O.linear_decay(lr0=0.2))

# Run inverse reinforcement learning on the corpus_2 trajectories (D2) to recover a reward.
# NOTE(review): argument roles (transition table, per-state features, terminal states,
# demonstrations) are inferred from the variable names — confirm against maxent.irl.
reward_maxent2 = irl(transition, one_hot_encoded, terminal2, D2, optim, init)

In [None]:
# Display the reward recovered from D2.
reward_maxent2

array([0.7648935 , 0.7648935 , 1.0000095 , 1.0000095 , 1.0000095 ,
       1.0000095 , 0.7648935 , 2.34831335, 1.0000095 , 0.7648935 ,
       0.7648935 , 0.7648935 ])

In [None]:
# Display the word -> one-hot mapping again; the position of the 1 gives each word's index.
word_to_one_hot

{'I': array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'basketball': array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'cake': array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'candies': array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'chocolate': array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]),
 'cookies': array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]),
 'football': array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]),
 'love': array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]),
 'pizza': array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]),
 'soccer': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]),
 'tennis': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]),
 'volleyball': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])}