In [None]:
import re
from pathlib import Path

import fasttext as ft
import numpy as np
import pandas as pd
from joblib import dump, load
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

In [None]:
# Parameters and paths

# Inputs: the fastText model file and the tab-separated norms table.
# NOTE(review): the doubled ".bin.bin" suffix looks unusual — confirm it
# matches the file name on disk.
fasttext_path = "models/oscar_ft_model_dim1536_ws2.bin.bin"
affects_path = "resources/affective_norms.txt"

# Output: where the fitted regressor is written.
# NOTE(review): this is under "model/" while the embeddings are read from
# "models/" — confirm the differing directory names are intended.
output_path = "model/affecter.bin"

# MLP hyperparameters: hidden-layer width and the iteration cap.
n_hidden, max_iter = 200, 1000

In [None]:
# cleaning function

def to_text(x):
    """Strip list-literal punctuation from a string.

    Removes every double quote, single quote, square bracket and comma
    from ``x``; all other characters (including whitespace) are kept.

    Parameters
    ----------
    x : str
        Text to clean.

    Returns
    -------
    str
        ``x`` with the quote, bracket and comma characters removed.
    """
    # Raw string + character class: the earlier non-raw alternation relied on
    # invalid string escapes (backslash before '[', ']' and ','), which newer
    # Python versions warn about when the cell is compiled.
    return re.sub(r"[\"'\[\],]", "", x)

In [None]:
# Load resources: the fastText model and the norms table.
ft_model = ft.load_model(fasttext_path)

# The norms file is read as tab-separated text.
data = pd.read_csv(
    filepath_or_buffer=affects_path,
    sep="\t",
)

In [None]:
# Train/test split

# Upper bound on how many leading rows of the norms table are used.
n_samples = 10000

# Slice *before* embedding so vectors are only computed for the rows that are
# kept, and select rows by position (iloc) so features and targets stay
# aligned even if the frame's index is not the default 0..n-1 range.
subset = data.iloc[:n_samples]

# Features: one fastText vector per cleaned word.
X_text = [ft_model[to_text(str(word))] for word in subset["Word"]]

# Targets: the four norm columns for the same rows, in the same order.
Y_scores = np.array(subset[["AbstConc", "Arou", "IMG", "Val"]])

# Hold out 10% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_text, Y_scores, test_size=0.1, random_state=42
)

In [None]:
# Initialise the regressor: one hidden layer of n_hidden units, with a fixed
# seed and the configured iteration cap.
mlp = MLPRegressor(
    hidden_layer_sizes=(n_hidden,),
    max_iter=max_iter,
    random_state=1,
)

In [None]:
# Fit the regressor on the training split (word vectors -> norm scores).
# The call is left as the cell's last expression so its return value is shown.
mlp.fit(X=X_train, y=y_train)

In [None]:
# Evaluate on the held-out split using the estimator's built-in score method.
# The value is displayed as the cell output.
mlp.score(X=X_test, y=y_test)

In [None]:
# Save model

# Make sure the destination directory exists before writing, so a finished
# training run is not lost to a missing-folder error at the very last step.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

# Serialise the fitted regressor to output_path with joblib.
dump(mlp, output_path)