In [2]:
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from zeugma.embeddings import EmbeddingTransformer

In [3]:
# Load the four semicolon-separated corpora in one pass. Each frame is later
# expected to expose `text` and `class` columns (see glove_transform).
uncertainty, hyperbole, metonymy, antithesis = (
	pd.read_csv(csv_path, sep=";")
	for csv_path in (
		"./incerteza.csv",
		"./hyperbole.csv",
		"./metonymy.csv",
		"./antithesis.csv",
	)
)

In [4]:
def glove_transform(data):
	"""Embed the ``text`` column of ``data`` and attach its ``class`` labels.

	Parameters
	----------
	data : pandas.DataFrame
		Must provide a ``text`` column (the sentences to embed) and a
		``class`` column (the labels).

	Returns
	-------
	pandas.DataFrame
		One row per input row, in input order: the features returned by
		``EmbeddingTransformer('glove-wiki-gigaword-100')`` followed by a
		``class`` column. The frame carries a fresh positional index.
	"""
	glove = EmbeddingTransformer('glove-wiki-gigaword-100')
	features = glove.fit_transform(data["text"])

	glove_df = pd.DataFrame(features)
	# Copy the labels by position. Assigning the Series itself aligns on index
	# labels, so an input whose index is not 0..n-1 (filtered, shuffled,
	# concatenated) would receive NaN or mismatched classes.
	glove_df["class"] = data["class"].to_numpy()

	return glove_df


In [5]:
def split_xy(data, test_size=0.2, random_state=67):
	"""Split an embedded frame into stratified train/test features and labels.

	Parameters
	----------
	data : pandas.DataFrame
		Frame with a ``class`` column; every other column is used as a feature.
	test_size : float or int, optional
		Forwarded to ``train_test_split``. Defaults to 0.2, the value this
		notebook has always used.
	random_state : int, optional
		Seed forwarded to ``train_test_split``. Defaults to 67, the value this
		notebook has always used.

	Returns
	-------
	tuple
		``(train_x, test_x, train_y, test_y)``.
	"""
	y = data["class"]
	x = data.drop(columns=["class"])

	# stratify=y asks train_test_split to keep the class proportions of `y`
	# in both partitions.
	train_x, test_x, train_y, test_y = train_test_split(
		x, y, stratify=y, test_size=test_size, random_state=random_state)

	return train_x, test_x, train_y, test_y

In [6]:
def train_glove(data, model):
	"""Fit ``model`` on the training split of ``data`` and report its test score.

	``data`` is partitioned by ``split_xy``; the model is fitted on the
	training part, its ``score`` on the held-out part is printed, and the
	fitted model is returned.
	"""
	features_train, features_test, labels_train, labels_test = split_xy(data)
	model.fit(features_train, labels_train)

	held_out_score = model.score(features_test, labels_test)
	print(held_out_score)

	return model

In [60]:
def test_sentences(text, y, model, transform=True):
	if transform:
		glove = EmbeddingTransformer('glove-wiki-gigaword-100')
		x = glove.fit_transform(text)
	else:
		x=text
	
	print(f"Orig. {list(y)}")
	print(f"Pred. {model.predict(x)}")
	print(model.score(x, y))

In [8]:
# Embed the hyperbole corpus (features plus its `class` column).
hyperbole_glove = glove_transform(data=hyperbole)

In [9]:
# Embed the uncertainty corpus (features plus its `class` column).
uncertainty_glove = glove_transform(data=uncertainty)

In [10]:
# Embed the metonymy corpus (features plus its `class` column).
metonymy_glove = glove_transform(data=metonymy)

In [11]:
# Embed the antithesis corpus (features plus its `class` column).
antithesis_glove = glove_transform(data=antithesis)

In [12]:
# Fit a linear SVM on the uncertainty embeddings; train_glove prints the
# score on its held-out split.
# NOTE(review): LinearSVC() is built without an explicit random_state --
# consider pinning one if the printed score must be reproducible.
uncertainty_model = train_glove(data=uncertainty_glove, model=LinearSVC())

0.7272727272727273


In [13]:
# Fit a linear SVM on the hyperbole embeddings; train_glove prints the
# score on its held-out split.
# NOTE(review): LinearSVC() is built without an explicit random_state --
# consider pinning one if the printed score must be reproducible.
hyperbole_model = train_glove(data=hyperbole_glove, model=LinearSVC())

0.8571428571428571


In [65]:
# Fit a linear SVM on the metonymy embeddings; train_glove prints the
# score on its held-out split.
# NOTE(review): LinearSVC() is built without an explicit random_state --
# consider pinning one if the printed score must be reproducible.
metonymy_model = train_glove(data=metonymy_glove, model=LinearSVC())

0.5454545454545454


In [57]:
# Fit a linear SVM on the antithesis embeddings; train_glove prints the
# score on its held-out split.
# NOTE(review): LinearSVC() is built without an explicit random_state --
# consider pinning one if the printed score must be reproducible.
antithesis_model = train_glove(data=antithesis_glove, model=LinearSVC())

0.6428571428571429


In [47]:
def load_captions(path="captions.csv",
		label_cols=("hyperbole", "antithesis", "metonymy", "uncertainty")):
	"""Load the captions CSV, embed its text and attach its label columns.

	The label column names are taken as a parameter (with a default) instead
	of being read from a notebook-level global, so the function works no
	matter which cells were executed before it.

	Parameters
	----------
	path : str, optional
		Semicolon-separated CSV with a ``text`` column and one column per
		entry of ``label_cols``. Defaults to ``"captions.csv"``.
	label_cols : iterable of str, optional
		Names of the label columns to copy next to the embedding features.

	Returns
	-------
	pandas.DataFrame
		The features returned by
		``EmbeddingTransformer('glove-wiki-gigaword-100')`` for each caption,
		followed by the requested label columns.
	"""
	label_cols = list(label_cols)  # pandas needs a list to select several columns

	data = pd.read_csv(path, sep=";")
	glove = EmbeddingTransformer('glove-wiki-gigaword-100')
	features = glove.fit_transform(data["text"])

	glove_df = pd.DataFrame(features)
	# `data` comes straight from read_csv, so both frames share the default
	# 0..n-1 index and this label-aligned assignment is row-for-row.
	glove_df[label_cols] = data[label_cols]
	return glove_df

In [50]:
# Embed the evaluation captions once; the resulting frame is reused by the
# per-model evaluation cells further down.
captions = load_captions()

In [20]:
# Hand-labelled probe sentences. Each sentence is written next to its expected
# class so the two cannot drift apart; zip() then splits the pairs back into
# the parallel `sentences` / `classes` lists.
sentences, classes = map(list, zip(
	("I've done this a hundred times before", 0),
	("This is reasonable", 1),
	("This tree is as tall as a building", 0),
	("I am going to die if I don't finish this", 0),
	("This won't work", 1),
	("Brazil is the country that most preserves the environment", 1),
	("President this message from you regarding the day of the Amazon is important", 1),
	("The President values care for the people of the Amazon the Amazon environment", 1),
	("And for that the important measures that you have supported: valuing people, the environment, land regularization, economic-ecological zoning", 1),
	("You opened the economy and payment for environmental services in the X program", 1),
	("In addition the program that you created the program to adopt a park for those who really want to help in the 132 preservation units, put your hand in your pockets and leave the field of speech and really help.", 1),
	("For the first time, the environment in Brazil is no longer a good business just for environmentalists, but an adequate conservation for people, associated with sustainable production, whether in the Amazon or outside it.", 1),
	("Brazil is the country that preserves the environment the most.", 1),
))

# Raw text goes in, so test_sentences embeds it itself (transform defaults to True).
test_sentences(text=sentences, y=classes, model=hyperbole_model)

Orig. [0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Pred. [0 1 0 0 1 1 1 1 1 1 0 1 1]
0.9230769230769231


In [42]:
# Label columns of the captions frame; the evaluation cells drop these to
# obtain the feature matrix.
y_cols = [
	"hyperbole",
	"antithesis",
	"metonymy",
	"uncertainty",
]

In [63]:
# Evaluate the hyperbole model on the captions. `captions` already holds
# embedded features, hence transform=False.
test_sentences(
	text=captions.drop(columns=y_cols),
	y=captions["hyperbole"],
	model=hyperbole_model,
	transform=False,
)

Orig. [1, 1, 1, 1, 1, 1, 1]
Pred. [1 1 1 1 0 1 1]
0.8571428571428571


In [62]:
# Evaluate the uncertainty model on the captions. `captions` already holds
# embedded features, hence transform=False.
test_sentences(
	text=captions.drop(columns=y_cols),
	y=captions["uncertainty"],
	model=uncertainty_model,
	transform=False,
)

Orig. [1, 1, 1, 1, 1, 1, 1]
Pred. [0 1 1 1 0 0 1]
0.5714285714285714


In [61]:
# Evaluate the antithesis model on the captions. `captions` already holds
# embedded features, hence transform=False.
test_sentences(
	text=captions.drop(columns=y_cols),
	y=captions["antithesis"],
	model=antithesis_model,
	transform=False,
)

Orig. [1, 1, 1, 1, 0, 0, 1]
Pred. [1 0 1 1 1 0 0]
0.5714285714285714


In [66]:
# Evaluate the metonymy model on the captions. `captions` already holds
# embedded features, hence transform=False.
test_sentences(
	text=captions.drop(columns=y_cols),
	y=captions["metonymy"],
	model=metonymy_model,
	transform=False,
)

Orig. [1, 1, 1, 1, 1, 1, 1]
Pred. [1 1 1 0 0 0 1]
0.5714285714285714
