# EvoMSA's Quickstart Guide
## colab.research.google.com
### Mario Graff
#### ingeotec.mx

## Steps to install EvoMSA

In [23]:
!pip install sparsearray
!pip install evodag
!pip install microtc>=2.2.0
!pip install evomsa



## First model

### Reading a dataset

In [0]:
import os
from EvoMSA import base
# Path to the sample tweets file located under the EvoMSA package's tests folder.
tweets = os.path.join(
    os.path.dirname(base.__file__),  # directory containing EvoMSA's base module
    'tests',
    'tweets.json',
)

In [0]:
from microtc.utils import tweet_iterator
# Materialise every record yielded by the iterator so it can be indexed and split later.
D = [tweet for tweet in tweet_iterator(tweets)]

In [28]:
# Show one record: a dict with the label ('klass'), the tweet ('text') and 'q_voc_ratio'.
D[1]

{'klass': 'P',
 'q_voc_ratio': 0.9102564102564102,
 'text': '@VaneTearsen  te como con patatas'}

### Model

In [29]:
# Inputs are the tweet texts; targets are the labels stored under 'klass'.
X = [record['text'] for record in D]
y = [record['klass'] for record in D]
# Build EvoMSA with a Gaussian naive Bayes stacked method and fit it on the full dataset.
evo = base.EvoMSA(
    stacked_method="sklearn.naive_bayes.GaussianNB",
).fit(X, y)

100%|██████████| 1/1 [00:02<00:00,  2.70s/it]
100%|██████████| 5/5 [00:00<00:00, 11.77it/s]


In [31]:
# Predict the class of a single raw text; the input is a list, one prediction per element.
evo.predict(["hola mundo"])

100%|██████████| 1/1 [00:00<00:00, 376.04it/s]
100%|██████████| 1/1 [00:00<00:00, 442.90it/s]


array(['P'], dtype='<U4')

### Performance

In [0]:
from sklearn import model_selection

In [0]:
# Hold out 20% of the records for evaluation. The fixed seed makes the split
# identical on every run, so the hold-out set is reproducible.
train, test = model_selection.train_test_split(D, test_size=0.2, random_state=0)

In [34]:
# Refit on the training records only; labels come from each record's 'klass' field.
evo = base.EvoMSA(
    stacked_method="sklearn.naive_bayes.GaussianNB",
).fit(
    train,
    [record['klass'] for record in train],
)

100%|██████████| 1/1 [00:02<00:00,  2.20s/it]
100%|██████████| 5/5 [00:00<00:00, 15.27it/s]


In [0]:
from sklearn import metrics

In [36]:
# Macro-averaged F1 of the predictions on the held-out records.
metrics.f1_score(
    [record['klass'] for record in test],
    evo.predict(test),
    average="macro",
)

100%|██████████| 1/1 [00:00<00:00,  2.03it/s]
100%|██████████| 1/1 [00:00<00:00, 491.14it/s]


0.5153239428945074

## Model using EvoMSA's pre-trained text models

In [0]:
# Same stacked method, now with the Emo option enabled for Spanish (lang="es").
evo = base.EvoMSA(
    Emo=True,
    stacked_method="sklearn.naive_bayes.GaussianNB",
    lang="es",
).fit(
    train,
    [record['klass'] for record in train],
)

In [39]:
# Macro-averaged F1 on the held-out records for the model fitted with Emo=True.
metrics.f1_score(
    [record['klass'] for record in test],
    evo.predict(test),
    average="macro",
)

100%|██████████| 2/2 [00:00<00:00,  3.50it/s]
100%|██████████| 2/2 [00:00<00:00, 700.98it/s]


0.5248983462229129

In [0]:
# Enable both the Aggress and Emo options for Spanish (lang="es") and refit on the training records.
evo = base.EvoMSA(
    Aggress=True,
    Emo=True,
    stacked_method="sklearn.naive_bayes.GaussianNB",
    lang="es",
).fit(
    train,
    [record['klass'] for record in train],
)

In [41]:
# Macro-averaged F1 on the held-out records for the model fitted with Aggress=True and Emo=True.
metrics.f1_score(
    [record['klass'] for record in test],
    evo.predict(test),
    average="macro",
)

100%|██████████| 3/3 [00:00<00:00,  4.54it/s]
100%|██████████| 3/3 [00:00<00:00, 730.12it/s]


0.5294844540698902

## EvoMSA's pre-trained models on their own

In [0]:
from EvoMSA.utils import download
from microtc.utils import load_model
# Download the "emo_En.tm" model file and load it.
emo = load_model(
    download("emo_En.tm"),
)

In [47]:
# Transform one text, take the index of the largest value in its output,
# and look up the label stored at that index.
emo._labels[
    emo.transform(["hello world"])[0].argmax()
]

'♡'

In [0]:
# Download the "semeval2017_En.evomsa" model file and load it.
model = load_model(download("semeval2017_En.evomsa"))

In [50]:
# Classify two example sentences with the loaded model.
model.predict(
    [
        "hello world",
        "EvoMSA is working and we are almost at the end",
    ]
)

100%|██████████| 2/2 [00:00<00:00, 214.20it/s]
100%|██████████| 2/2 [00:00<00:00, 798.08it/s]


array(['positive', 'neutral'], dtype='<U8')