# Evaluation

## Questions

In [1]:
import json

# Each line of results.txt is parsed as one JSON object; the cell displays a
# mapping from each object's 'name' to its 'questions' value.
# The encoding is given explicitly because the names contain non-ASCII
# characters (e.g. 'Land,Währung') that a platform-default codec could
# misdecode. NOTE(review): assumes the file is UTF-8 (the JSON default) — confirm.
with open('evaluation/results.txt', encoding='utf-8') as f:
    # Iterate the file directly instead of materializing readlines(), and skip
    # blank lines (such as a trailing newline), which json.loads would reject.
    lines = [json.loads(l) for l in f if l.strip()]
{l['name']: l['questions'] for l in lines}

{'Bundesland,Landeshauptstadt': 55,
 'Geschlecht': 136,
 'Hauptstadt,Land': 260,
 'Land,Kontinent': 231,
 'Land,Sprache/Adjektiv': 10,
 'Land,Währung': 55,
 'Technik': 3,
 'adjectives - Komparativ, Superlativ': 15,
 'adjectives - Positiv, Komparativ': 127,
 'adjectives - Positiv, Superlativ': 11,
 'opposite': 174,
 'signular,plural': 293,
 'total': 3408,
 'verbs - Imparativ, PrPlural': 143,
 'verbs - Imparativ, VgPlural': 182,
 'verbs - Imparativ, VgSingular': 215,
 'verbs - Infinitiv, Imparativ': 219,
 'verbs - Infinitiv, PrPlural': 181,
 'verbs - Infinitiv, VgPlural': 236,
 'verbs - Infinitiv, VgSingular': 287,
 'verbs - PrPlural, VgPlural': 160,
 'verbs - PrPlural, VgSingular': 182,
 'verbs - VgSingular, VgPlural': 233}

## Results

In [2]:
import pandas as pd

# Load the evaluation results table; the cells below select columns from this
# frame and filter its rows.
df = pd.read_csv(filepath_or_buffer='evaluation/results.csv')

In [3]:
# Column selection for the syntactic view: the configuration columns
# (sg, n, size, epochs) followed by the syntactic question categories.
syncols = [
    # Configuration
    'sg',
    'n',
    'size',
    'epochs',
    # Verbs
    'verbs - Imparativ, PrPlural',
    'verbs - Infinitiv, VgSingular',
    'verbs - PrPlural, VgPlural',
    'verbs - Infinitiv, PrPlural',
    'verbs - Infinitiv, VgPlural',
    'verbs - VgSingular, VgPlural',
    'verbs - Infinitiv, Imparativ',
    'verbs - PrPlural, VgSingular',
    'verbs - Imparativ, VgPlural',
    'verbs - Imparativ, VgSingular',
    # Adjectives
    'adjectives - Positiv, Komparativ',
    # Other
    'opposite',
    'signular,plural',
]
# Keep every row; restrict to the columns listed above, in that order.
syndf = df.loc[:, syncols]

In [4]:
# Column selection for the semantic view: the configuration columns
# (sg, n, size, epochs) followed by the semantic question categories.
semcols = [
    # Configuration
    'sg',
    'n',
    'size',
    'epochs',
    # Semantic categories
    'Bundesland,Landeshauptstadt',
    'Hauptstadt,Land',
    'Land,Kontinent',
    'Geschlecht',
    'Land,Währung',
]
# Keep every row; restrict to the columns listed above, in that order.
semdf = df.loc[:, semcols]

### Max sizes

In [5]:
# Reference configuration for the comparisons.
# n is pinned to 10,000,000 rather than taken from the data; to use the
# largest n present in the results instead, set n = df['n'].max().
# (An `X or df['n'].max()` toggle never reaches the max() branch for a
# non-zero X, so the choice is spelled out explicitly here.)
n = 10_000_000
# size and epochs use the largest values present in the results.
size = df['size'].max()
epochs = df['epochs'].max()
n, size, epochs

(10000000, 600, 20)

## Effect of Skip-Gram vs CBOW

* Skip-Gram slightly better on semantic questions.
* Skip-Gram's advantage is even smaller on syntactic questions.
* **Doesn't matter** compared to other factors.

### Semantic

In [6]:
# Semantic accuracies for both sg values at epochs == 20, size == 600 and
# n == 10,000,000. The mask is built on df; it shares its index with semdf.
semdf.loc[
    df['epochs'].eq(20)
    & df['size'].eq(600)
    & df['n'].eq(10_000_000)
]

Unnamed: 0,sg,n,size,epochs,"Bundesland,Landeshauptstadt","Hauptstadt,Land","Land,Kontinent",Geschlecht,"Land,Währung"
26,0,10000000,600,20,0.33,0.58,0.25,0.5,0.29
59,1,10000000,600,20,0.33,0.59,0.24,0.52,0.31


### Syntactic

In [7]:
# Syntactic accuracies for both sg values at epochs == 20, size == 300 and
# n == 10,000,000. The mask is built on df; it shares its index with syndf.
syndf.loc[
    df['epochs'].eq(20)
    & df['size'].eq(300)
    & df['n'].eq(10_000_000)
]

Unnamed: 0,sg,n,size,epochs,"verbs - Imparativ, PrPlural","verbs - Infinitiv, VgSingular","verbs - PrPlural, VgPlural","verbs - Infinitiv, PrPlural","verbs - Infinitiv, VgPlural","verbs - VgSingular, VgPlural","verbs - Infinitiv, Imparativ","verbs - PrPlural, VgSingular","verbs - Imparativ, VgPlural","verbs - Imparativ, VgSingular","adjectives - Positiv, Komparativ",opposite,"signular,plural"
23,0,10000000,300,20,0.5,0.61,0.54,0.59,0.76,0.81,0.52,0.84,0.46,0.47,0.39,0.18,0.24
56,1,10000000,300,20,0.5,0.6,0.53,0.59,0.76,0.82,0.52,0.82,0.46,0.45,0.4,0.2,0.25


## Effect of Vector Size
* 100 is not enough.
* For **semantics**, **600 is better than 300**.
* For **syntax**, **300 is much better than 600**.

### Semantic

#### CBOW

In [8]:
# CBOW rows (sg == 0) across all vector sizes at epochs == 20 and
# n == 10,000,000.
semdf.loc[
    df['epochs'].eq(20)
    & df['n'].eq(10_000_000)
    & df['sg'].eq(0)
]

Unnamed: 0,sg,n,size,epochs,"Bundesland,Landeshauptstadt","Hauptstadt,Land","Land,Kontinent",Geschlecht,"Land,Währung"
20,0,10000000,100,20,0.16,0.43,0.11,0.46,0.25
23,0,10000000,300,20,0.25,0.52,0.23,0.54,0.31
26,0,10000000,600,20,0.33,0.58,0.25,0.5,0.29


#### Skip-Gram

In [9]:
# Skip-Gram rows (sg == 1) across all vector sizes at epochs == 20 and
# n == 10,000,000.
semdf.loc[
    df['epochs'].eq(20)
    & df['n'].eq(10_000_000)
    & df['sg'].eq(1)
]

Unnamed: 0,sg,n,size,epochs,"Bundesland,Landeshauptstadt","Hauptstadt,Land","Land,Kontinent",Geschlecht,"Land,Währung"
53,1,10000000,100,20,0.15,0.42,0.1,0.5,0.27
56,1,10000000,300,20,0.25,0.54,0.27,0.55,0.33
59,1,10000000,600,20,0.33,0.59,0.24,0.52,0.31


### Syntactic

In [10]:
# Syntactic accuracies for both sg values across all vector sizes at
# epochs == 20 and n == 10,000,000.
syndf.loc[
    df['epochs'].eq(20)
    & df['n'].eq(10_000_000)
]

Unnamed: 0,sg,n,size,epochs,"verbs - Imparativ, PrPlural","verbs - Infinitiv, VgSingular","verbs - PrPlural, VgPlural","verbs - Infinitiv, PrPlural","verbs - Infinitiv, VgPlural","verbs - VgSingular, VgPlural","verbs - Infinitiv, Imparativ","verbs - PrPlural, VgSingular","verbs - Imparativ, VgPlural","verbs - Imparativ, VgSingular","adjectives - Positiv, Komparativ",opposite,"signular,plural"
20,0,10000000,100,20,0.41,0.6,0.52,0.49,0.69,0.81,0.51,0.75,0.41,0.4,0.35,0.14,0.24
23,0,10000000,300,20,0.5,0.61,0.54,0.59,0.76,0.81,0.52,0.84,0.46,0.47,0.39,0.18,0.24
26,0,10000000,600,20,0.48,0.52,0.49,0.57,0.75,0.8,0.48,0.82,0.43,0.46,0.4,0.14,0.22
53,1,10000000,100,20,0.41,0.59,0.5,0.49,0.71,0.8,0.51,0.76,0.43,0.41,0.35,0.14,0.24
56,1,10000000,300,20,0.5,0.6,0.53,0.59,0.76,0.82,0.52,0.82,0.46,0.45,0.4,0.2,0.25
59,1,10000000,600,20,0.48,0.53,0.49,0.59,0.74,0.8,0.48,0.82,0.45,0.46,0.41,0.13,0.23


## Effect of Training Set Size
* Strongly diminishing returns after 10M.
* Nevertheless significant gains from 10M to 20M.

### Semantic

In [11]:
# Semantic accuracies across all values of n at epochs == 20 and size == 300.
semdf.loc[
    df['epochs'].eq(20)
    & df['size'].eq(300)
]

Unnamed: 0,sg,n,size,epochs,"Bundesland,Landeshauptstadt","Hauptstadt,Land","Land,Kontinent",Geschlecht,"Land,Währung"
5,0,1000000,300,20,0.09,0.21,0.08,0.27,0.27
14,0,3000000,300,20,0.11,0.38,0.15,0.36,0.29
23,0,10000000,300,20,0.25,0.52,0.23,0.54,0.31
32,0,20000000,300,20,0.24,0.61,0.21,0.51,0.27
38,1,1000000,300,20,0.05,0.24,0.09,0.24,0.27
47,1,3000000,300,20,0.13,0.39,0.15,0.38,0.31
56,1,10000000,300,20,0.25,0.54,0.27,0.55,0.33
65,1,20000000,300,20,0.29,0.62,0.2,0.52,0.27


### Syntactic

In [12]:
# Syntactic accuracies across all values of n at epochs == 20 and size == 300.
syndf.loc[
    df['epochs'].eq(20)
    & df['size'].eq(300)
]

Unnamed: 0,sg,n,size,epochs,"verbs - Imparativ, PrPlural","verbs - Infinitiv, VgSingular","verbs - PrPlural, VgPlural","verbs - Infinitiv, PrPlural","verbs - Infinitiv, VgPlural","verbs - VgSingular, VgPlural","verbs - Infinitiv, Imparativ","verbs - PrPlural, VgSingular","verbs - Imparativ, VgPlural","verbs - Imparativ, VgSingular","adjectives - Positiv, Komparativ",opposite,"signular,plural"
5,0,1000000,300,20,0.26,0.41,0.3,0.5,0.55,0.63,0.33,0.72,0.22,0.35,0.2,0.1,0.2
14,0,3000000,300,20,0.42,0.48,0.44,0.54,0.7,0.74,0.47,0.78,0.47,0.44,0.34,0.16,0.23
23,0,10000000,300,20,0.5,0.61,0.54,0.59,0.76,0.81,0.52,0.84,0.46,0.47,0.39,0.18,0.24
32,0,20000000,300,20,0.53,0.64,0.56,0.64,0.76,0.82,0.54,0.85,0.49,0.47,0.41,0.19,0.26
38,1,1000000,300,20,0.27,0.43,0.3,0.45,0.51,0.68,0.33,0.72,0.19,0.32,0.2,0.08,0.19
47,1,3000000,300,20,0.4,0.46,0.44,0.53,0.7,0.74,0.47,0.78,0.44,0.46,0.31,0.11,0.22
56,1,10000000,300,20,0.5,0.6,0.53,0.59,0.76,0.82,0.52,0.82,0.46,0.45,0.4,0.2,0.25
65,1,20000000,300,20,0.53,0.63,0.56,0.64,0.76,0.83,0.53,0.85,0.51,0.46,0.41,0.18,0.27


## Effect of Number of Epochs
* Large gains that justify additional training time.
* In a few sections, performance decreases.

### Semantic

In [13]:
# Semantic accuracies across all epoch counts at n == 10,000,000 and
# size == 300.
semdf.loc[
    df['n'].eq(10_000_000)
    & df['size'].eq(300)
]

Unnamed: 0,sg,n,size,epochs,"Bundesland,Landeshauptstadt","Hauptstadt,Land","Land,Kontinent",Geschlecht,"Land,Währung"
21,0,10000000,300,5,0.22,0.42,0.15,0.51,0.22
22,0,10000000,300,10,0.25,0.55,0.2,0.5,0.27
23,0,10000000,300,20,0.25,0.52,0.23,0.54,0.31
54,1,10000000,300,5,0.2,0.47,0.12,0.49,0.18
55,1,10000000,300,10,0.31,0.52,0.18,0.52,0.25
56,1,10000000,300,20,0.25,0.54,0.27,0.55,0.33


### Syntactic

In [14]:
# Syntactic accuracies across all epoch counts at n == 10,000,000 and
# size == 300.
syndf.loc[
    df['n'].eq(10_000_000)
    & df['size'].eq(300)
]

Unnamed: 0,sg,n,size,epochs,"verbs - Imparativ, PrPlural","verbs - Infinitiv, VgSingular","verbs - PrPlural, VgPlural","verbs - Infinitiv, PrPlural","verbs - Infinitiv, VgPlural","verbs - VgSingular, VgPlural","verbs - Infinitiv, Imparativ","verbs - PrPlural, VgSingular","verbs - Imparativ, VgPlural","verbs - Imparativ, VgSingular","adjectives - Positiv, Komparativ",opposite,"signular,plural"
21,0,10000000,300,5,0.4,0.6,0.48,0.56,0.75,0.8,0.51,0.8,0.41,0.46,0.37,0.15,0.25
22,0,10000000,300,10,0.45,0.59,0.46,0.59,0.75,0.82,0.52,0.81,0.49,0.46,0.41,0.16,0.24
23,0,10000000,300,20,0.5,0.61,0.54,0.59,0.76,0.81,0.52,0.84,0.46,0.47,0.39,0.18,0.24
54,1,10000000,300,5,0.38,0.58,0.47,0.57,0.74,0.82,0.51,0.8,0.43,0.44,0.38,0.14,0.25
55,1,10000000,300,10,0.44,0.58,0.46,0.6,0.76,0.82,0.52,0.8,0.48,0.45,0.39,0.17,0.23
56,1,10000000,300,20,0.5,0.6,0.53,0.59,0.76,0.82,0.52,0.82,0.46,0.45,0.4,0.2,0.25
