# isiZulu Word Similarity Evaluation using mBERT

In [1]:
"""
isiZulu Word Similarity Benchmark Evaluator - mBERT VERSION
Adapted for isiZulu word embeddings using multilingual BERT with support for:
- Custom isiZulu word similarity datasets
- SimLex-999 and WordSim-353 (if translated to isiZulu)
- Precision, Recall, Accuracy, and F1 Score metrics
- Handling of isiZulu morphology and agglutination
- Contextualized embeddings from mBERT

Dependencies:
    pip install transformers torch numpy scipy
    (scikit-learn is not required; the classification metrics are computed with NumPy)
"""

import os
import csv
import numpy as np
from scipy.stats import spearmanr, pearsonr
import torch
from transformers import BertTokenizer, BertModel
import warnings
warnings.filterwarnings('ignore')


def confusion_matrix_np(y_true, y_pred):
    """
    Count the four cells of a binary confusion matrix using NumPy only.

    Labels are compared against the literal values 1 (positive) and
    0 (negative); any other label value is not counted in any cell.

    Args:
        y_true: Sequence of ground-truth labels
        y_pred: Sequence of predicted labels

    Returns:
        tn, fp, fn, tp
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    # Boolean masks for each side of the comparison
    actual_pos, actual_neg = actual == 1, actual == 0
    predicted_pos, predicted_neg = predicted == 1, predicted == 0

    true_neg = np.sum(actual_neg & predicted_neg)
    false_pos = np.sum(actual_neg & predicted_pos)
    false_neg = np.sum(actual_pos & predicted_neg)
    true_pos = np.sum(actual_pos & predicted_pos)

    return true_neg, false_pos, false_neg, true_pos


def accuracy_np(tp, tn, fp, fn):
    """Return the share of correct predictions, or 0.0 when there are no samples."""
    n_samples = tp + tn + fp + fn
    return (tp + tn) / n_samples if n_samples != 0 else 0.0


def precision_np(tp, fp):
    """Return tp / (tp + fp), or 0.0 when nothing was predicted positive."""
    predicted_positive = tp + fp
    return tp / predicted_positive if predicted_positive != 0 else 0.0


def recall_np(tp, fn):
    """Return tp / (tp + fn), or 0.0 when there are no actual positives."""
    actual_positive = tp + fn
    return tp / actual_positive if actual_positive != 0 else 0.0


def f1_np(precision, recall):
    """Return the harmonic mean of precision and recall, or 0.0 when both are zero."""
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    numerator = 2 * (precision * recall)
    return numerator / denominator


class MBertEmbedder:
    """
    Wrapper for mBERT to generate word embeddings.

    Each word is encoded on its own as ``[CLS] <subword tokens> [SEP]`` and
    the subword vectors of the selected hidden-state layer are pooled into a
    single vector.
    """

    # Pooling strategies accepted by get_word_embedding / get_similarity
    POOLING_METHODS = ('mean', 'first', 'last', 'max')

    def __init__(self, model_name='bert-base-multilingual-cased', device=None):
        """
        Initialize mBERT model and tokenizer.

        Args:
            model_name: HuggingFace model identifier
            device: 'cuda', 'cpu', or None (auto-detect)
        """
        print(f"üîÑ Loading {model_name}...")

        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = BertModel.from_pretrained(model_name)

        # Set device: prefer CUDA when available unless the caller chose one
        if device is None:
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)

        self.model.to(self.device)
        self.model.eval()  # inference only: disables dropout

        print(f"‚úÖ Model loaded on {self.device}")

    def get_word_embedding(self, word, pooling='mean', layer=-1):
        """
        Get the embedding of a single word encoded in isolation.

        Args:
            word: isiZulu word (string)
            pooling: How to pool subword tokens:
                    'mean' - Average all subword embeddings (default)
                    'first' - Use first subword token
                    'last' - Use last subword token
                    'max' - Max pooling over subword tokens
            layer: Index into the model's ``hidden_states`` tuple.
                   Index 0 is the embedding-layer output and indices 1..N
                   are the transformer layers (N = 12 for bert-base models),
                   so -1 is the last layer (default) and -2 the
                   second-to-last.

        Returns:
            1-D numpy array of length hidden_size (768 for bert-base models)

        Raises:
            ValueError: if ``pooling`` is not a known strategy, or if the
                tokenizer produces no subword tokens for ``word`` (e.g. an
                empty string), in which case there is nothing to pool.
        """
        # Validate before running the model so a typo fails fast
        if pooling not in self.POOLING_METHODS:
            raise ValueError(f"Unknown pooling method: {pooling}")

        # Tokenize word
        tokens = self.tokenizer.tokenize(word)
        if not tokens:
            # Pooling over zero rows would yield NaN ('mean') or IndexError
            raise ValueError(f"No subword tokens produced for word: {word!r}")

        # Add special tokens
        tokens = ['[CLS]'] + tokens + ['[SEP]']
        token_ids = self.tokenizer.convert_tokens_to_ids(tokens)

        # Convert to a batch of one sequence
        input_ids = torch.tensor([token_ids]).to(self.device)

        # Get embeddings without tracking gradients
        with torch.no_grad():
            outputs = self.model(input_ids, output_hidden_states=True)
        hidden_states = outputs.hidden_states

        # Select layer, first (only) batch item: (seq_len, hidden_size)
        layer_embeddings = hidden_states[layer][0]

        # Remove [CLS] and [SEP] to get only word tokens
        word_embeddings = layer_embeddings[1:-1]

        # Pool subword tokens
        if pooling == 'mean':
            embedding = word_embeddings.mean(dim=0)
        elif pooling == 'first':
            embedding = word_embeddings[0]
        elif pooling == 'last':
            embedding = word_embeddings[-1]
        else:  # 'max' (validated above)
            embedding = word_embeddings.max(dim=0)[0]

        return embedding.cpu().numpy()

    def get_similarity(self, word1, word2, pooling='mean', layer=-1):
        """
        Compute cosine similarity between two words using mBERT embeddings.

        Args:
            word1, word2: isiZulu words
            pooling: Pooling strategy for subword tokens
            layer: Which hidden-state layer to use

        Returns:
            Cosine similarity as a Python float between -1 and 1.
            Returns 0.0 when either embedding has zero norm, because the
            cosine is undefined there and would otherwise be NaN.

        Raises:
            ValueError: propagated from get_word_embedding.
        """
        emb1 = self.get_word_embedding(word1, pooling=pooling, layer=layer)
        emb2 = self.get_word_embedding(word2, pooling=pooling, layer=layer)

        norm_product = np.linalg.norm(emb1) * np.linalg.norm(emb2)
        if norm_product == 0:
            return 0.0

        # Cosine similarity
        return float(np.dot(emb1, emb2) / norm_product)


class IsiZuluBenchmarkEvaluatorMBERT:
    """
    Evaluator for isiZulu word embeddings using mBERT.

    Scores word pairs with the wrapped model and compares them against human
    similarity ratings using correlation metrics (Spearman, Pearson) and
    classification metrics (accuracy, precision, recall, F1) obtained by
    binarizing both score lists.

    Similarity lookups are memoized per (word pair, pooling, layer) in
    ``similarity_cache``, up to ``cache_size`` entries.
    """

    def __init__(self, model=None, cache_size=1000, pooling='mean', layer=-1):
        """
        Initialize evaluator with mBERT model.

        Args:
            model: MBertEmbedder instance or None (will create default)
            cache_size: Maximum number of cached similarities (default: 1000)
            pooling: Pooling strategy ('mean', 'first', 'last', 'max')
            layer: Which BERT layer to use (-1 = last layer)
        """
        if model is None:
            self.model = MBertEmbedder()
        else:
            self.model = model

        self.cache_size = cache_size
        self.similarity_cache = {}
        self.pooling = pooling
        self.layer = layer

        print(f"‚úÖ Evaluator initialized with pooling={pooling}, layer={layer}")

    def get_similarity(self, word1, word2):
        """
        Get cosine similarity between two isiZulu words using mBERT.

        Words are lowercased and stripped before lookup. Results are cached
        under a key made of the sorted word pair plus the current pooling and
        layer, so a cached value is never returned for a different
        configuration.

        Returns:
            The similarity score, or None when the model raises or returns a
            missing / non-finite value (best-effort: the failure is printed
            and the pair is reported as missing by the caller).
        """
        # Normalize words (lowercase for isiZulu)
        word1 = word1.lower().strip()
        word2 = word2.lower().strip()

        # Cosine similarity is symmetric, so the pair is order-independent
        cache_key = (*sorted((word1, word2)), self.pooling, self.layer)
        if cache_key in self.similarity_cache:
            return self.similarity_cache[cache_key]

        try:
            sim = self.model.get_similarity(word1, word2,
                                            pooling=self.pooling,
                                            layer=self.layer)
            # NaN/inf would silently turn the correlation metrics into NaN
            if sim is None or not np.isfinite(sim):
                raise ValueError(f"non-finite similarity: {sim}")
        except Exception as e:
            print(f"‚ö†Ô∏è  Error computing similarity for '{word1}' and '{word2}': {e}")
            return None

        # Cache the result (no eviction: stop caching once full)
        if len(self.similarity_cache) < self.cache_size:
            self.similarity_cache[cache_key] = sim

        return sim

    def binarize_scores(self, scores, threshold='median'):
        """
        Convert continuous scores to binary labels.

        Args:
            scores: Array of continuous scores
            threshold: 'median', 'mean', or a specific numeric value

        Returns:
            Binary array (1 where score >= threshold, else 0), threshold value

        Raises:
            ValueError: if ``threshold`` is a string other than 'median' or 'mean'.
        """
        scores = np.array(scores)

        if isinstance(threshold, str):
            if threshold == 'median':
                thresh_val = np.median(scores)
            elif threshold == 'mean':
                thresh_val = np.mean(scores)
            else:
                raise ValueError(
                    f"Unknown threshold: {threshold!r} "
                    "(expected 'median', 'mean', or a number)"
                )
        else:
            thresh_val = threshold

        return (scores >= thresh_val).astype(int), thresh_val

    def compute_classification_metrics(self, human_scores, model_scores,
                                       threshold='median', verbose=True):
        """
        Compute precision, recall, accuracy, and F1 score for isiZulu embeddings.

        Treats word similarity as a binary classification problem:
        - Similar pairs (high scores) vs. Dissimilar pairs (low scores)

        Human and model scores are binarized independently with the same
        ``threshold`` setting, so with 'median' / 'mean' each list gets its
        own cut-off value.

        Args:
            human_scores: Ground truth similarity scores
            model_scores: Model-predicted similarity scores
            threshold: How to binarize scores ('median', 'mean', or numeric value)
            verbose: Whether to print detailed results

        Returns:
            Dictionary with classification metrics
        """
        # Binarize scores
        y_true, human_thresh = self.binarize_scores(human_scores, threshold)
        y_pred, model_thresh = self.binarize_scores(model_scores, threshold)

        # Compute confusion matrix
        tn, fp, fn, tp = confusion_matrix_np(y_true, y_pred)

        # Compute metrics
        accuracy = accuracy_np(tp, tn, fp, fn)
        precision = precision_np(tp, fp)
        recall = recall_np(tp, fn)
        f1 = f1_np(precision, recall)

        # 2x2 matrix laid out as [[tn, fp], [fn, tp]]
        cm = np.array([[tn, fp], [fn, tp]])

        if verbose:
            print(f"\nüìä Amamethrikhi Okuhlukanisa (Classification Metrics)")
            print(f"  Threshold: {threshold}")
            print(f"  Human threshold:  {human_thresh:.4f}")
            print(f"  Model threshold:  {model_thresh:.4f}")
            print(f"\n  Ukunemba (Accuracy):   {accuracy:.4f}")
            print(f"  Ukunembile (Precision): {precision:.4f}")
            print(f"  Ukukhumbula (Recall):   {recall:.4f}")
            print(f"  I-F1 Score:             {f1:.4f}")
            print(f"\n  Confusion Matrix:")
            print(f"                    Okubikezelwe (Predicted)")
            print(f"                  Akufani  Kuyafana")
            print(f"    Iqiniso   Akufani  {tn:4d}   {fp:4d}")
            print(f"    (Actual)  Kuyafana {fn:4d}   {tp:4d}")
            print(f"\n  Amaqiniso Apositivi (True Positives):  {tp}")
            print(f"  Amaqiniso Anegativi (True Negatives):  {tn}")
            print(f"  Amanga Apositivi (False Positives):     {fp}")
            print(f"  Amanga Anegativi (False Negatives):     {fn}")

        return {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'confusion_matrix': cm,
            'true_positives': int(tp),
            'true_negatives': int(tn),
            'false_positives': int(fp),
            'false_negatives': int(fn),
            'human_threshold': human_thresh,
            'model_threshold': model_thresh,
            'y_true': y_true,
            'y_pred': y_pred
        }

    def evaluate_similarity(self, word_pairs, compute_classification=True,
                          threshold='median'):
        """
        Evaluate mBERT model on word pairs with both correlation and classification metrics.

        Args:
            word_pairs: List of tuples (word1, word2, human_score)
            compute_classification: Whether to compute precision/recall/F1
            threshold: Threshold for binarization ('median', 'mean', or numeric)

        Returns:
            Dictionary with all evaluation results, or None when fewer than
            two pairs could be scored.
        """
        model_scores = []
        human_scores = []
        missing_pairs = []
        valid_pairs = []
        total = len(word_pairs)

        print(f"\nüîÑ Computing similarities for {total} word pairs...")

        for index, (word1, word2, human_score) in enumerate(word_pairs, start=1):
            if index % 10 == 0:
                print(f"  Progress: {index}/{total} pairs processed...", end='\r')

            sim = self.get_similarity(word1, word2)

            if sim is None:
                missing_pairs.append((word1, word2))
                continue

            model_scores.append(sim)
            human_scores.append(human_score)
            valid_pairs.append((word1, word2))

        print(f"  Progress: {total}/{total} pairs processed... ‚úÖ")

        # Correlations need at least two scored pairs
        if len(model_scores) < 2:
            print("‚ùå Amapheyari awanele ukubala amamethrikhi (Not enough valid pairs)")
            return None

        spearman_corr, spearman_pval = spearmanr(human_scores, model_scores)
        pearson_corr, pearson_pval = pearsonr(human_scores, model_scores)

        coverage = len(model_scores) / total * 100

        # Print correlation results
        print(f"\nüìä Amamethrikhi Wokuhlobana (Correlation Metrics):")
        print(f"  Ukumbozwa:    {len(model_scores)}/{total} pairs ({coverage:.1f}%)")
        print(f"  Spearman œÅ:   {spearman_corr:.4f} (p={spearman_pval:.4e})")
        print(f"  Pearson r:    {pearson_corr:.4f} (p={pearson_pval:.4e})")

        results = {
            'spearman': spearman_corr,
            'spearman_pval': spearman_pval,
            'pearson': pearson_corr,
            'pearson_pval': pearson_pval,
            'coverage': coverage,
            'n_pairs': len(model_scores),
            'total_pairs': total,
            'missing_pairs': missing_pairs,
            'valid_pairs': valid_pairs
        }

        # Compute classification metrics
        if compute_classification:
            classification_results = self.compute_classification_metrics(
                human_scores, model_scores, threshold=threshold, verbose=True
            )
            results.update(classification_results)

        if missing_pairs:
            print(f"\n‚ö†Ô∏è  Missing/error pairs: {len(missing_pairs)}")
            if len(missing_pairs) <= 10:
                for w1, w2 in missing_pairs:
                    print(f"  - {w1}, {w2}")

        return results

    def load_isizulu_similarity_dataset(self, filepath):
        """
        Load isiZulu word similarity dataset.

        Expected format (CSV or TSV, with or without a header row):
        word1, word2, similarity_score

        Example:
        umfazi,indoda,5.2
        ingane,umntwana,9.5
        inja,ikati,6.8

        The delimiter is a tab when the first line contains one, otherwise a
        comma. Rows with fewer than three columns, or whose third column is
        not a finite number, are skipped; this is also how a header row is
        dropped, so a headerless file keeps its first data row.

        Returns:
            List of tuples (word1, word2, similarity_score) with lowercased,
            stripped words, or None when the file does not exist.
        """
        word_pairs = []

        if not os.path.exists(filepath):
            print(f"‚ö†Ô∏è  Ifayela ayitholakali (File not found): {filepath}")
            return None

        # newline='' lets the csv module handle line endings itself
        with open(filepath, 'r', encoding='utf-8', newline='') as f:
            # Detect delimiter from the first line, then rewind
            first_line = f.readline()
            delimiter = '\t' if '\t' in first_line else ','
            f.seek(0)

            for row in csv.reader(f, delimiter=delimiter):
                if len(row) < 3:
                    continue
                try:
                    score = float(row[2])
                except ValueError:
                    # Header row or malformed score
                    continue
                if not np.isfinite(score):
                    # 'nan' / 'inf' parse as floats but would poison the metrics
                    continue
                word_pairs.append((row[0].strip().lower(),
                                   row[1].strip().lower(),
                                   score))

        print(f"‚úÖ Kulayishwe amapheyari {len(word_pairs)} amagama (Loaded {len(word_pairs)} word pairs)")
        return word_pairs

    def evaluate_isizulu_dataset(self, filepath,
                                  compute_classification=True,
                                  threshold='median'):
        """
        Evaluate mBERT model on isiZulu word similarity dataset.

        Args:
            filepath: Path to the CSV/TSV dataset
            compute_classification: Whether to compute precision/recall/F1
            threshold: Threshold for binarization ('median', 'mean', or numeric)

        Returns:
            Dictionary with all evaluation results, or None when the file is
            missing or too few pairs could be scored.
        """
        print("\n" + "="*60)
        print("üìä Ukuhlolwa Kwedathasethe YesiZulu (isiZulu Dataset Evaluation)")
        print("="*60)

        word_pairs = self.load_isizulu_similarity_dataset(filepath)
        if word_pairs is None:
            return None

        return self.evaluate_similarity(word_pairs, compute_classification, threshold)

    def _evaluate_with_override(self, word_pairs, **overrides):
        """Run evaluate_similarity with attributes temporarily replaced and a fresh cache."""
        saved_attrs = {name: getattr(self, name) for name in overrides}
        saved_cache = self.similarity_cache

        for name, value in overrides.items():
            setattr(self, name, value)
        self.similarity_cache = {}  # each configuration starts with an empty cache

        try:
            return self.evaluate_similarity(word_pairs, compute_classification=True,
                                            threshold='median')
        finally:
            # Restore settings and the caller's cache even if evaluation raises
            for name, value in saved_attrs.items():
                setattr(self, name, value)
            self.similarity_cache = saved_cache

    def _print_comparison_summary(self, title, results):
        """Print per-configuration metrics and the best Spearman / F1 entries."""
        print("\n" + "="*70)
        print(title)
        print("="*70)

        if not results:
            # max() on an empty dict would raise; nothing to rank
            print("\nNo configuration produced results.")
            return

        for name, data in results.items():
            print(f"\n{name.upper()}:")
            print(f"  Spearman œÅ: {data['spearman']:.4f}")
            print(f"  Pearson r:  {data['pearson']:.4f}")
            print(f"  Accuracy:   {data['accuracy']:.4f}")
            print(f"  F1 Score:   {data['f1_score']:.4f}")

        # Find best
        best_spearman = max(results.items(), key=lambda x: x[1]['spearman'])
        best_f1 = max(results.items(), key=lambda x: x[1]['f1_score'])

        print(f"\nüèÜ Best Spearman œÅ: {best_spearman[0].upper()} ({best_spearman[1]['spearman']:.4f})")
        print(f"üèÜ Best F1 Score:   {best_f1[0].upper()} ({best_f1[1]['f1_score']:.4f})")

    def compare_pooling_strategies(self, word_pairs, pooling_methods=('mean', 'first', 'last', 'max')):
        """
        Compare different pooling strategies for subword tokens.

        The evaluator's own pooling setting and similarity cache are restored
        afterwards.

        Args:
            word_pairs: List of tuples (word1, word2, human_score)
            pooling_methods: Iterable of pooling strategies to compare

        Returns:
            Dictionary with results for each pooling method that could be
            evaluated (empty when none could).
        """
        print("\n" + "="*70)
        print("üî¨ POOLING STRATEGY COMPARISON")
        print("="*70)

        results = {}

        for pooling in pooling_methods:
            print(f"\nüìä Testing pooling method: {pooling.upper()}")
            print("‚îÄ"*70)

            result = self._evaluate_with_override(word_pairs, pooling=pooling)
            if result:
                results[pooling] = result

        self._print_comparison_summary("üìà POOLING STRATEGY SUMMARY", results)

        return results

    def compare_layers(self, word_pairs, layers=(-4, -3, -2, -1)):
        """
        Compare different BERT layers for word embeddings.

        The evaluator's own layer setting and similarity cache are restored
        afterwards.

        Args:
            word_pairs: List of tuples (word1, word2, human_score)
            layers: Iterable of layer indices to compare (-1 = last layer)

        Returns:
            Dictionary keyed by "layer_<index>" with results for each layer
            that could be evaluated (empty when none could).
        """
        print("\n" + "="*70)
        print("üî¨ BERT LAYER COMPARISON")
        print("="*70)

        results = {}

        for layer_idx in layers:
            print(f"\nüìä Testing layer: {layer_idx}")
            print("‚îÄ"*70)

            result = self._evaluate_with_override(word_pairs, layer=layer_idx)
            if result:
                results[f"layer_{layer_idx}"] = result

        self._print_comparison_summary("üìà LAYER COMPARISON SUMMARY", results)

        return results

    def print_summary(self, results):
        """
        Print a comprehensive summary of all results in English and isiZulu.

        Args:
            results: Mapping of benchmark name to the dictionary returned by
                evaluate_similarity. Classification metrics are printed only
                for entries that contain an 'accuracy' key.
        """
        print("\n" + "="*70)
        print("üìà ISIFINYEZO ESIPHELELE (COMPREHENSIVE SUMMARY)")
        print("="*70)

        for benchmark, data in results.items():
            print(f"\n{benchmark.upper()}:")
            print(f"  {'‚îÄ'*60}")
            print(f"  Amamethrikhi Wokuhlobana (Correlation Metrics):")
            print(f"    Spearman œÅ:   {data['spearman']:.4f}")
            print(f"    Pearson r:    {data['pearson']:.4f}")
            print(f"    Ukumbozwa:    {data['coverage']:.1f}%")

            if 'accuracy' not in data:
                continue

            print(f"\n  Amamethrikhi Okuhlukanisa (Classification Metrics):")
            print(f"    Ukunemba (Accuracy):    {data['accuracy']:.4f}")
            print(f"    Ukunembile (Precision): {data['precision']:.4f}")
            print(f"    Ukukhumbula (Recall):   {data['recall']:.4f}")
            print(f"    I-F1 Score:             {data['f1_score']:.4f}")


def create_sample_isizulu_dataset():
    """
    Write a small built-in isiZulu word similarity CSV for testing.

    The file 'isizulu_similarity_sample.csv' is created (or overwritten) in
    the current working directory with a header row followed by 15 word
    pairs, and no trailing newline.

    Returns:
        The name of the file that was written.
    """
    output_name = 'isizulu_similarity_sample.csv'
    csv_lines = (
        "igama1,igama2,isilinganiso",
        "umfazi,indoda,5.2",
        "ingane,umntwana,9.5",
        "inja,ikati,6.8",
        "indlu,ikhaya,8.7",
        "umfula,ulwandle,6.5",
        "isikole,isikhungo,7.8",
        "imali,uhulumeni,4.2",
        "uthisha,umfundi,7.5",
        "isitsha,indishi,9.2",
        "ibhola,umdlalo,7.8",
        "ukudla,ukuphuza,6.5",
        "usuku,ubusuku,3.2",
        "umuntu,ubuntu,8.5",
        "itheku,idolobha,9.1",
        "izulu,umoya,6.8",
    )

    with open(output_name, 'w', encoding='utf-8') as handle:
        handle.write("\n".join(csv_lines))

    print("‚úÖ Kwakhiwe i-isizulu_similarity_sample.csv (Created isiZulu sample dataset)")
    return output_name


# Demo: print a quick-start guide, then run the evaluator end-to-end on a generated sample file
if __name__ == "__main__":
    banner_rule = "=" * 70

    print(banner_rule)
    print("üìä IsiZulu Word Similarity Benchmark Evaluator - mBERT VERSION")
    print("   Okuqukethwe: Precision, Recall, Accuracy, ne-F1 Score")
    print(banner_rule)

    for feature_line in (
        "\n‚úÖ mBERT-compatible implementation!",
        "‚úÖ Supports contextualized embeddings with multiple pooling strategies",
        "‚úÖ Can compare different BERT layers",
        "‚úÖ Ready to use with multilingual BERT!",
    ):
        print(feature_line)

    print(f"\n{banner_rule}")
    print("üìñ QUICK START")
    print(banner_rule)

    # Usage snippet shown to the user verbatim (not executed)
    quick_start_guide = """
from isizulu_benchmark_evaluator_mbert import IsiZuluBenchmarkEvaluatorMBERT

# Create evaluator (will automatically load mBERT)
evaluator = IsiZuluBenchmarkEvaluatorMBERT(
    pooling='mean',  # or 'first', 'last', 'max'
    layer=-1         # -1 = last layer, -2 = second-to-last, etc.
)

# Evaluate on your isiZulu dataset
results = evaluator.evaluate_isizulu_dataset(
    'isizulu_word_similarity.csv',
    compute_classification=True,
    threshold='median'
)

# Print all metrics
print(f"Spearman œÅ: {results['spearman']:.4f}")
print(f"Accuracy: {results['accuracy']:.4f}")
print(f"Precision: {results['precision']:.4f}")
print(f"Recall: {results['recall']:.4f}")
print(f"F1 Score: {results['f1_score']:.4f}")

# Compare pooling strategies
word_pairs = evaluator.load_isizulu_similarity_dataset('isizulu_word_similarity.csv')
pooling_results = evaluator.compare_pooling_strategies(word_pairs)

# Compare BERT layers
layer_results = evaluator.compare_layers(word_pairs, layers=[-4, -3, -2, -1])
"""
    print(quick_start_guide)

    print(f"\n{banner_rule}")
    print("üß™ Running DEMO with sample data...")
    print(banner_rule)

    # Write the built-in sample dataset to disk
    sample_file = create_sample_isizulu_dataset()

    # Build the evaluator (loads the default mBERT model)
    print("\nüì• Initializing mBERT evaluator...")
    evaluator = IsiZuluBenchmarkEvaluatorMBERT(pooling='mean', layer=-1)

    # Score the sample dataset
    print("\nüìä Evaluating sample isiZulu dataset...")
    results = evaluator.evaluate_isizulu_dataset(
        sample_file,
        compute_classification=True,
        threshold='median',
    )

    # Summarize only when the evaluation produced results
    if results:
        evaluator.print_summary({'Sample Dataset': results})

    # Pooling comparison (optional, comment out if too slow)
    print(f"\n{banner_rule}")
    print("üî¨ Comparing pooling strategies (this may take a minute)...")
    print(banner_rule)

    word_pairs = evaluator.load_isizulu_similarity_dataset(sample_file)
    if word_pairs:
        demo_pairs = word_pairs[:5]  # only the first 5 pairs, to keep the demo short
        pooling_results = evaluator.compare_pooling_strategies(
            demo_pairs,
            pooling_methods=['mean', 'first'],
        )

    print("\nüöÄ Demo evaluation completed!")
    print("\nüí° TIP: For full evaluation, use your real isiZulu similarity dataset")
    print("   and consider running overnight for large datasets (mBERT is slower than Word2Vec)")

ModuleNotFoundError: No module named 'torch'