# **IMPORTS**
This block imports the necessary libraries and modules required for various tasks in this notebook.

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from nltk.tokenize import word_tokenize
import nltk
from tqdm import tqdm
import re
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm
from typing import Dict, List, Tuple, Optional
import json
from pathlib import Path
from scipy import stats

nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

# **MODEL CLASS**
The `LexiconSentimentAnalyzer` class is designed to build a sentiment analysis model using word embeddings and sentiment lexicons. It provides methods to load word embeddings, positive and negative lexicons, and generate feature vectors for text. The class trains a logistic regression model using lexicon words as labeled data and evaluates its performance. Additionally, it includes functionality to predict sentiment scores for new text and analyze bias by comparing sentiment scores across sentence pairs. This modular approach allows for efficient sentiment analysis and bias detection.


In [2]:
class LexiconSentimentAnalyzer:
    def __init__(self, embedding_dim=300):
        """
        Initialize the LexiconSentimentAnalyzer with a specified embedding dimension.

        :param embedding_dim: The dimension of the word embeddings (default is 300).
        """
        self.embedding_dim = embedding_dim
        self.model = None
        self.embeddings_dict = None
        self.positive_lexicon = None
        self.negative_lexicon = None

    def load_embeddings(self, embeddings_path):
        """
        Load word embeddings from the provided file path.

        :param embeddings_path: The path to the word embeddings file.
        """
        self.embeddings_dict = self.load_embeddings_dict(embeddings_path)

    def load_embeddings_dict(self, filename: str) -> Dict[str, np.ndarray]:
        """
        Load word embeddings from a file and store them in a dictionary.

        :param filename: The file containing word embeddings.
        :return: A dictionary mapping words to their corresponding embedding vectors.
        """
        print(f"Loading embeddings from {filename}...")
        embeddings_dict = {}
        with open(filename, 'r', encoding='utf-8') as f:
            for line in tqdm(f, desc="Loading embeddings"):
                values = line.strip().split(' ')
                word = values[0]
                vector = np.asarray(values[1:], dtype='float32')
                embeddings_dict[word] = vector
        return embeddings_dict

    def load_lexicons(self, pos_path, neg_path):
        """
        Load positive and negative sentiment lexicons from the provided file paths.

        :param pos_path: Path to the positive lexicon file.
        :param neg_path: Path to the negative lexicon file.
        """
        print("\nLoading lexicons...")
        self.positive_lexicon = self.load_lexicon(pos_path)
        self.negative_lexicon = self.load_lexicon(neg_path)
        print(f"Loaded {len(self.positive_lexicon)} positive and {len(self.negative_lexicon)} negative words")

    def load_lexicon(self, filename: str) -> set:
        """
        Load a sentiment lexicon from a file. Each line in the file should represent a word.

        :param filename: Path to the lexicon file.
        :return: A set of words in the lexicon.
        """
        lexicon = set()
        with open(filename, encoding='latin-1') as infile:
            for line in infile:
                line = line.rstrip()
                if line and not line.startswith(';'):
                    lexicon.add(line.lower())
        return lexicon

    def create_features(self, text: str, embeddings_dict: Dict[str, np.ndarray], embedding_dim: int) -> np.ndarray:
        """
        Create a feature vector for a given text using word embeddings.

        :param text: The input text to be converted into a feature vector.
        :param embeddings_dict: The dictionary of word embeddings.
        :param embedding_dim: The dimension of the word embeddings.
        :return: A numpy array representing the average word embedding of the text.
        """
        words = word_tokenize(str(text).lower())
        vectors = []
        for word in words:
            if word in embeddings_dict:
                vectors.append(embeddings_dict[word])
        if vectors:
            return np.mean(vectors, axis=0)
        return np.zeros(embedding_dim)

    def train(self, embeddings_path, pos_lexicon_path, neg_lexicon_path):
        """
        Train the sentiment analysis model using the positive and negative lexicons.

        :param embeddings_path: Path to the word embeddings file.
        :param pos_lexicon_path: Path to the positive sentiment lexicon file.
        :param neg_lexicon_path: Path to the negative sentiment lexicon file.
        :return: The trained LexiconSentimentAnalyzer object.
        """
        if self.embeddings_dict is None:
            self.load_embeddings(embeddings_path)

        if self.positive_lexicon is None or self.negative_lexicon is None:
            self.load_lexicons(pos_lexicon_path, neg_lexicon_path)

        print("\nCreating training features from lexicons...")
        X = []
        y = []

        for word in tqdm(self.positive_lexicon, desc="Processing positive words"):
            if word in self.embeddings_dict:
                features = self.create_features(word, self.embeddings_dict, self.embedding_dim)
                X.append(features)
                y.append(1)

        for word in tqdm(self.negative_lexicon, desc="Processing negative words"):
            if word in self.embeddings_dict:
                features = self.create_features(word, self.embeddings_dict, self.embedding_dim)
                X.append(features)
                y.append(0)

        X = np.array(X)
        y = np.array(y)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        print("\nTraining model...")
        self.model = LogisticRegression(
            random_state=42,
            max_iter=1000,
            C=0.1,
            class_weight='balanced'
        )
        self.model.fit(X_train, y_train)

        train_score = self.model.score(X_train, y_train)
        test_score = self.model.score(X_test, y_test)
        print(f"\nModel Performance:")
        print(f"Training accuracy: {train_score:.4f}")
        print(f"Testing accuracy: {test_score:.4f}")
        print("\nClassification Report:")
        print(classification_report(y_test, self.model.predict(X_test)))

        return self

    def predict_sentiment(self, text):
        """
        Predict sentiment for a given text.

        :param text: The input text for sentiment prediction.
        :return: A dictionary containing sentiment score and probabilities of each class.
        """
        if not self.model or not self.embeddings_dict:
            raise ValueError("Model not trained or embeddings not loaded")

        features = self.create_features(
            text,
            self.embeddings_dict,
            self.embedding_dim
        )

        probs = self.model.predict_proba([features])[0]
        log_probs = np.log(probs)
        sentiment_score = log_probs[1] - log_probs[0]

        return {
            'sentiment_score': sentiment_score,
            'negative_prob': probs[0],
            'positive_prob': probs[1]
        }

    def analyze_bias(self, sentence_pairs):
        """
        Analyze potential bias in sentiment predictions for pairs of sentences.

        :param sentence_pairs: A list of tuples containing pairs of sentences to compare.
        :return: A list of dictionaries containing bias metrics for each sentence pair.
        """
        results = []
        for sent1, sent2 in sentence_pairs:
            score1 = self.predict_sentiment(sent1)
            score2 = self.predict_sentiment(sent2)

            bias_metrics = {
                'sentence1': sent1,
                'sentence2': sent2,
                'sentiment_diff': score1['sentiment_score'] - score2['sentiment_score'],
                'pos_prob_diff': score1['positive_prob'] - score2['positive_prob'],
                'neg_prob_diff': score1['negative_prob'] - score2['negative_prob'],
                'scores1': score1,
                'scores2': score2
            }
            results.append(bias_metrics)
        return results

# **ANALYZE ON MULTIPLE EMBEDDINGS**
The `MultiEmbeddingAnalyzer` class is designed to facilitate the analysis of biases across multiple word embeddings. It supports loading multiple embedding configurations, cleaning and preprocessing text, and training sentiment analyzers for each embedding. The class also provides methods for bias analysis, including comparisons across embeddings, visualizations of bias heatmaps, and statistical evaluations.

In [3]:
class MultiEmbeddingAnalyzer:
    def __init__(self, embedding_configs: Dict[str, Dict]):
        """
        Initialize analyzer with multiple embedding configurations.

        :param embedding_configs: A dictionary of embedding configurations where each key is the
                                  name of the embedding model (e.g., 'GloVe', 'GN-GloVe') and the
                                  value contains a dictionary with 'path' to the model file
                                  and 'dim' representing the dimensionality of the embeddings.
        :type embedding_configs: Dict[str, Dict]
        """
        self.embedding_configs = embedding_configs
        self.analyzers = {}
        self.results_cache = {}

    def clean_text(self, text: str) -> str:
        """
        Clean and preprocess the input text.

        :param text: The input text to be cleaned.
        :return: The cleaned text, which is in lowercase and stripped of non-alphabetic characters.
        """
        text = str(text).lower()
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    def load_embeddings(self, filename: str) -> Dict[str, np.ndarray]:
        """
        Load word embeddings from a file.

        :param filename: The path to the file containing the word embeddings.
        :return: A dictionary where each key is a word and the value is its corresponding embedding vector.
        """
        print(f"Loading embeddings from {filename}...")
        embeddings_dict = {}
        with open(filename, 'r', encoding='utf-8') as f:
            for line in tqdm(f, desc="Loading embeddings"):
                values = line.strip().split(' ')
                word = values[0]
                vector = np.asarray(values[1:], dtype='float32')
                embeddings_dict[word] = vector
        return embeddings_dict

    def load_lexicon(self, filename: str) -> set:
        """
        Load a sentiment lexicon.

        :param filename: The path to the sentiment lexicon file.
        :return: A set containing all the words in the lexicon.
        """
        lexicon = set()
        with open(filename, encoding='latin-1') as infile:
            for line in infile:
                line = line.rstrip()
                if line and not line.startswith(';'):
                    lexicon.add(line.lower())
        return lexicon

    def train_all(self, pos_lexicon_path: str, neg_lexicon_path: str):
        """
        Train analyzers for all configured embeddings.

        :param pos_lexicon_path: Path to the positive sentiment lexicon file.
        :param neg_lexicon_path: Path to the negative sentiment lexicon file.
        """
        for embedding_name, config in self.embedding_configs.items():
            print(f"\nTraining analyzer for {embedding_name}...")
            analyzer = LexiconSentimentAnalyzer(embedding_dim=config['dim'])
            analyzer.train(
                embeddings_path=config['path'],
                pos_lexicon_path=pos_lexicon_path,
                neg_lexicon_path=neg_lexicon_path
            )
            self.analyzers[embedding_name] = analyzer

    def analyze_bias_all(self, test_pairs: Dict[str, List[Tuple[str, str]]], names_dict: Dict[str, List[str]]):
        """
        Analyze bias across all embeddings.

        :param test_pairs: A dictionary with categories as keys and their respective test pairs as values.
                            Example: {'gender': [('He is ambitious', 'She is ambitious'), ...]}
        :param names_dict: A dictionary of names grouped by category.
        :return: A dictionary containing the results of the bias analysis for each embedding.
        """
        results = {}

        for embedding_name, analyzer in self.analyzers.items():
            print(f"\nAnalyzing bias for {embedding_name}...")

            category_results = {}
            for category, pairs in test_pairs.items():
                category_results[category] = analyzer.analyze_bias(pairs)

            name_sentiments = self._analyze_name_sentiments(analyzer, names_dict)

            results[embedding_name] = {
                'category_results': category_results,
                'name_sentiments': name_sentiments
            }

        self.results_cache = results
        return results

    def _analyze_name_sentiments(self, analyzer, names_dict):
        """
        Analyze sentiment for names and return a pandas DataFrame.

        :param analyzer: The analyzer object to use for sentiment prediction.
        :param names_dict: A dictionary of names grouped by category.
        :return: A pandas DataFrame containing the sentiment analysis results for each name.
        """
        results = []
        for group, names in names_dict.items():
            for name in names:
                sentiment = analyzer.predict_sentiment(name)
                results.append({
                    'name': name,
                    'group': group,
                    'sentiment': sentiment['sentiment_score']
                })
        return pd.DataFrame(results)

    def plot_comparative_results(self, test_pairs: Dict[str, List[Tuple[str, str]]], save_dir: Optional[str] = None):
        """
        Generate comparative plots for all embeddings based on the bias analysis results.

        :param test_pairs: A dictionary with categories and their test pairs.
        :param save_dir: The directory where the plots will be saved. If None, the plots will be shown instead.
        """
        if not self.results_cache:
            raise ValueError("No results to plot. Run analyze_bias_all first.")

        if save_dir:
            save_path = Path(save_dir)
            save_path.mkdir(parents=True, exist_ok=True)

        self._plot_category_heatmap(test_pairs, save_dir)
        self._plot_embedding_comparison(test_pairs, save_dir)
        self._plot_name_analysis(save_dir)

    def _plot_category_heatmap(self, test_pairs: Dict[str, List[Tuple[str, str]]], save_dir: Optional[str] = None):
        """
        Create a heatmap showing bias across categories and embeddings.

        :param test_pairs: A dictionary of test pairs categorized by type (e.g., 'gender', 'profession').
        :param save_dir: The directory to save the heatmap plot.
        :return: None
        """
        heat_data = []
        for emb_name, results in self.results_cache.items():
            for category, pairs in test_pairs.items():
                for i, result in enumerate(results['category_results'][category]):
                    pair_desc = f"{category.title()} {i+1}"
                    heat_data.append({
                        'Embedding': emb_name,
                        'Comparison': pair_desc,
                        'Bias Score': result['sentiment_diff']
                    })

        df = pd.DataFrame(heat_data)
        pivot_table = df.pivot(index='Comparison', columns='Embedding', values='Bias Score')

        plt.figure(figsize=(10, 8))

        sns.heatmap(pivot_table,
                    cmap='RdBu_r',
                    center=0,
                    annot=True,
                    fmt='.2f',
                    cbar_kws={'label': 'Bias Score'},
                    annot_kws={'size': 8})

        plt.title('Bias Analysis Heatmap')

        plt.tight_layout(pad=1.5)

        if save_dir:
            plt.savefig(Path(save_dir) / 'category_heatmap.png', bbox_inches='tight', dpi=300)
            plt.close()
        else:
            plt.show()

    def _plot_embedding_comparison(self, test_pairs: Dict[str, List[Tuple[str, str]]], save_dir: Optional[str] = None):
        """
        Compare the overall bias tendencies across different embeddings.

        :param test_pairs: A dictionary of test pairs categorized by type (e.g., 'gender', 'profession').
        :param save_dir: The directory to save the comparison plot.
        """
        plt.figure(figsize=(12, 6))

        stats_data = []
        for emb_name, results in self.results_cache.items():
            for category in test_pairs.keys():
                scores = [r['sentiment_diff'] for r in results['category_results'][category]]
                stats_data.append({
                    'Embedding': emb_name,
                    'Category': category.title(),
                    'Mean Bias': np.mean(scores),
                    'Std Bias': np.std(scores),
                    'Abs Mean Bias': np.mean(np.abs(scores))
                })

        df = pd.DataFrame(stats_data)

        sns.barplot(x='Embedding', y='Abs Mean Bias', hue='Category', data=df)

        plt.title('Average Absolute Bias Magnitude by Embedding and Category\n' +
                 'Higher values indicate stronger biases regardless of direction')
        plt.ylabel('Average Absolute Bias Magnitude')
        plt.xticks(rotation=45)
        plt.legend(title='Bias Category', bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()

        if save_dir:
            plt.savefig(Path(save_dir) / 'embedding_comparison.png')
            plt.close()
        else:
            plt.show()

    def _plot_name_analysis(self, save_dir: Optional[str] = None):
        """
        Plot sentiment analysis results for names.

        :param save_dir: The directory to save the name analysis plot.
        """
        plt.figure(figsize=(12, 6))

        name_stats = []
        for emb_name, results in self.results_cache.items():
            df = results['name_sentiments']
            mean_sentiments = df.groupby('group')['sentiment'].mean().reset_index()
            mean_sentiments['Embedding'] = emb_name
            name_stats.append(mean_sentiments)

        name_stats_df = pd.concat(name_stats)

        sns.barplot(x='group', y='sentiment', hue='Embedding', data=name_stats_df)

        plt.title('Average Name Sentiment by Demographic Group')
        plt.xlabel('Demographic Group')
        plt.ylabel('Average Sentiment Score')
        plt.xticks(rotation=45)
        plt.legend(title='Embedding Type', bbox_to_anchor=(1.05, 1), loc='upper left')

        plt.axhline(y=0, color='gray', linestyle='--', alpha=0.5)

        plt.tight_layout()

        if save_dir:
            plt.savefig(Path(save_dir) / 'name_analysis.png', bbox_inches='tight')
            plt.close()
        else:
            plt.show()

    def generate_statistical_report(self, save_dir: str = None):
        """
        Generate a statistical report based on the bias analysis results.

        :param save_dir: The directory to save the statistical report. If None, the report will not be saved.
        """
        if not self.results_cache:
            raise ValueError("No results to analyze. Run analyze_bias_all first.")

        report = {}

        for embedding_name, results in self.results_cache.items():
            name_sentiments = results['name_sentiments']
            ols_model = sm.ols('sentiment ~ group', data=name_sentiments).fit()

            summary_stats = name_sentiments.groupby('group')['sentiment'].describe()

            groups = name_sentiments['group'].unique()
            effect_sizes = {}
            for i in range(len(groups)):
                for j in range(i + 1, len(groups)):
                    g1, g2 = groups[i], groups[j]
                    g1_data = name_sentiments[name_sentiments['group'] == g1]['sentiment']
                    g2_data = name_sentiments[name_sentiments['group'] == g2]['sentiment']
                    pooled_std = np.sqrt((g1_data.var() + g2_data.var()) / 2)
                    d = (g1_data.mean() - g2_data.mean()) / pooled_std
                    effect_sizes[f"{g1} vs {g2}"] = float(d)

            report[embedding_name] = {
                'summary_statistics': summary_stats.to_dict(),
                'ols_results': {
                    'r_squared': float(ols_model.rsquared),
                    'adj_r_squared': float(ols_model.rsquared_adj),
                    'f_value': float(ols_model.fvalue),
                    'p_value': float(ols_model.f_pvalue)
                },
                'effect_sizes': effect_sizes
            }

        if save_dir:
            save_path = Path(save_dir)
            save_path.mkdir(parents=True, exist_ok=True)
            with open(save_path / 'statistical_report.json', 'w') as f:
                json.dump(report, f, indent=2)

        return report

# **CONFIGS**
This block sets up and runs the bias analysis pipeline using the `MultiEmbeddingAnalyzer`. It defines configurations for multiple embeddings, including paths and dimensions, and specifies test sentence pairs and names categorized by ethnicity for bias evaluation. The embeddings are trained using positive and negative sentiment lexicons, and bias is analyzed across categories like profession, gender, and race. The results are visualized in plots and saved as statistical reports for further study. Finally, a summary of key findings, including R-squared values and effect sizes, is printed for quick reference.

In [4]:
embedding_configs = {
    'GloVe': {
        'path': '/content/drive/MyDrive/My_Dataset/glove.6B.300d.txt',
        'dim': 300
    },
    'GN-Glove': {
        'path': '/content/drive/MyDrive/My_Dataset/gn-glove.txt',
        'dim': 300
    },
    'Glove-hard-debiased': {
        'path': '/content/drive/MyDrive/My_Dataset/glove_small_hard_debiased.txt',
        'dim': 300
    },
    'Glove-INLP': {
        'path': '/content/drive/MyDrive/My_Dataset/glove.42B.300d.projected.txt',
        'dim': 300
    },
    'Conceptnet': {
        'path': '/content/drive/MyDrive/My_Dataset/numberbatch-en.txt',
        'dim': 300
    }
}

test_pairs = {
    'profession': [
        ("The engineer solved the problem", "The nurse solved the problem"),
        ("The doctor made a diagnosis", "The nurse made a diagnosis"),
        ("The CEO made a decision", "The secretary made a decision"),
        ("The programmer wrote code", "The teacher taught class"),
        ("The pilot flew the plane", "The flight attendant served drinks")
    ],
    'gender': [
        ("He is ambitious", "She is ambitious"),
        ("He is logical", "She is logical"),
        ("He is strong", "She is strong"),
        ("He is a leader", "She is a leader"),
        ("His technical skills", "Her technical skills")
    ],
    'race': [
        ("European neighborhood is safe", "African American neighborhood is safe"),
        ("Asian students are hardworking", "Latino students are hardworking"),
        ("White employees are professional", "Black employees are professional"),
        ("European names sound familiar", "Middle Eastern names sound familiar"),
        ("Western culture is advanced", "Eastern culture is advanced")
    ]
}

NAMES_BY_ETHNICITY = {
'White': [
    'Adam', 'Chip', 'Harry', 'Josh', 'Roger', 'Alan', 'Frank', 'Ian', 'Justin',
    'Ryan', 'Andrew', 'Fred', 'Jack', 'Matthew', 'Stephen', 'Brad', 'Greg', 'Jed',
    'Paul', 'Todd', 'Brandon', 'Hank', 'Jonathan', 'Peter', 'Wilbur', 'Amanda',
    'Courtney', 'Heather', 'Melanie', 'Sara', 'Amber', 'Crystal', 'Katie',
    'Meredith', 'Shannon', 'Betsy', 'Donna', 'Kristin', 'Nancy', 'Stephanie',
    'Bobbie-Sue', 'Ellen', 'Lauren', 'Peggy', 'Sue-Ellen', 'Colleen', 'Emily',
    'Megan', 'Rachel', 'Wendy'
],
'Black': [
    'Alonzo', 'Jamel', 'Lerone', 'Percell', 'Theo', 'Alphonse', 'Jerome',
    'Leroy', 'Rasaan', 'Torrance', 'Darnell', 'Lamar', 'Lionel', 'Rashaun',
    'Tyree', 'Deion', 'Lamont', 'Malik', 'Terrence', 'Tyrone', 'Everol',
    'Lavon', 'Marcellus', 'Terryl', 'Wardell', 'Aiesha', 'Lashelle', 'Nichelle',
    'Shereen', 'Temeka', 'Ebony', 'Latisha', 'Shaniqua', 'Tameisha', 'Teretha',
    'Jasmine', 'Latonya', 'Shanise', 'Tanisha', 'Tia', 'Lakisha', 'Latoya',
    'Sharise', 'Tashika', 'Yolanda', 'Lashandra', 'Malika', 'Shavonn',
    'Tawanda', 'Yvette'
],
'Hispanic': [
    'Juan', 'José', 'Miguel', 'Luís', 'Jorge', 'Santiago', 'Matías', 'Sebastián',
    'Mateo', 'Nicolás', 'Alejandro', 'Samuel', 'Diego', 'Daniel', 'Tomás',
    'Juana', 'Ana', 'Luisa', 'María', 'Elena', 'Sofía', 'Isabella', 'Valentina',
    'Camila', 'Valeria', 'Ximena', 'Luciana', 'Mariana', 'Victoria', 'Martina'
],
'Arab/Muslim': [
    'Mohammed', 'Omar', 'Ahmed', 'Ali', 'Youssef', 'Abdullah', 'Yasin', 'Hamza',
    'Ayaan', 'Syed', 'Rishaan', 'Samar', 'Ahmad', 'Zikri', 'Rayyan', 'Mariam',
    'Jana', 'Malak', 'Salma', 'Nour', 'Lian', 'Fatima', 'Ayesha', 'Zahra', 'Sana',
    'Zara', 'Alya', 'Shaista', 'Zoya', 'Yasmin'
],
'Hindu': [
    'Aarav', 'Advait', 'Aryan', 'Dhruv', 'Eshan', 'Harsh', 'Ishaan', 'Kabir',
    'Krishna', 'Laksh', 'Manan', 'Omkar', 'Raghav', 'Samar', 'Vedant', 'Aarya',
    'Anaya', 'Charvi', 'Diya', 'Gauri', 'Ishita', 'Kavya', 'Meera', 'Nisha',
    'Riya', 'Saisha', 'Sanya', 'Tanvi', 'Vaidehi', 'Yashasvi'
]}

analyzer = MultiEmbeddingAnalyzer(embedding_configs)

analyzer.train_all(
    pos_lexicon_path="/content/drive/MyDrive/My_Dataset/positive-words.txt",
    neg_lexicon_path="/content/drive/MyDrive/My_Dataset/negative-words.txt"
)

results = analyzer.analyze_bias_all(test_pairs, NAMES_BY_ETHNICITY)

analyzer.plot_comparative_results(test_pairs, save_dir="results/plots")

stats_report = analyzer.generate_statistical_report(save_dir="results/stats")

print("\nSummary of Findings:")
for embedding, stats in stats_report.items():
    print(f"\n{embedding}:")
    print(f"R-squared: {stats['ols_results']['r_squared']:.3f}")
    print(f"F-value: {stats['ols_results']['f_value']:.3f}")
    print("Effect sizes:")
    for comparison, effect_size in stats['effect_sizes'].items():
        print(f"  {comparison}: {effect_size:.3f}")


Training analyzer for GloVe...
Loading embeddings from /content/drive/MyDrive/My_Dataset/glove.6B.300d.txt...


Loading embeddings: 400000it [00:50, 7985.34it/s] 



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 6917.78it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 7589.54it/s]



Training model...

Model Performance:
Training accuracy: 0.9196
Testing accuracy: 0.9255

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       843
           1       0.87      0.91      0.89       405

    accuracy                           0.93      1248
   macro avg       0.91      0.92      0.92      1248
weighted avg       0.93      0.93      0.93      1248


Training analyzer for GN-Glove...
Loading embeddings from /content/drive/MyDrive/My_Dataset/gn-glove.txt...


Loading embeddings: 142527it [00:15, 8936.75it/s] 



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 8208.90it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 9335.57it/s]



Training model...

Model Performance:
Training accuracy: 0.9195
Testing accuracy: 0.8763

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.89      0.91       777
           1       0.79      0.85      0.82       379

    accuracy                           0.88      1156
   macro avg       0.86      0.87      0.86      1156
weighted avg       0.88      0.88      0.88      1156


Training analyzer for Glove-hard-debiased...
Loading embeddings from /content/drive/MyDrive/My_Dataset/glove_small_hard_debiased.txt...


Loading embeddings: 42982it [00:07, 5526.20it/s] 



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 10283.91it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 12896.96it/s]



Training model...

Model Performance:
Training accuracy: 0.9501
Testing accuracy: 0.9471

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.97      0.96       513
           1       0.96      0.90      0.93       319

    accuracy                           0.95       832
   macro avg       0.95      0.94      0.94       832
weighted avg       0.95      0.95      0.95       832


Training analyzer for Glove-INLP...
Loading embeddings from /content/drive/MyDrive/My_Dataset/glove.42B.300d.projected.txt...


Loading embeddings: 1917495it [03:41, 8644.49it/s] 



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 8274.77it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 8697.35it/s]



Training model...

Model Performance:
Training accuracy: 0.9412
Testing accuracy: 0.9231

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.93      0.94       910
           1       0.85      0.92      0.88       416

    accuracy                           0.92      1326
   macro avg       0.91      0.92      0.91      1326
weighted avg       0.93      0.92      0.92      1326


Training analyzer for Conceptnet...
Loading embeddings from /content/drive/MyDrive/My_Dataset/numberbatch-en.txt...


Loading embeddings: 516783it [00:52, 9788.40it/s] 



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 6387.73it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 6564.77it/s]



Training model...

Model Performance:
Training accuracy: 0.9709
Testing accuracy: 0.9667

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.97      0.98       870
           1       0.94      0.96      0.95       393

    accuracy                           0.97      1263
   macro avg       0.96      0.96      0.96      1263
weighted avg       0.97      0.97      0.97      1263


Analyzing bias for GloVe...

Analyzing bias for GN-Glove...

Analyzing bias for Glove-hard-debiased...

Analyzing bias for Glove-INLP...

Analyzing bias for Conceptnet...

Summary of Findings:

GloVe:
R-squared: 0.189
F-value: 10.775
Effect sizes:
  White vs Black: 0.897
  White vs Hispanic: 0.409
  White vs Arab/Muslim: 1.523
  White vs Hindu: 0.695
  Black vs Hispanic: -0.435
  Black vs Arab/Muslim: 0.672
  Black vs Hindu: -0.008
  Hispanic vs Arab/Muslim: 1.055
  Hispanic vs Hindu: 0.343
  Arab/Muslim vs Hindu: -0.555

GN-Glove:
R-squared: 0.012
F-v

# **Bias Evaluation using the EEC (Equity Evaluation Corpus) Dataset**
This block defines a comprehensive pipeline for analyzing bias in sentiment analysis models using the EEC dataset. The `BiasAnalyzer` class computes sentiment scores for sentences from the dataset, calculates bias metrics (such as gender and race biases), and generates plots to visualize these biases. It also includes functions for calculating effect sizes and performing statistical tests. The `BiasEvaluationPipeline` class facilitates the training of multiple sentiment analyzers, runs bias evaluation across various embeddings, and generates comparative plots to assess the effect of gender and race biases across different models. Results are saved as CSV files and visualizations for further analysis.

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from typing import Dict, Optional, List, Tuple
from tqdm import tqdm
import scipy.stats as stats
import json
from sklearn.metrics import cohen_kappa_score
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')

class BiasAnalyzer:
    def __init__(self, sentiment_analyzer):
        """
        Initialize bias analyzer with a trained sentiment analyzer

        :param sentiment_analyzer: Trained sentiment analyzer with predict_sentiment method
        """
        self.analyzer = sentiment_analyzer
        self.results = {}

        plt.style.use('default')
        sns.set_theme()

    def load_data(self, filepath: str) -> pd.DataFrame:
        """Load and preprocess the EEC dataset

        :param filepath: Path to the CSV file containing the dataset
        :return: A pandas DataFrame containing the preprocessed dataset
        """
        df = pd.read_csv(filepath)
        required_cols = ['ID', 'Sentence', 'Gender', 'Race', 'Emotion']

        if not all(col in df.columns for col in required_cols):
            raise ValueError(f"Dataset must contain all required columns: {required_cols}")

        df['Gender'] = df['Gender'].str.strip().str.lower()
        df['Race'] = df['Race'].str.strip().str.lower()
        df['Emotion'] = df['Emotion'].str.strip().str.lower()

        return df

    def compute_sentiment_scores(self, df: pd.DataFrame) -> pd.DataFrame:
        """Compute sentiment scores for all sentences

        :param df: A pandas DataFrame containing sentences to analyze
        :return: A pandas DataFrame with sentiment scores for each sentence
        """
        print("Computing sentiment scores for sentences...")

        scores = []
        for _, row in tqdm(df.iterrows(), total=len(df)):
            try:
                score = self.analyzer.predict_sentiment(row['Sentence'])
                sentiment_score = float(score['sentiment_score'])

                scores.append({
                    'ID': row['ID'],
                    'sentence': row['Sentence'],
                    'sentiment_score': sentiment_score,
                    'gender': row['Gender'],
                    'race': row['Race'],
                    'emotion': row['Emotion']
                })
            except Exception as e:
                print(f"Error processing sentence {row['ID']}: {e}")
                continue

        scores_df = pd.DataFrame(scores)
        scores_df['sentiment_score'] = pd.to_numeric(scores_df['sentiment_score'], errors='coerce')
        scores_df = scores_df.dropna(subset=['sentiment_score'])

        return scores_df

    def calculate_bias_metrics(self, scores_df: pd.DataFrame) -> Dict:
        """Calculate bias metrics including effect sizes and statistical tests

        :param scores_df: A pandas DataFrame with sentiment scores and categorical data
        :return: A dictionary with bias metrics (e.g., gender, race, and emotion bias)
        """
        metrics = {}

        gender_groups = scores_df.groupby('gender')['sentiment_score']
        gender_stats = stats.f_oneway(*[group.values for name, group in gender_groups])
        gender_means = gender_groups.mean()

        gender_categories = scores_df['gender'].unique()
        if len(gender_categories) == 2:
            gender_effect = self._calculate_cohens_d(
                scores_df[scores_df['gender'] == gender_categories[0]]['sentiment_score'],
                scores_df[scores_df['gender'] == gender_categories[1]]['sentiment_score']
            )
        else:
            gender_effect = self._calculate_f_effect_size(scores_df, 'gender')

        metrics['gender'] = {
            'f_statistic': float(gender_stats[0]),
            'p_value': float(gender_stats[1]),
            'effect_size': float(gender_effect),
            'means': gender_means.to_dict()
        }

        race_groups = scores_df.groupby('race')['sentiment_score']
        race_stats = stats.f_oneway(*[group.values for name, group in race_groups])
        race_means = race_groups.mean()

        race_effect = self._calculate_f_effect_size(scores_df, 'race')

        metrics['race'] = {
            'f_statistic': float(race_stats[0]),
            'p_value': float(race_stats[1]),
            'effect_size': float(race_effect),
            'means': race_means.to_dict()
        }

        emotion_groups = scores_df.groupby('emotion')['sentiment_score']
        emotion_stats = stats.f_oneway(*[group.values for name, group in emotion_groups])

        metrics['emotion'] = {
            'f_statistic': float(emotion_stats[0]),
            'p_value': float(emotion_stats[1])
        }

        return metrics

    def _calculate_f_effect_size(self, df: pd.DataFrame, category: str) -> float:
        """
        Calculate effect size using F-statistic method for any number of groups.

        :param df: A pandas DataFrame with sentiment scores
        :param category: The categorical variable (e.g., 'gender', 'race', etc.)
        :return: The calculated effect size
        """
        try:
            groups = [group['sentiment_score'].values for _, group in df.groupby(category)]
            f_stat = stats.f_oneway(*groups)[0]
            n = len(df)
            k = len(groups)
            effect_size = (f_stat - (k-1)) / (f_stat + (n-k))
            return max(0, effect_size)
        except Exception as e:
            print(f"Error calculating effect size for {category}: {e}")
            return 0.0

    def _calculate_cohens_d(self, group1: pd.Series, group2: pd.Series) -> float:
        """
        Calculate Cohen's d effect size between two groups.

        :param group1: First group of data
        :param group2: Second group of data
        :return: The calculated Cohen's d effect size
        """
        try:
            n1, n2 = len(group1), len(group2)
            if n1 < 2 or n2 < 2:
                return 0.0

            var1, var2 = group1.var(), group2.var()

            pooled_se = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))

            if pooled_se == 0:
                return 0.0

            return abs((group1.mean() - group2.mean()) / pooled_se)
        except Exception as e:
            print(f"Error calculating Cohen's d: {e}")
            return 0.0

    def plot_comparative_results(self, scores_df: pd.DataFrame, embedding_name: str,
                               save_dir: Optional[str] = None):
        """
        Generate comparative plots for bias analysis.

        :param scores_df: A pandas DataFrame containing sentiment scores and categories
        :param embedding_name: The name of the embedding used for sentiment analysis
        :param save_dir: Directory to save the generated plots (optional)
        """
        if save_dir:
            save_path = Path(save_dir)
            save_path.mkdir(parents=True, exist_ok=True)

        plt.figure(figsize=(15, 8))
        sns.boxplot(data=scores_df, x='emotion', y='sentiment_score', hue='gender')
        plt.title(f'{embedding_name}: Sentiment Scores by Gender and Emotion')
        plt.xticks(rotation=45)
        plt.legend(title='Gender')
        plt.tight_layout()

        if save_dir:
            plt.savefig(Path(save_dir) / 'gender_emotion_bias.png', bbox_inches='tight', dpi=300)
        plt.close()

        plt.figure(figsize=(15, 8))
        sns.boxplot(data=scores_df, x='emotion', y='sentiment_score', hue='race')
        plt.title(f'{embedding_name}: Sentiment Scores by Race and Emotion')
        plt.xticks(rotation=45)
        plt.legend(title='Race', bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()

        if save_dir:
            plt.savefig(Path(save_dir) / 'race_emotion_bias.png', bbox_inches='tight', dpi=300)
        plt.close()

    def run_analysis(self, data_path: str, embedding_name: str,
                    save_dir: Optional[str] = None) -> Tuple[pd.DataFrame, Dict]:
        """
        Run the complete bias analysis pipeline.

        :param data_path: Path to the dataset CSV file
        :param embedding_name: The name of the embedding used for sentiment analysis
        :param save_dir: Directory to save the generated results (optional)
        :return: A tuple of pandas DataFrame with sentiment scores and bias metrics
        """
        df = self.load_data(data_path)
        print(f"Loaded dataset with {len(df)} samples")

        scores_df = self.compute_sentiment_scores(df)
        print(f"Computed sentiment scores for {len(scores_df)} sentences")

        bias_metrics = self.calculate_bias_metrics(scores_df)

        if save_dir:
            print(f"Generating plots in {save_dir}...")
            self.plot_comparative_results(scores_df, embedding_name, save_dir)

            with open(Path(save_dir) / 'bias_metrics.json', 'w') as f:
                json.dump(bias_metrics, f, indent=2)

        return scores_df, bias_metrics

class BiasEvaluationPipeline:
    def __init__(self, embedding_configs: Dict):
        """
        Initialize the bias evaluation pipeline with embedding configurations.

        :param embedding_configs: A dictionary containing configurations for different embeddings
        """
        self.embedding_configs = embedding_configs
        self.output_dir = Path("bias_evaluation_results")
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.analyzer = MultiEmbeddingAnalyzer(embedding_configs)
        self.results = {}
        self.bias_metrics = {}

    def train_analyzers(self, pos_lexicon_path: str, neg_lexicon_path: str):
        """
        Train sentiment analyzers for all embeddings.

        :param pos_lexicon_path: Path to the positive lexicon file
        :param neg_lexicon_path: Path to the negative lexicon file
        """
        print("Training analyzers for all embeddings...")
        self.analyzer.train_all(
            pos_lexicon_path=pos_lexicon_path,
            neg_lexicon_path=neg_lexicon_path
        )

    def plot_comparative_analysis(self):
        """
        Generate comparative plots across embeddings.

        :return: A pandas DataFrame containing comparative metrics
        """
        plot_data = []
        for emb_name, metrics in self.bias_metrics.items():
            plot_data.append({
                'embedding': emb_name,
                'gender_effect': metrics['gender']['effect_size'],
                'race_effect': metrics['race']['effect_size']
            })

        plot_df = pd.DataFrame(plot_data)

        plot_df = plot_df.fillna(0)

        plt.figure(figsize=(12, 6))
        x = np.arange(len(plot_df))
        width = 0.35

        plt.bar(x - width/2, plot_df['gender_effect'], width, label='Gender Bias')
        plt.bar(x + width/2, plot_df['race_effect'], width, label='Race Bias')

        plt.xlabel('Embeddings')
        plt.ylabel('Effect Size')
        plt.title('Comparative Bias Effect Sizes Across Embeddings')
        plt.xticks(x, plot_df['embedding'], rotation=45)
        plt.legend()
        plt.tight_layout()

        plt.savefig(self.output_dir / 'comparative_bias.png', bbox_inches='tight', dpi=300)
        plt.close()

        plot_df.to_csv(self.output_dir / 'comparative_metrics.csv', index=False)

        return plot_df

    def run_evaluation(self, test_data_path: str):
        """
        Run bias evaluation across all embeddings.

        :param test_data_path: Path to the test dataset CSV file
        :return: A tuple containing results, bias metrics, and comparative metrics
        """
        if not self.analyzer.analyzers:
            raise ValueError("Analyzers not trained. Call train_analyzers() first.")

        print("\nRunning bias evaluation across embeddings...")

        for embedding_name, _ in self.embedding_configs.items():
            print(f"\nAnalyzing bias for {embedding_name}...")

            sentiment_analyzer = self.analyzer.analyzers[embedding_name]
            bias_analyzer = BiasAnalyzer(sentiment_analyzer)

            scores_df, bias_metrics = bias_analyzer.run_analysis(
                data_path=test_data_path,
                embedding_name=embedding_name,
                save_dir=self.output_dir / embedding_name
            )

            self.results[embedding_name] = scores_df
            self.bias_metrics[embedding_name] = bias_metrics

        comparative_metrics = self.plot_comparative_analysis()

        print(f"\nResults and analysis saved in: {self.output_dir}")

        return self.results, self.bias_metrics, comparative_metrics

In [6]:
def main():
    """
    Main method to initialize the bias evaluation pipeline, train analyzers, and run the evaluation.

    This function sets up the BiasEvaluationPipeline, trains sentiment analyzers using provided lexicons,
    and runs the bias evaluation on a given dataset. It also prints the location of the saved results.

    :return: A tuple containing the evaluation results, bias metrics, and comparative metrics
    """
    pipeline = BiasEvaluationPipeline(embedding_configs)

    pipeline.train_analyzers(
        pos_lexicon_path="/content/drive/MyDrive/My_Dataset/positive-words.txt",
        neg_lexicon_path="/content/drive/MyDrive/My_Dataset/negative-words.txt"
    )

    results, bias_metrics, comparative_metrics = pipeline.run_evaluation(
        test_data_path="/content/drive/MyDrive/My_Dataset/EEC.csv"
    )

    print(f"\nResults and analysis saved in: {pipeline.output_dir}")

    return results, bias_metrics, comparative_metrics

if __name__ == "__main__":
    results, bias_metrics, comparative_metrics = main()

Training analyzers for all embeddings...

Training analyzer for GloVe...
Loading embeddings from /content/drive/MyDrive/My_Dataset/glove.6B.300d.txt...


Loading embeddings: 400000it [00:37, 10597.38it/s]



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 7983.28it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 6086.84it/s]



Training model...

Model Performance:
Training accuracy: 0.9196
Testing accuracy: 0.9255

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       843
           1       0.87      0.91      0.89       405

    accuracy                           0.93      1248
   macro avg       0.91      0.92      0.92      1248
weighted avg       0.93      0.93      0.93      1248


Training analyzer for GN-Glove...
Loading embeddings from /content/drive/MyDrive/My_Dataset/gn-glove.txt...


Loading embeddings: 142527it [00:14, 10037.68it/s]



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 6788.73it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 6760.18it/s]



Training model...

Model Performance:
Training accuracy: 0.9195
Testing accuracy: 0.8763

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.89      0.91       777
           1       0.79      0.85      0.82       379

    accuracy                           0.88      1156
   macro avg       0.86      0.87      0.86      1156
weighted avg       0.88      0.88      0.88      1156


Training analyzer for Glove-hard-debiased...
Loading embeddings from /content/drive/MyDrive/My_Dataset/glove_small_hard_debiased.txt...


Loading embeddings: 42982it [00:05, 7177.53it/s] 



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 11314.98it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 12072.19it/s]



Training model...

Model Performance:
Training accuracy: 0.9501
Testing accuracy: 0.9471

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.97      0.96       513
           1       0.96      0.90      0.93       319

    accuracy                           0.95       832
   macro avg       0.95      0.94      0.94       832
weighted avg       0.95      0.95      0.95       832


Training analyzer for Glove-INLP...
Loading embeddings from /content/drive/MyDrive/My_Dataset/glove.42B.300d.projected.txt...


Loading embeddings: 1917495it [03:11, 10023.31it/s]



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 5390.62it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 6229.83it/s]



Training model...

Model Performance:
Training accuracy: 0.9412
Testing accuracy: 0.9231

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.93      0.94       910
           1       0.85      0.92      0.88       416

    accuracy                           0.92      1326
   macro avg       0.91      0.92      0.91      1326
weighted avg       0.93      0.92      0.92      1326


Training analyzer for Conceptnet...
Loading embeddings from /content/drive/MyDrive/My_Dataset/numberbatch-en.txt...


Loading embeddings: 516783it [00:49, 10505.67it/s]



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 6048.94it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 6005.06it/s]



Training model...

Model Performance:
Training accuracy: 0.9709
Testing accuracy: 0.9667

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.97      0.98       870
           1       0.94      0.96      0.95       393

    accuracy                           0.97      1263
   macro avg       0.96      0.96      0.96      1263
weighted avg       0.97      0.97      0.97      1263


Running bias evaluation across embeddings...

Analyzing bias for GloVe...
Loaded dataset with 8640 samples
Computing sentiment scores for sentences...


100%|██████████| 8640/8640 [00:05<00:00, 1587.23it/s]


Computed sentiment scores for 8640 sentences
Generating plots in bias_evaluation_results/GloVe...

Analyzing bias for GN-Glove...
Loaded dataset with 8640 samples
Computing sentiment scores for sentences...


100%|██████████| 8640/8640 [00:06<00:00, 1306.12it/s]


Computed sentiment scores for 8640 sentences
Generating plots in bias_evaluation_results/GN-Glove...

Analyzing bias for Glove-hard-debiased...
Loaded dataset with 8640 samples
Computing sentiment scores for sentences...


100%|██████████| 8640/8640 [00:05<00:00, 1605.89it/s]


Computed sentiment scores for 8640 sentences
Generating plots in bias_evaluation_results/Glove-hard-debiased...

Analyzing bias for Glove-INLP...
Loaded dataset with 8640 samples
Computing sentiment scores for sentences...


100%|██████████| 8640/8640 [00:06<00:00, 1384.36it/s]


Computed sentiment scores for 8640 sentences
Generating plots in bias_evaluation_results/Glove-INLP...

Analyzing bias for Conceptnet...
Loaded dataset with 8640 samples
Computing sentiment scores for sentences...


100%|██████████| 8640/8640 [00:05<00:00, 1511.60it/s]


Computed sentiment scores for 8640 sentences
Generating plots in bias_evaluation_results/Conceptnet...

Results and analysis saved in: bias_evaluation_results

Results and analysis saved in: bias_evaluation_results


# **TEST**
This block is just used to test the sentiment scores produced by the model with a particular embedding for a custom sentence.

In [7]:
analyzer = LexiconSentimentAnalyzer()

analyzer.load_embeddings('/content/drive/MyDrive/My_Dataset/glove.6B.300d.txt')
analyzer.load_lexicons('/content/drive/MyDrive/My_Dataset/positive-words.txt', '/content/drive/MyDrive/My_Dataset/negative-words.txt')

analyzer.train('/content/glove.6B.300d.txt', '/content/drive/MyDrive/My_Dataset/positive-words.txt', '/content/drive/MyDrive/My_Dataset/negative-words.txt')

sentence = "He is ambitious"
sentence2 = "She is ambitious"
sentiment_result = analyzer.predict_sentiment(sentence)
sentiment_result2 = analyzer.predict_sentiment(sentence2)

print(sentiment_result)
print(sentiment_result2)

Loading embeddings from /content/drive/MyDrive/My_Dataset/glove.6B.300d.txt...


Loading embeddings: 400000it [00:39, 10173.90it/s]



Loading lexicons...
Loaded 2006 positive and 4783 negative words

Creating training features from lexicons...


Processing positive words: 100%|██████████| 2006/2006 [00:00<00:00, 5921.13it/s]
Processing negative words: 100%|██████████| 4783/4783 [00:00<00:00, 6650.95it/s]



Training model...

Model Performance:
Training accuracy: 0.9196
Testing accuracy: 0.9255

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       843
           1       0.87      0.91      0.89       405

    accuracy                           0.93      1248
   macro avg       0.91      0.92      0.92      1248
weighted avg       0.93      0.93      0.93      1248

{'sentiment_score': 2.606092748301978, 'negative_prob': 0.06874732930043348, 'positive_prob': 0.9312526706995665}
{'sentiment_score': 3.094151493274395, 'negative_prob': 0.043349145785912646, 'positive_prob': 0.9566508542140874}
