Models: strong embeddings + NLI (high performance)

In [1]:
!pip -q install sentence-transformers transformers torch openpyxl

In [2]:

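# NOTE: never paste a Hugging Face token into the notebook. Read it from Colab secrets or the HF_TOKEN environment variable instead; any token that was previously committed here must be revoked.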

import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ---- Embeddings (stronger than MiniLM) ----
# Name of the sentence-embedding checkpoint loaded on the next line.
EMB_MODEL_NAME = "all-mpnet-base-v2"     # strong; swap to a smaller model (e.g. "all-MiniLM-L6-v2") if you want faster
# Loaded once at module level; placed on the GPU when CUDA is available, otherwise on the CPU.
emb_model = SentenceTransformer(EMB_MODEL_NAME, device="cuda" if torch.cuda.is_available() else "cpu")

def embed(texts):
    """Encode text with the module-level embedding model.

    texts: a single string or a list of strings. A single string is wrapped
           in a one-element list, so the result is always batch-shaped and
           callers index ``[0]`` to get one vector.
    returns: whatever ``emb_model.encode`` returns for the batch, requested
             with ``normalize_embeddings=True`` (so a dot product of two
             vectors is their cosine similarity).
    """
    batch = [texts] if isinstance(texts, str) else texts
    return emb_model.encode(
        batch,
        normalize_embeddings=True,
        batch_size=32,
        show_progress_bar=False,
    )

# ---- NLI / entailment (strategy supports goal?) ----
NLI_MODEL = "roberta-large-mnli"
# or: NLI_MODEL = "facebook/bart-large-mnli"
tok = AutoTokenizer.from_pretrained(NLI_MODEL)
nli = AutoModelForSequenceClassification.from_pretrained(NLI_MODEL)
# Put the classifier on the GPU when one is available (the embedding model
# already does this). nli_probs() reads the device back from the model's
# parameters, so inputs follow automatically.
nli.to("cuda" if torch.cuda.is_available() else "cpu")
nli.eval()

# Resolve the label positions from the checkpoint's own config rather than
# assuming them: MNLI checkpoints do not all share the same label order.
# The usual order (contradiction, neutral, entailment) is kept as a fallback
# for configs that carry only generic names such as "LABEL_0".
_NLI_LABEL_TO_IDX = {str(label).lower(): int(idx) for idx, label in nli.config.id2label.items()}
CONTR_IDX = _NLI_LABEL_TO_IDX.get("contradiction", 0)
NEUTRAL_IDX = _NLI_LABEL_TO_IDX.get("neutral", 1)
ENTAIL_IDX = _NLI_LABEL_TO_IDX.get("entailment", 2)

@torch.inference_mode()
def nli_probs(premises, hypotheses, batch_size=16):
    """
    Run the NLI classifier on (premise, hypothesis) pairs, in batches.

    premises: list[str]  (strategy); a single str is treated as one premise
    hypotheses: list[str] (goal); a single str is treated as one hypothesis
    batch_size: number of pairs per forward pass (must be >= 1)
    returns: (contradiction, neutral, entailment) probabilities as np arrays,
             one entry per input pair; three empty arrays for empty input
    raises: ValueError if the two inputs differ in length or batch_size < 1
    """
    # A bare string would otherwise be sliced character-by-character below.
    if isinstance(premises, str):
        premises = [premises]
    if isinstance(hypotheses, str):
        hypotheses = [hypotheses]
    premises, hypotheses = list(premises), list(hypotheses)

    if len(premises) != len(hypotheses):
        raise ValueError(
            f"premises and hypotheses must have the same length "
            f"(got {len(premises)} and {len(hypotheses)})"
        )
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1 (got {batch_size})")

    # np.concatenate raises on an empty list, so answer the empty case directly.
    if not premises:
        return (
            np.empty(0, dtype=np.float32),
            np.empty(0, dtype=np.float32),
            np.empty(0, dtype=np.float32),
        )

    # Follow whatever device the model currently lives on.
    device = next(nli.parameters()).device
    c_all, n_all, e_all = [], [], []

    for i in range(0, len(premises), batch_size):
        p = premises[i:i+batch_size]
        h = hypotheses[i:i+batch_size]
        enc = tok(p, h, padding=True, truncation=True, return_tensors="pt").to(device)
        logits = nli(**enc).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()
        c_all.append(probs[:, CONTR_IDX])
        n_all.append(probs[:, NEUTRAL_IDX])
        e_all.append(probs[:, ENTAIL_IDX])

    return np.concatenate(c_all), np.concatenate(n_all), np.concatenate(e_all)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/688 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Scoring: map (contradiction + similarity) → -2 … +3. Entailment and neutral probabilities are computed and reported for debugging, but do not affect the score.

In [3]:
def score_pair(strategy, goal):
    """Score how a strategy relates to a goal on a -2 … +3 scale.

    The cosine similarity of the two embeddings sets the positive score
    (>= 0.55 -> 3, >= 0.42 -> 2, >= 0.22 -> 1, otherwise 0). The NLI
    contradiction probability can override it with a negative score, but
    only when the pair is at least loosely related (similarity >= 0.20):
    contradiction >= 0.60 -> -2, contradiction >= 0.40 -> -1.

    returns: (score, similarity, contradiction_p, neutral_p, entailment_p)
    """
    # Embeddings are requested normalised, so the dot product is the cosine.
    strategy_vec = embed(strategy)[0]
    goal_vec = embed(goal)[0]
    similarity = float(np.dot(strategy_vec, goal_vec))

    # The goal is phrased as a statement so it can serve as the NLI hypothesis.
    contra, neutral, entail = (
        float(p[0]) for p in nli_probs([strategy], [f"The company's goal is: {goal}."])
    )

    def _result(score):
        return score, similarity, contra, neutral, entail

    # Conflict detection comes first and needs a minimum of topical overlap.
    if similarity >= 0.20:
        if contra >= 0.60:
            return _result(-2)
        if contra >= 0.40:
            return _result(-1)

    # Support strength by similarity band, strongest band first.
    # (tune these cutoffs to match your labeling style)
    for cutoff, score in ((0.55, 3), (0.42, 2), (0.22, 1)):
        if similarity >= cutoff:
            return _result(score)

    return _result(0)




Build the Goals × Strategies matrix

In [4]:
import pandas as pd

def goals_vs_strategies_matrix(goals, strategies, blank_zeros=True):
    """Score every strategy against every goal and tabulate the results.

    goals: sequence of goal texts (become the matrix columns)
    strategies: sequence of strategy texts (become the matrix rows)
    blank_zeros: when true, a score of 0 is shown as an empty string
    returns: (df_matrix, df_debug)
        df_matrix - one row per strategy, a leading "Strategie" column with
                    the strategy text, then one column per goal with the score
        df_debug  - one row per (strategy, goal) pair with the raw score,
                    cosine similarity and the three NLI probabilities
    """
    import pandas as pd

    cell_rows = []
    pair_records = []

    for s_idx, strategy in enumerate(strategies):
        cells = []
        for g_idx, goal in enumerate(goals):
            score, cosine, p_contra, p_neutral, p_entail = score_pair(strategy, goal)

            # Only the display value is blanked; the debug record keeps the 0.
            hide = blank_zeros and score == 0
            cells.append("" if hide else score)

            pair_records.append(dict(
                strategy_idx=s_idx,
                goal_idx=g_idx,
                strategy=strategy,
                goal=goal,
                score=score,
                cosine_sim=cosine,
                contradiction_p=p_contra,
                neutral_p=p_neutral,
                entailment_p=p_entail,
            ))
        cell_rows.append(cells)

    matrix = pd.DataFrame(cell_rows, columns=goals)
    matrix.insert(0, "Strategie", strategies)

    return matrix, pd.DataFrame(pair_records)



In [5]:
# Example input: the goals become the columns of the matrix.
goals = [
    "Customer satisfaction > 9",
    "Increase in employee satisfaction 10% per year",
    "Positive financial ratios",
    "Private label offering from 40 to 50",
    "Continuous improvement of sustainability",
]

# Example input: each strategy becomes one row of the matrix.
strategies = [
    "We offer local producers the opportunity every week to present their products to customers in the store",
    "We use sustainable packaging as much as possible and no packaging where possible",
    "We limit the number of travel movements of suppliers, employees and delivery services as much as possible",
    "We continuously measure customer satisfaction, both in-store and through online surveys",
    "We conclude multi-year contracts with local suppliers after the commercial value of the products has been positively determined.",
]

# Score every (strategy, goal) pair; zero scores are shown as blank cells.
# df_debug keeps the per-pair similarity and NLI probabilities for inspection.
df_matrix, df_debug = goals_vs_strategies_matrix(goals, strategies, blank_zeros=True)
df_matrix



Unnamed: 0,Strategie,Customer satisfaction > 9,Increase in employee satisfaction 10% per year,Positive financial ratios,Private label offering from 40 to 50,Continuous improvement of sustainability
0,We offer local producers the opportunity every...,,,,1.0,
1,We use sustainable packaging as much as possib...,,,,,1.0
2,We limit the number of travel movements of sup...,1.0,,,,
3,"We continuously measure customer satisfaction,...",3.0,2.0,-2.0,,1.0
4,We conclude multi-year contracts with local su...,1.0,1.0,1.0,1.0,


D) Export to Excel in the same style (rotated goal headers + borders)

In [6]:
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font, Border, Side
from openpyxl.utils.dataframe import dataframe_to_rows

def export_matrix_to_excel(df_matrix, filename="goals_vs_strategies_matrix.xlsx", rotate_headers=55, bold_last_goal=True):
    """Write the goals × strategies matrix to a formatted Excel workbook.

    df_matrix: DataFrame whose first column holds the strategy text and whose
               remaining columns hold one goal each
    filename: path of the .xlsx file to write
    rotate_headers: text rotation applied to the goal header cells
    bold_last_goal: when true, the body cells of the last column are bold
    returns: filename, unchanged, for convenient display in the notebook
    """
    # Local import so this cell does not depend on editing the import line above.
    from openpyxl.utils import get_column_letter

    wb = Workbook()
    ws = wb.active
    ws.title = "Matrix"

    # Write dataframe (including headers)
    for r in dataframe_to_rows(df_matrix, index=False, header=True):
        ws.append(r)

    n_cols = df_matrix.shape[1]

    # Formatting
    thin = Side(style="thin")
    border = Border(left=thin, right=thin, top=thin, bottom=thin)

    # Column widths. get_column_letter handles columns past "Z" (AA, AB, …);
    # chr(64 + col) produced invalid letters such as "[" from column 27 on.
    ws.column_dimensions["A"].width = 55  # Strategie column
    for col in range(2, n_cols + 1):
        ws.column_dimensions[get_column_letter(col)].width = 16

    # Header row formatting (rotated goals)
    header_row = 1
    ws["A1"].font = Font(bold=True)
    ws["A1"].alignment = Alignment(horizontal="left", vertical="center")

    for col in range(2, n_cols + 1):
        cell = ws.cell(row=header_row, column=col)
        cell.font = Font(bold=True)
        cell.alignment = Alignment(textRotation=rotate_headers, horizontal="center", vertical="bottom", wrap_text=True)

    # Body formatting: row heights, borders on every cell, alignment on body cells
    for row in range(1, ws.max_row + 1):
        ws.row_dimensions[row].height = 60 if row == header_row else 55

        for col in range(1, ws.max_column + 1):
            c = ws.cell(row=row, column=col)
            c.border = border
            if row > header_row and col == 1:
                # Strategy text: wrap long sentences inside the wide first column.
                c.alignment = Alignment(wrap_text=True, vertical="top")
            elif row > header_row and col > 1:
                c.alignment = Alignment(horizontal="center", vertical="center")

    # Optionally bold the last goal column (like your screenshot emphasis)
    if bold_last_goal:
        last_col = ws.max_column
        for row in range(2, ws.max_row + 1):
            ws.cell(row=row, column=last_col).font = Font(bold=True)

    # Keep the header row and the strategy column visible while scrolling.
    ws.freeze_panes = "B2"
    wb.save(filename)
    return filename

# Write the matrix built above to disk; the function returns the path it wrote,
# which is displayed as the cell output.
out_file = export_matrix_to_excel(df_matrix, filename="goals_vs_strategies_matrix.xlsx")
out_file


'goals_vs_strategies_matrix.xlsx'