In [1]:
print("⏳ Setting up environment for HMTAS inference...")

# --- Step 1: Ensure clean base ---
!pip uninstall -y bleurt tensorflow tensorflow-text -q || true
!rm -rf /root/.cache/pip /root/.cache/huggingface /root/.cache/nltk

# --- Step 2: Install core stable packages (quietly) ---
!pip install -q -U numpy==1.26.4 scikit-learn==1.3.2
!pip install -q transformers==4.41.2 datasets==2.18.0 sentence-transformers==2.7.0
!pip install -q rouge-score==0.1.2 bert-score==0.3.13
!pip install -q networkx==3.2.1 hdbscan==0.8.33 umap-learn==0.5.5
!pip install -q matplotlib seaborn nltk

# --- Step 3: Torch (CPU) install (GPU uses default runtime CUDA) ---
!pip install -q torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu121

# --- Step 4: Download NLTK resources silently ---
import nltk
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

print("✅ Environment ready for HMTAS inference and visualization.")


⏳ Setting up environment for HMTAS inference...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m115.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.3.2 which is incompatible.[0m[31m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m90.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.5/171.5 kB

In [2]:
# ================================================================
# HMTAS — Research-Grade Unified Inference & Fair Comparison
# Author: Rayyan Ahmed Khan
# ================================================================
# Features:
# - Fair zero-shot baseline evaluation (same prompt, same decoding)
# - Optional HMTAS-guided enhancement (intra-model improvement only)
# - Multi-seed reproducibility (mean ± 95% CI)
# - Unified tokenizer for length metrics
# - Stratified reporting (short/medium/long docs)
# - Compute transparency (params, tokens, VRAM)
# ================================================================

import os, time, random, json, gc, math, warnings
from pathlib import Path
from typing import List, Dict, Tuple

warnings.filterwarnings("ignore")
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
from rouge_score import rouge_scorer
from bert_score import score as bert_score_calculator
from nltk.tokenize import sent_tokenize
import nltk
nltk.download('punkt', quiet=True)

try:
    import sacrebleu; _HAS_SACREBLEU = True
except: _HAS_SACREBLEU = False
try:
    from nltk.translate.meteor_score import meteor_score; _HAS_METEOR = True
except: _HAS_METEOR = False

import hdbscan, networkx as nx
try:
    import umap.umap_ as umap; _HAS_UMAP = True
except: _HAS_UMAP = False

# ================================================================
# Configuration
# ================================================================
class Cfg:
    dataset_samples = 700
    seeds = [41, 42, 43]
    device = "cuda" if torch.cuda.is_available() else "cpu"
    out_dir = Path("./hmtas_research_output"); out_dir.mkdir(parents=True, exist_ok=True)

    # Decoding
    num_beams = 5
    min_length = 40
    max_target_length = 180
    length_penalty = 1.2
    repetition_penalty = 1.1
    no_repeat_ngram_size = 3

    # HMTAS
    token_budget = 240
    min_cluster_size = 3
    per_cluster_topk = 3
    fallback_topk = 8
    sim_threshold = 0.15
    pagerank_alpha = 0.85
    use_umap = True
    umap_dim = 16

cfg = Cfg()

# ================================================================
# Model Registry
# ================================================================
MODEL_REG = {
    "HMTAS(FLAN-T5-Base)": {
        "name": "google/flan-t5-base", "ctx": 512, "prompt_style": "t5", "guided": True
    },
    "FLAN-T5-Base": {
        "name": "google/flan-t5-base", "ctx": 512, "prompt_style": "t5", "guided": False
    },
    "BART-Large-CNN": {
        "name": "facebook/bart-large-cnn", "ctx": 1024, "prompt_style": "plain", "guided": False
    },
    "T5-Base": {
        "name": "t5-base", "ctx": 512, "prompt_style": "t5", "guided": False
    },
    "PEGASUS-CNNDM": {
        "name": "google/pegasus-cnn_dailymail", "ctx": 1024, "prompt_style": "plain", "guided": False
    },
    "HMTAS-unguided": {
        "name": "google/flan-t5-base", "ctx": 512, "prompt_style": "t5", "guided": False
    }
}

# ================================================================
# Dataset loader
# ================================================================
def load_multi_document_dataset(n_samples: int):
    ds = load_dataset('multi_news', split=f'test[:{n_samples}]')
    pool = []
    for ex in ds:
        docs_raw = ex["document"] or ""
        docs = [d.strip() for d in docs_raw.split("||||") if d.strip()]
        if not docs: docs = [docs_raw.strip()]
        pool.append({"texts": docs, "summary": ex["summary"] or ""})
    print(f"✓ Loaded {len(pool)} multi-document samples.")
    return pool

# ================================================================
# HMTAS Components
# ================================================================
class SentenceEncoderGPU:
    def __init__(self, device):
        self.model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
    def encode(self, sents):
        return self.model.encode(sents, convert_to_tensor=True, device=cfg.device, show_progress_bar=False)
    def cosine(self, a, b):
        a = F.normalize(a, p=2, dim=-1); b = F.normalize(b, p=2, dim=-1)
        return torch.mm(a, b.t())

class HMTASClusterer:
    def cluster(self, emb_like):
        e = emb_like.detach().cpu().numpy()
        if e.shape[0] < cfg.min_cluster_size: return np.zeros(e.shape[0], dtype=int)
        c = hdbscan.HDBSCAN(min_cluster_size=cfg.min_cluster_size, min_samples=1)
        labels = c.fit_predict(e)
        if (labels == -1).any():
            non = np.where(labels != -1)[0]
            if len(non) > 0:
                for i in np.where(labels == -1)[0]:
                    sims = F.cosine_similarity(torch.tensor(e[i]).unsqueeze(0),
                                               torch.tensor(e[non]), dim=-1)
                    labels[i] = int(labels[non][sims.argmax().item()])
            else: labels[:] = 0
        return labels

class HMTASProcessor:
    def __init__(self, tokenizer, device):
        self.tok = tokenizer
        self.encoder = SentenceEncoderGPU(device)
        self.clusterer = HMTASClusterer()

    def preprocess(self, docs):
        sents = []
        for d in docs:
            sents.extend([s.strip() for s in sent_tokenize(d) if 8 < len(s.split()) < 120])
        return sents

    def safe_text(self, text, ctx, prompt_style):
        ids = self.tok.encode(text, add_special_tokens=True, truncation=True, max_length=ctx)
        text = self.tok.decode(ids, skip_special_tokens=True)
        if prompt_style == "t5" and not text.lower().startswith("summarize:"):
            text = "summarize: " + text
        return text

    def create_guided_input(self, docs, ctx, prompt_style):
        sents = self.preprocess(docs)
        base_text = NEUTRAL_PREFIX + SEP.join(docs)
        if not sents: return self.safe_text(base_text, ctx, prompt_style)
        emb = self.encoder.encode(sents)
        clust_input = emb
        if _HAS_UMAP and cfg.use_umap and len(sents) >= 20:
            reducer = umap.UMAP(n_components=cfg.umap_dim, random_state=42)
            clust_input = torch.tensor(reducer.fit_transform(emb.cpu()), dtype=torch.float32, device=cfg.device)

        sim = self.encoder.cosine(emb, emb)
        labels = self.clusterer.cluster(clust_input)

        chosen = []
        for l in np.unique(labels):
            idxs = np.where(labels == l)[0]
            sub = sim[idxs][:, idxs].cpu().numpy()
            sub[sub < cfg.sim_threshold] = 0; np.fill_diagonal(sub, 0)
            G = nx.from_numpy_array(sub)
            pr = nx.pagerank(G, alpha=cfg.pagerank_alpha)
            ranked = sorted(pr.items(), key=lambda x: x[1], reverse=True)
            chosen.extend([idxs[i] for i, _ in ranked[:cfg.per_cluster_topk]])
        if len(chosen) < cfg.fallback_topk:
            sums = sim.sum(dim=1).cpu().numpy()
            topk = np.argsort(-sums)[:cfg.fallback_topk]
            chosen = list(dict.fromkeys(chosen + topk.tolist()))
        total, final = 0, []
        for i in chosen:
            n = len(self.tok.encode(sents[i], add_special_tokens=False))
            if total + n > cfg.token_budget: break
            final.append(i); total += n
        guide = " ".join([sents[i] for i in final])
        text = f"{guide} </s> {base_text}"
        return self.safe_text(text, ctx, prompt_style)

# ================================================================
# Prompts for fairness
# ================================================================
NEUTRAL_PREFIX = "Summarize the following multiple articles into one concise paragraph:\n"
SEP = "\n\n---\n\n"
UNIFIED_LEN_TOK = AutoTokenizer.from_pretrained("t5-base")

def build_neutral_input(docs): return NEUTRAL_PREFIX + SEP.join(docs)

# ================================================================
# Safe batch generation
# ================================================================
def safe_batch_generate(model, tok, texts, ctx_limit):
    outs = []
    bs = 4
    model.eval()
    t0 = time.time()
    with torch.no_grad():
        for i in range(0, len(texts), bs):
            chunk = texts[i:i+bs]
            enc = tok(chunk, max_length=ctx_limit, truncation=True, padding=True, return_tensors='pt').to(cfg.device)
            gen_ids = model.generate(**enc, num_beams=cfg.num_beams, max_length=cfg.max_target_length,
                                     min_length=cfg.min_length, length_penalty=cfg.length_penalty,
                                     repetition_penalty=cfg.repetition_penalty,
                                     no_repeat_ngram_size=cfg.no_repeat_ngram_size)
            outs.extend(tok.batch_decode(gen_ids, skip_special_tokens=True))
    latency_ms = (time.time() - t0)/len(texts)*1000
    throughput = len(texts)/(time.time()-t0)
    vram = torch.cuda.max_memory_allocated()/1024**2 if torch.cuda.is_available() else 0
    return outs, latency_ms, throughput, vram

# ================================================================
# Metrics (fixed + research-ready)
# ================================================================
def coverage_compression(src, summ):
    src_t = len(UNIFIED_LEN_TOK.encode(src, add_special_tokens=False))
    sum_t = len(UNIFIED_LEN_TOK.encode(summ, add_special_tokens=False))
    if src_t == 0 or sum_t == 0:
        return 0.0, 0.0
    coverage = (sum_t / src_t) * 100.0
    compression = src_t / sum_t
    return coverage, compression


def compute_all_metrics(sources, refs, cands):
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    r1, r2, rL, bertF, bleu, meteor, cov, comp = [], [], [], [], [], [], [], []

    for s, r, c in zip(sources, refs, cands):
        # ROUGE
        sc = rouge.score(r, c)
        r1.append(sc['rouge1'].fmeasure)
        r2.append(sc['rouge2'].fmeasure)
        rL.append(sc['rougeL'].fmeasure)

        # BLEU
        if _HAS_SACREBLEU:
            try:
                bleu.append(sacrebleu.sentence_bleu(c, [r]).score)
            except Exception:
                bleu.append(0.0)
        else:
            bleu.append(0.0)

        # METEOR (fixed)
        if _HAS_METEOR:
            try:
                from nltk.tokenize import word_tokenize
                ref_tokens = word_tokenize(r)
                cand_tokens = word_tokenize(c)
                meteor.append(meteor_score([ref_tokens], cand_tokens) * 100.0)
            except Exception:
                meteor.append(0.0)
        else:
            meteor.append(0.0)

        # Coverage / Compression
        cv, cp = coverage_compression(s, c)
        cov.append(cv)
        comp.append(cp)

    # BERTScore
    try:
        _, _, F1 = bert_score_calculator(
            cands, refs, lang='en', model_type='distilbert-base-uncased', device=cfg.device
        )
        bertF = (F1.detach().cpu().numpy() * 100.0).tolist()
    except Exception:
        bertF = [0.0] * len(cands)

    return dict(
        rouge1=r1, rouge2=r2, rougeL=rL,
        bert_f1=bertF, bleu=bleu, meteor=meteor,
        coverage=cov, compression=comp
    )
# ================================================================
# Paired bootstrap CI
# ================================================================
def paired_bootstrap_ci(a,b,n_boot=2000):
    rng=np.random.default_rng(42); a=np.array(a); b=np.array(b); n=len(a)
    diffs=[(a[r]-b[r]).mean() for r in [rng.integers(0,n,n) for _ in range(n_boot)]]
    lo,hi=np.percentile(diffs,[2.5,97.5]); return float(a.mean()-b.mean()), float(lo), float(hi)

# ================================================================
# Run single model
# ================================================================
def run_model(name,spec,pool,hmtas_proc=None):
    tok=AutoTokenizer.from_pretrained(spec['name']); model=AutoModelForSeq2SeqLM.from_pretrained(spec['name']).to(cfg.device)
    sources=[" ".join(x['texts']) for x in pool]; refs=[x['summary'] for x in pool]
    if spec["guided"]:
        inputs=[hmtas_proc.create_guided_input(x['texts'],ctx=spec['ctx'],prompt_style=spec['prompt_style']) for x in pool]
    else:
        raw=[build_neutral_input(x['texts']) for x in pool]
        proc_tmp=HMTASProcessor(tok,cfg.device)
        inputs=[proc_tmp.safe_text(t,spec['ctx'],spec['prompt_style']) for t in raw]
    outs,lat,thpt,vram=safe_batch_generate(model,tok,inputs,spec['ctx'])
    m=compute_all_metrics(sources,refs,outs)
    n_params=sum(p.numel() for p in model.parameters())
    tok_count=sum(len(tok.encode(t)) for t in inputs)
    agg={k:float(np.mean(v)*100) for k,v in m.items() if k.startswith("rouge")}
    agg.update(dict(bert_f1=float(np.mean(m["bert_f1"])),bleu=float(np.mean(m["bleu"])),
                    meteor=float(np.mean(m["meteor"])),coverage=float(np.mean(m["coverage"])),
                    compression=float(np.mean(m["compression"])),latency_ms=float(lat),
                    throughput=float(thpt),vram=float(vram),params=int(n_params),
                    tokens=int(tok_count)))
    del model; gc.collect(); torch.cuda.empty_cache()
    return {"name":name,"metrics":m,"aggregate":agg}

# ================================================================
# Stratified bins
# ================================================================
def stratify_bins(pool):
    srcs=[" ".join(x["texts"]) for x in pool]
    lens=[len(UNIFIED_LEN_TOK.encode(s,add_special_tokens=False)) for s in srcs]
    bins=pd.qcut(lens,3,labels=["short","medium","long"])
    return bins,lens

# ================================================================
# Main multi-seed experiment
# ================================================================
def run_all_models(pool):
    all_results=[]
    for seed in cfg.seeds:
        random.seed(seed); np.random.seed(seed); torch.manual_seed(seed)
        if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)
        print(f"\n=== SEED {seed} ===")
        h_tok=AutoTokenizer.from_pretrained(MODEL_REG["HMTAS(FLAN-T5-Base)"]["name"])
        h_proc=HMTASProcessor(h_tok,cfg.device)
        res={}
        for k in ["FLAN-T5-Base","HMTAS(FLAN-T5-Base)","HMTAS-unguided","BART-Large-CNN","T5-Base","PEGASUS-CNNDM"]:
            res[k]=run_model(k,MODEL_REG[k],pool,hmtas_proc=h_proc)
        all_results.append(res)
    return all_results

# ================================================================
# Aggregate + CI
# ================================================================
def summarize_multi_seed(all_results):
    keys=list(all_results[0].keys())
    table=[]
    for k in keys:
        vals=[r[k]["aggregate"] for r in all_results]
        metric_means={m:np.mean([v[m] for v in vals]) for m in vals[0].keys()}
        table.append((k,metric_means))
    df=pd.DataFrame([{**{"model":k},**v} for k,v in table])
    df.to_csv(cfg.out_dir/"aggregate_across_seeds.csv",index=False)
    print(df[["model","rouge1","rouge2","rougeL","bert_f1","coverage","compression","latency_ms","vram"]])
    return df

# ================================================================
# Entry
# ================================================================
def main():
    pool=load_multi_document_dataset(cfg.dataset_samples)
    bins,_=stratify_bins(pool)
    results=run_all_models(pool)
    df=summarize_multi_seed(results)
    df.to_json(cfg.out_dir/"final_summary.json",indent=2)
    print("\nResults saved to",cfg.out_dir)

if __name__=="__main__":
    main()


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading readme: 0.00B [00:00, ?B/s]

Downloading data:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/58.8M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/66.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/7.30M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/69.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/7.31M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/44972 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5622 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5622 [00:00<?, ? examples/s]

✓ Loaded 700 multi-document samples.

=== SEED 41 ===


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors


config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/280 [00:00<?, ?B/s]


=== SEED 42 ===


Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



=== SEED 43 ===


Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (514 > 512). Running this sequence through the model will result in indexing errors
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


                 model     rouge1    rouge2     rougeL    bert_f1  coverage  \
0         FLAN-T5-Base  26.084437  8.872018  15.305442  76.802909  6.982944   
1  HMTAS(FLAN-T5-Base)  25.849277  8.736998  15.076245  76.665320  7.453982   
2       HMTAS-unguided  26.084437  8.872018  15.305442  76.802909  6.982944   
3       BART-Large-CNN  24.881230  8.645705  15.263518  77.228938  5.482907   
4              T5-Base  22.623412  7.515289  14.069530  76.609719  5.169069   
5        PEGASUS-CNNDM  26.205374  9.142359  15.578075  76.381696  6.881394   

   compression   latency_ms         vram  
0    36.112010  1329.881280  5366.271647  
1    35.691473  1268.648379  5437.310384  
2    36.112010  1305.897237  5437.310384  
3    35.638561   726.120226  5831.343587  
4    37.939908   735.854701  5831.343587  
5    31.817567   916.786785  6371.048014  

Results saved to hmtas_research_output
