# Analysis of Generated Articles

This notebook analyzes the synthetic scientific articles generated by different LLMs.

In [1]:
import json
import os
import re
import pandas as pd
from collections import defaultdict

# Root directory scanned by load_all_articles(); each entry inside it is treated as one model name.
OUTPUT_DIR = "output"
# Tag names that check_tags() searches for in every article, as <tag>...</tag> pairs.
EXPECTED_TAGS = ["model", "params", "hardware", "training", "country", "year"]

## 1. Load Data

In [2]:
def load_all_articles(output_dir=None):
    """Load every model's ``articles.json`` found under ``output_dir``.

    Each entry of ``output_dir`` is treated as a model name; entries that do
    not contain a regular file named ``articles.json`` are skipped.

    Args:
        output_dir: Directory to scan. Defaults to the notebook-level
            ``OUTPUT_DIR`` when omitted, which keeps existing calls working.

    Returns:
        dict mapping model name -> parsed JSON content of its
        ``articles.json``, inserted in sorted model-name order.

    Raises:
        FileNotFoundError: if ``output_dir`` does not exist.
        json.JSONDecodeError: if an ``articles.json`` is not valid JSON.
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR

    all_data = {}

    for model_name in sorted(os.listdir(output_dir)):
        json_path = os.path.join(output_dir, model_name, "articles.json")

        # isfile (not exists) so that a directory that happens to be named
        # "articles.json" is skipped instead of making open() fail.
        if os.path.isfile(json_path):
            with open(json_path, "r", encoding="utf-8") as f:
                all_data[model_name] = json.load(f)

    return all_data

# Load every model's articles once; the analysis cells below all iterate over this dict.
data = load_all_articles()
print(f"Models loaded: {list(data.keys())}")

Models loaded: ['claude-sonnet', 'claude-sonnet-real', 'kimi', 'kimi-real', 'qwen', 'qwen-real']


## 2. Success Rate

How many articles were generated successfully (without JSON errors)?

In [3]:
# Number of articles expected per model. This was previously hard-coded as a
# bare 10 in two places (the "Expected" column and the rate denominator).
EXPECTED_ARTICLES = 10

# One row per model: how many articles were loaded versus how many were expected.
success_data = [
    {
        "Model": model_name,
        "Generated": len(articles),
        "Expected": EXPECTED_ARTICLES,
        "Success Rate": f"{(len(articles)/EXPECTED_ARTICLES)*100:.0f}%",
    }
    for model_name, articles in data.items()
]

df_success = pd.DataFrame(success_data)
df_success

Unnamed: 0,Model,Generated,Expected,Success Rate
0,claude-sonnet,10,10,100%
1,claude-sonnet-real,10,10,100%
2,kimi,10,10,100%
3,kimi-real,10,10,100%
4,qwen,10,10,100%
5,qwen-real,10,10,100%


## 3. XML Tags Compliance

Are all XML tags present in each article?

In [4]:
def check_tags(text, tags=None):
    """Report which tags occur in ``text`` as a complete ``<tag>...</tag>`` pair.

    Args:
        text: Article text to inspect.
        tags: Iterable of tag names to look for. Defaults to the
            notebook-level ``EXPECTED_TAGS`` when omitted, which keeps
            existing one-argument calls working.

    Returns:
        dict mapping each tag name -> True if an opening tag followed by its
        closing tag is found (content may span lines), else False.
    """
    if tags is None:
        tags = EXPECTED_TAGS

    found = {}
    for tag in tags:
        # Escape the name so regex metacharacters in a tag are matched literally.
        name = re.escape(tag)
        # DOTALL lets the tag content span several lines.
        found[tag] = re.search(f"<{name}>.*?</{name}>", text, re.DOTALL) is not None
    return found

# One row per model: for each expected tag, how many articles contain it,
# plus how many articles contain every expected tag.
tags_data = []

for model_name, articles in data.items():
    total = len(articles)
    tag_counts = defaultdict(int)  # tag name -> number of articles containing it
    all_tags_count = 0

    for article in articles:
        tags = check_tags(article.get("article", ""))

        for tag, present in tags.items():
            if present:
                tag_counts[tag] += 1

        if all(tags.values()):
            all_tags_count += 1

    row = {"Model": model_name}
    for tag in EXPECTED_TAGS:
        row[f"<{tag}>"] = f"{tag_counts[tag]}/{total}"

    # Guard the percentage: a model with an empty article list would
    # otherwise raise ZeroDivisionError here.
    all_tags_pct = f"{(all_tags_count/total)*100:.0f}%" if total else "n/a"
    row["All Tags"] = f"{all_tags_count}/{total} ({all_tags_pct})"
    tags_data.append(row)

df_tags = pd.DataFrame(tags_data)
df_tags

Unnamed: 0,Model,<model>,<params>,<hardware>,<training>,<country>,<year>,All Tags
0,claude-sonnet,10/10,10/10,10/10,10/10,10/10,10/10,10/10 (100%)
1,claude-sonnet-real,10/10,10/10,10/10,10/10,10/10,10/10,10/10 (100%)
2,kimi,10/10,10/10,10/10,10/10,10/10,9/10,9/10 (90%)
3,kimi-real,9/10,9/10,6/10,10/10,10/10,9/10,6/10 (60%)
4,qwen,10/10,10/10,10/10,10/10,10/10,10/10,10/10 (100%)
5,qwen-real,10/10,10/10,8/10,10/10,10/10,9/10,8/10 (80%)


## 4. Article Length

Number of words and paragraphs per article.

In [5]:
def count_words(text):
    """Return the number of whitespace-separated tokens in ``text``."""
    tokens = text.split()
    return len(tokens)

def count_paragraphs(text):
    """Count the paragraphs in ``text``.

    Paragraphs are first taken to be the non-blank blocks separated by one or
    more blank lines (a blank line may contain whitespace, so ``\\r\\n``
    line endings also work). When that finds at most one block, the text is
    assumed to use single newlines between paragraphs and every non-blank
    line is counted instead.

    The previous version only took the single-newline fallback when *no*
    block was found, which can only happen for blank text, so the fallback
    never had any effect.

    Args:
        text: Article text.

    Returns:
        The paragraph count, never less than 1 (blank text counts as 1, as
        before).
    """
    blocks = [block for block in re.split(r"\n\s*\n", text) if block.strip()]

    # No blank-line separation found: fall back to one paragraph per line.
    if len(blocks) <= 1:
        blocks = [line for line in text.split("\n") if line.strip()]

    return len(blocks) if blocks else 1

# One row per model: min / max / mean word and paragraph counts over its articles.
length_data = []

for model_name, articles in data.items():
    texts = [a.get("article", "") for a in articles]
    words = [count_words(t) for t in texts]
    paras = [count_paragraphs(t) for t in texts]
    n_articles = len(texts)

    # default=0 and the guarded means keep a model with an empty article list
    # from raising ValueError (min/max) or ZeroDivisionError (mean).
    words_avg = sum(words) / n_articles if n_articles else 0
    paras_avg = sum(paras) / n_articles if n_articles else 0

    length_data.append({
        "Model": model_name,
        "Words (min)": min(words, default=0),
        "Words (max)": max(words, default=0),
        "Words (avg)": f"{words_avg:.0f}",
        "Paragraphs (min)": min(paras, default=0),
        "Paragraphs (max)": max(paras, default=0),
        "Paragraphs (avg)": f"{paras_avg:.1f}"
    })

df_length = pd.DataFrame(length_data)
df_length

Unnamed: 0,Model,Words (min),Words (max),Words (avg),Paragraphs (min),Paragraphs (max),Paragraphs (avg)
0,claude-sonnet,242,285,264,3,4,3.9
1,claude-sonnet-real,238,302,263,3,4,3.9
2,kimi,108,344,245,1,3,2.4
3,kimi-real,90,193,129,1,1,1.0
4,qwen,98,266,211,1,3,2.2
5,qwen-real,97,304,221,1,3,2.0


## 5. Unique Values Analysis

### 5.1 Unique Model Names

In [6]:
def extract_tag_value(text, tag):
    """Return the stripped content of the first ``<tag>...</tag>`` pair in ``text``.

    The content may span several lines. Returns ``None`` when no complete
    pair is present.
    """
    found = re.compile(f"<{tag}>(.*?)</{tag}>", re.DOTALL).search(text)
    if found is None:
        return None
    return found.group(1).strip()

# For each generator, list the distinct non-empty <model> values found in its articles.
for model_name, articles in data.items():
    extracted = (extract_tag_value(a.get("article", ""), "model") for a in articles)
    model_names = {value for value in extracted if value}

    print(f"\n{model_name}:")
    print(f"  Unique model names: {len(model_names)}/{len(articles)}")
    for name in sorted(model_names):
        print(f"    - {name}")


claude-sonnet:
  Unique model names: 8/10
    - CogniNet-Alpha
    - DeepReason-7B
    - LinguaMax-7B
    - NeuralCognitus-7B
    - NeuralMind-7
    - NeuralMind-7B
    - NeuroFlex-7B
    - NeuroLingua-XL

claude-sonnet-real:
  Unique model names: 8/10
    - DeepMind Chinchilla
    - DeepSeek-V2
    - DeepSeq-T5X
    - GLM-130B
    - LinguaFormer-7B
    - LinguaFormer-XL
    - LinguaNet-7B
    - PaLM-2

kimi:
  Unique model names: 8/10
    - CerebroLM-9B
    - Cerebrus-7B
    - Cogito-7B
    - Cogito-9X
    - CogniNet-π
    - Cognitron-7X
    - NeuroSpark-XL
    - NeuroSynth-7

kimi-real:
  Unique model names: 6/10
    - Aurora-7B
    - Aurora-GPT
    - NexusLM-7B
    - NordicBERT-Large
    - PanGu-Σ
    - Turing-NL 2.7B

qwen:
  Unique model names: 3/10
    - AmpliMind-XL
    - NeuraSynth-9
    - NeuroSynth-9

qwen-real:
  Unique model names: 4/10
    - NeuraScale-9
    - NeuralReasoner-12B
    - NeuralReasoner-7B
    - Panthalassa-12B


### 5.2 Unique Hardware (GPUs/TPUs)

In [7]:
# For each generator, list the distinct non-empty <hardware> values found in its articles.
for model_name, articles in data.items():
    article_texts = [entry.get("article", "") for entry in articles]
    hardware_list = set(
        filter(None, (extract_tag_value(body, "hardware") for body in article_texts))
    )

    print(f"\n{model_name}:")
    print(f"  Unique hardware: {len(hardware_list)}/{len(articles)}")
    for hw in sorted(hardware_list):
        print(f"    - {hw}")


claude-sonnet:
  Unique hardware: 3/10
    - NVIDIA A100 80GB GPUs
    - NVIDIA A100 GPUs
    - NVIDIA H100 GPUs

claude-sonnet-real:
  Unique hardware: 7/10
    - 128 NVIDIA A100 GPUs
    - 2048 NVIDIA H800 GPUs
    - 96 NVIDIA A100 40GB GPUs
    - 96 NVIDIA A100 80GB GPUs
    - NVIDIA A100 80GB GPUs
    - NVIDIA A100 GPUs
    - TPU v4

kimi:
  Unique hardware: 8/10
    - 192 NVIDIA A100-SXM4 80 GB GPUs
    - 256 NVIDIA A100 GPUs
    - 384 NVIDIA A100-SXM4-80GB GPUs
    - 768 NVIDIA A100 GPUs
    - A100-SXM4-80GB
    - NVIDIA A100-SXM4-80 GB
    - NVIDIA H100 80 GB SXM
    - TPU-v4

kimi-real:
  Unique hardware: 6/10
    - 512 NVIDIA A100 80 GB GPUs
    - 512 TPU-v4 cores
    - Huawei Ascend 910
    - NVIDIA A100 80 GB
    - NVIDIA A100-SXM4-80GB
    - TPU-v4 pods containing 512 chips

qwen:
  Unique hardware: 6/10
    - GPUs
    - NVIDIA A100
    - NVIDIA A100 GPUs
    - NVIDIA H100
    - NVIDIA H100 Tensor Core GPUs
    - TPU v4

qwen-real:
  Unique hardware: 4/10
    - 512 NVIDIA 

### 5.3 Unique Countries

In [8]:
# For each generator, list the distinct non-empty <country> values found in its articles.
for model_name, articles in data.items():
    countries = set()
    for body in (entry.get("article", "") for entry in articles):
        value = extract_tag_value(body, "country")
        if not value:
            continue  # tag missing or empty
        countries.add(value)

    print(f"\n{model_name}:")
    print(f"  Unique countries: {len(countries)}/{len(articles)}")
    for c in sorted(countries):
        print(f"    - {c}")


claude-sonnet:
  Unique countries: 5/10
    - Canada
    - Germany
    - Singapore
    - South Korea
    - Sweden

claude-sonnet-real:
  Unique countries: 6/10
    - Canada
    - China
    - Germany
    - South Korea
    - United Kingdom
    - United States

kimi:
  Unique countries: 6/10
    - Canada
    - Japan
    - Singapore
    - Singapore’s
    - South Korea
    - Sweden

kimi-real:
  Unique countries: 6/10
    - Canada
    - China
    - Finland
    - India
    - Japan
    - Singapore

qwen:
  Unique countries: 5/10
    - Canadian Advanced AI Initiative
    - Canadian Advanced AI Institute
    - Canadian Institute for Advanced Machine Learning (CIAML)
    - Japan
    - Switzerland

qwen-real:
  Unique countries: 4/10
    - Canada
    - Japan
    - United States
    - the United States


### 5.4 Unique Years

In [9]:
# For each generator, list the distinct non-empty <year> values found in its articles.
for model_name, articles in data.items():
    year_values = [extract_tag_value(a.get("article", ""), "year") for a in articles]
    years = {y for y in year_values if y}

    print(f"\n{model_name}:")
    print(f"  Unique years: {len(years)}/{len(articles)}")
    for y in sorted(years):
        print(f"    - {y}")


claude-sonnet:
  Unique years: 2/10
    - 2023
    - 2024

claude-sonnet-real:
  Unique years: 3/10
    - 2022
    - 2023
    - 2024

kimi:
  Unique years: 3/10
    - 2022
    - 2023
    - 2025

kimi-real:
  Unique years: 3/10
    - 2022
    - 2023
    - 2024

qwen:
  Unique years: 2/10
    - 2023
    - 2024

qwen-real:
  Unique years: 2/10
    - 2023
    - 2024


### 5.5 Unique Parameter Counts

In [10]:
# For each generator, list the distinct non-empty <params> values found in its articles.
for model_name, articles in data.items():
    raw_values = map(
        lambda entry: extract_tag_value(entry.get("article", ""), "params"),
        articles,
    )
    params = {value for value in raw_values if value}

    print(f"\n{model_name}:")
    print(f"  Unique parameter counts: {len(params)}/{len(articles)}")
    for p in sorted(params):
        print(f"    - {p}")


claude-sonnet:
  Unique parameter counts: 3/10
    - 175 billion parameters
    - 7.2 billion parameters
    - 7.3 billion parameters

claude-sonnet-real:
  Unique parameter counts: 7/10
    - 11 billion parameters
    - 13.7 billion parameters
    - 130 billion parameters
    - 236 billion parameters
    - 340 billion parameters
    - 7.2 billion parameters
    - 70 billion parameters

kimi:
  Unique parameter counts: 7/10
    - 1.8 trillion
    - 14B
    - 2.7×10^11
    - 6.8 billion
    - 6.9 billion parameters
    - 7×10⁹
    - 9.3 billion

kimi-real:
  Unique parameter counts: 7/10
    - 1.085 trillion
    - 1.2B
    - 1.3 billion
    - 2.7 billion
    - 356 million
    - 7.2 billion
    - 7.3 billion

qwen:
  Unique parameter counts: 6/10
    - 1.2 trillion
    - 12.4 trillion
    - 12.5 trillion
    - 12.8 billion
    - 12.8 trillion
    - 3.2 trillion

qwen-real:
  Unique parameter counts: 7/10
    - 1.2 trillion
    - 1.5 trillion
    - 12.3 billion
    - 12.3 trillion
    - 

## 6. Unique Article Beginnings

The first two words of each article are compared as a rough measure of stylistic diversity.

In [15]:
def get_first_words(text, n=4):
    """Return the first ``n`` whitespace-separated words of ``text``, joined by
    single spaces and followed by ``...``.

    The ellipsis is always appended, even when ``text`` has fewer than ``n``
    words or none at all.
    """
    head = text.split()[:n]
    joined = " ".join(head)
    return f"{joined}..."

# For each generator, list the distinct two-word openings of its articles.
for model_name, articles in data.items():
    beginnings = {get_first_words(a.get("article", ""), 2) for a in articles}

    print(f"\n{model_name}:")
    print(f"  Unique beginnings: {len(beginnings)}/{len(articles)}")
    for b in sorted(beginnings):
        print(f"    - {b}")


claude-sonnet:
  Unique beginnings: 3/10
    - In this...
    - This paper...
    - We present...

claude-sonnet-real:
  Unique beginnings: 2/10
    - We introduce...
    - We present...

kimi:
  Unique beginnings: 8/10
    - In this...
    - Recent advances...
    - The <model>Cogito-7B</model>...
    - The <model>Cogito-9X</model>...
    - The <model>CogniNet-π</model>...
    - The <model>Cognitron-7X</model>...
    - The <model>NeuroSynth-7</model>...
    - We introduce...

kimi-real:
  Unique beginnings: 3/10
    - In this...
    - The recent...
    - We introduce...

qwen:
  Unique beginnings: 6/10
    - <country>Japan</country>-based researchers...
    - <p>In recent...
    - <paragraph>Recent advancements...
    - In recent...
    - Recent advancements...
    - This study...

qwen-real:
  Unique beginnings: 5/10
    - <p>In recent...
    - <paragraph>In recent...
    - <paragraph>Recent advancements...
    - In recent...
    - Recent advancements...


## 7. Summary Table

In [16]:
# One row per model combining the earlier metrics: article count, number of
# articles containing every expected tag, and the number of distinct values
# seen for several tags and for the eight-word article openings.
summary_data = []

for model_name, articles in data.items():
    total = len(articles)
    texts = [article.get("article", "") for article in articles]

    # Distinct non-empty tag values across this model's articles
    model_names = {v for v in (extract_tag_value(t, "model") for t in texts) if v}
    hardware = {v for v in (extract_tag_value(t, "hardware") for t in texts) if v}
    countries = {v for v in (extract_tag_value(t, "country") for t in texts) if v}
    years = {v for v in (extract_tag_value(t, "year") for t in texts) if v}

    # Distinct openings, using the first eight words of each article
    beginnings = {get_first_words(t, 8) for t in texts}

    # Articles in which every expected tag is present
    all_tags = sum(1 for t in texts if all(check_tags(t).values()))

    summary_row = {
        "Model": model_name,
        "Success": f"{total}/10",
        "All Tags": f"{all_tags}/{total}",
        "Unique Models": len(model_names),
        "Unique Hardware": len(hardware),
        "Unique Countries": len(countries),
        "Unique Years": len(years),
        "Unique Beginnings": len(beginnings)
    }
    summary_data.append(summary_row)

df_summary = pd.DataFrame(summary_data)
df_summary

Unnamed: 0,Model,Success,All Tags,Unique Models,Unique Hardware,Unique Countries,Unique Years,Unique Beginnings
0,claude-sonnet,10/10,10/10,8,3,5,2,9
1,claude-sonnet-real,10/10,10/10,8,7,6,3,8
2,kimi,10/10,9/10,8,8,6,3,10
3,kimi-real,10/10,6/10,6,6,6,3,8
4,qwen,10/10,10/10,3,6,5,2,10
5,qwen-real,10/10,8/10,4,4,4,2,7
