# Look for questions where the LM confidently disagrees with the reporter
- PopQA high-popularity questions might be good, but they don't have any distractors
  - We could probably just substitute in arbitrary answers from a question with the same relationship type and popularity (this is basically a custom counterfact)
- SciQ with contextual documents
- A little bit of amazon polarity
- Perhaps start with my hand-written easy dataset and ask a LM to generate more similar ones

It will make it easier to identify patterns if we keep each distribution separate at evaluation time (and maybe even during training)


In [3]:
import torch
import numpy as np
import torch
import random

seed = 633
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed);

In [4]:
# generate dataset from counterfact
from datasets import load_dataset, Dataset

ds_name, split = "NeelNanda/counterfact-tracing", "train"
# ds_name, split = "akariasai/PopQA", "test"
orig_dataset: Dataset = load_dataset(ds_name, split=split).shuffle(seed=seed)  # type: ignore

# dataset = dataset.map(map_fn, batched=True, batch_size=1, remove_columns=dataset.column_names)
orig_dataset

  from .autonotebook import tqdm as notebook_tqdm
Downloading readme: 100%|██████████| 1.93k/1.93k [00:00<00:00, 8.37MB/s]


Downloading and preparing dataset None/None to /home/alex/.cache/huggingface/datasets/NeelNanda___parquet/NeelNanda--counterfact-tracing-39c4f800d46af5cf/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...


Downloading data: 100%|██████████| 1.11M/1.11M [00:00<00:00, 4.12MB/s]
Downloading data files: 100%|██████████| 1/1 [00:02<00:00,  2.06s/it]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 1070.25it/s]
                                                                        

Dataset parquet downloaded and prepared to /home/alex/.cache/huggingface/datasets/NeelNanda___parquet/NeelNanda--counterfact-tracing-39c4f800d46af5cf/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.




Dataset({
    features: ['relation', 'relation_prefix', 'relation_suffix', 'prompt', 'relation_id', 'target_false_id', 'target_true_id', 'target_true', 'target_false', 'subject'],
    num_rows: 21919
})

In [3]:
# process the popQA dataset
# first select only the examples with s_pop in the upper 25% of the distribution
# then, for each example, find another example with the same relationship type (in the upper 25% of the distribution)
# and use this as a distractor object for that example, as a new column in the dataset
if ds_name == "akariasai/PopQA":
    s_pop_cutoff = np.percentile(orig_dataset["s_pop"], 90)
    pop_dataset = orig_dataset.filter(lambda x: x["s_pop"] >= s_pop_cutoff)

    def add_distractor(example):
        distractor_candidates = pop_dataset.filter(lambda x: (x["prop_id"] == example["prop_id"]) and (x["id"] != example["id"]))
        
        try:
            distractor = np.random.choice(distractor_candidates)
            dist_obj, dist_obj_id, dist_o_pop, dist_o_aliases = distractor["obj"], distractor["obj_id"], distractor["o_pop"], distractor["o_aliases"]
        except ValueError:
            dist_obj, dist_obj_id, dist_o_pop, dist_o_aliases = "42", None, None, []
            print("No distractor found for example", example["id"], "filled with \"42\"")
        return {"dist_obj": dist_obj, "dist_obj_id": dist_obj_id, "dist_o_pop": dist_o_pop, "dist_o_aliases": dist_o_aliases}

    pop_dataset = pop_dataset.map(add_distractor)
    pop_dataset


In [30]:
q_templates = {
    22: "What is {}'s occupation?",
    218: "In what city was {} born?",
    91: "What genre is {}?",
    257: "Who is the father of {}?",
    182: "In what country is {}?",
    164: "Who was the producer of {}?",
    526: "Who was the director of {}?",
    97: "What is {} the capital of?",
    533: "Who was the screenwriter for {}?",
    639: "Who was the composer of {}?",
    472: "What color is {}?",
    106: "What is the religion of {}?",
    560: "What sport does {} play?",
    484: "Who is the author of {}?",
    292: "Who is the mother of {}?",
    422: "What is the capital of {}?"
}
# q_templates = {
#     22: "{}'s occupation is",
#     218: "The city of birth of {} is",
#     91: "The genre of {} is",
#     257: "The father of {} is",
#     182: "{} is located in the country",
#     164: "The producer of {} was",
#     526: "The director of {} was",
#     97: "{} is the capital of",
#     533: "The screenwriter for {} was",
#     639: "The composer of {} was",
#     472: "The color of {} is",
#     106: "The religion of {} is",
#     560: "The sport played by {} is",
#     484: "The author of {} is",
#     292: "The mother of {} is",
#     422: "The capital of {} is"
# }

def get_labeled_texts(example, bos_token, few_shot_prefix=None):
    """few-shot-prefix is directly prepended to the prompt, without a newline, if provided"""
    prefix = ""
    if few_shot_prefix:
        prefix = few_shot_prefix + prefix

    if ds_name == "akariasai/PopQA":
        prop_id = example["prop_id"]
        q = q_templates[prop_id].format(example["subj"]) + " "
        obj, dist_obj = example["obj"], example["dist_obj"]
    elif ds_name == "NeelNanda/counterfact-tracing":
        q = example["prompt"]
        obj, dist_obj = example["target_true"], example["target_false"]

    prompt = bos_token + prefix + q

    suffix =  "\n\nIs this true?"
    text_true = prompt + obj + suffix
    text_false = prompt + dist_obj + suffix  # distractor object
    return {"texts": [text_true, text_false], "labels": [1, 0]}

def get_few_shot_prefix(examples):
    example_to_str = lambda example: example["text"] + " " + ("No", "Yes")[example["label"]] + "\n\n"
    example_strs = []
    for example in examples:
        exs = get_labeled_texts(example, bos_token="")
                
        # dict of list into list of dict
        exs = [dict(zip(["text", "label"], t)) for t in zip(*exs.values())]
        
        example_strs.extend(example_to_str(ex) for ex in exs)
    np.random.shuffle(example_strs)
    return "".join(example_strs)


In [9]:
n_total = 6000
temp_dataset = pop_dataset if ds_name == "akariasai/PopQA" else orig_dataset
# texts = np.array(dataset[:n_total]["texts"])
# labels = np.array(dataset[:n_total]["labels"])
dataset = temp_dataset.select(range(n_total))
n_shots = 10 // 2  # 5 examples per class, 2 classes
few_shot_set = temp_dataset.select(range(n_total, n_total + 1000))


In [6]:
from utils import load_model_and_tokenizer

# model_name = "huggyllama/llama-7b"
# model_name = "gpt2-xl"
# model_name = "/mnt/ssd-2/nora/vicuna-original-13b"
model_name = "huggyllama/llama-13b"
is_llama = "llama" in model_name or "vicuna" in model_name
model, tokenizer = load_model_and_tokenizer(model_name, is_llama=is_llama)

Downloading tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 2.33MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 411/411 [00:00<00:00, 2.35MB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 700/700 [00:00<00:00, 5.80MB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 595/595 [00:00<00:00, 5.38MB/s]
Downloading (…)model.bin.index.json: 100%|██████████| 33.4k/33.4k [00:00<00:00, 1.41MB/s]
Downloading (…)l-00001-of-00003.bin: 100%|██████████| 9.95G/9.95G [00:49<00:00, 202MB/s]
Downloading (…)l-00002-of-00003.bin: 100%|██████████| 9.90G/9.90G [00:49<00:00, 201MB/s]
Downloading (…)l-00003-of-00003.bin: 100%|██████████| 6.18G/6.18G [00:30<00:00, 202MB/s]
Downloading shards: 100%|██████████| 3/3 [02:09<00:00, 43.19s/it]
Loading checkpoint shards: 100%|██████████| 3/3 [00:18<00:00,  6.03s/it]
Downloading (…)neration_config.json: 100%|██████████| 137/137 [00:00<00:00, 872kB/s]


In [7]:
from tqdm import tqdm

In [10]:
from utils import call_model

def get_hiddens(dataset, few_shot_set):
    # run the model and get the hidden states at each layer
    # creates a len(texts) x n_layers x hidden_dimension array of zeros
    n_layer = model.config.num_hidden_layers
    hidden_size = model.config.hidden_size
    hiddens = np.zeros((2 * len(dataset), n_layer + 1, hidden_size))
    lm_probs = np.zeros((2 * len(dataset),))
    texts = np.zeros((2 * len(dataset),), dtype=object)
    labels = np.zeros((2 * len(dataset),), dtype=int)
    i = 0
    with torch.no_grad():
        for example in tqdm(dataset, total=n_total):
            few_shot_prefix = get_few_shot_prefix(few_shot_set.shuffle(seed=seed).select(range(n_shots))) if n_shots > 0 else None
            labeled_texts = get_labeled_texts(example, tokenizer.bos_token, few_shot_prefix=few_shot_prefix)
            for text, label in zip(labeled_texts["texts"], labeled_texts["labels"]):
                hidden_states, logits = call_model(model, tokenizer, text)

                # get prob assigned to each target
                noyes_tokens = ["No", "Yes"] if is_llama else [" No", " Yes"]
                no_id, yes_id = tokenizer(noyes_tokens, return_tensors="pt", add_special_tokens=False).input_ids.squeeze(1)
                p_no, p_yes = logits[0, -1, [no_id, yes_id]].softmax(dim=-1).cpu().numpy()

                hiddens[i, :, :] = torch.cat(hidden_states).cpu().numpy()[:, -1]  # all layers, last token
                lm_probs[i] = p_yes / (p_yes + p_no)
                labels[i] = label
                texts[i] = text
                i += 1
    return hiddens, lm_probs, labels, texts
hiddens, lm_probs, labels, texts = get_hiddens(dataset, few_shot_set)

  0%|          | 1/6000 [00:00<29:45,  3.36it/s]Loading cached shuffled indices for dataset at /mnt/ssd-2/hf_cache/NeelNanda___parquet/NeelNanda--counterfact-tracing-39c4f800d46af5cf/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-a426c1a8645cabdb.arrow
  0%|          | 2/6000 [00:00<28:57,  3.45it/s]Loading cached shuffled indices for dataset at /mnt/ssd-2/hf_cache/NeelNanda___parquet/NeelNanda--counterfact-tracing-39c4f800d46af5cf/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-a426c1a8645cabdb.arrow
  0%|          | 3/6000 [00:00<29:03,  3.44it/s]Loading cached shuffled indices for dataset at /mnt/ssd-2/hf_cache/NeelNanda___parquet/NeelNanda--counterfact-tracing-39c4f800d46af5cf/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-a426c1a8645cabdb.arrow
  0%|          | 4/6000 [00:01<29:02,  3.44it/s]Loading cached shuffled indices for dataset at /mnt/ssd-2/hf_cache/NeelNanda___parquet/NeelNanda--count

In [11]:
model.config.hidden_size

5120

In [6]:
# save results
from pathlib import Path
import time

prefix = time.strftime("%Y-%m-%d-%H:%M")
cache_dir = Path("./lr-experiments") / ds_name / prefix
cache_dir.mkdir(exist_ok=True, parents=True)
np.save(cache_dir / "hiddens.npy", hiddens)
np.save(cache_dir / "lm_probs.npy", lm_probs)
np.save(cache_dir / "labels.npy", labels)
np.save(cache_dir / "texts.npy", texts)

NameError: name 'texts' is not defined

In [19]:
from pathlib import Path
cache_dir = Path("lr-experiments/akariasai/PopQA/2023-05-23-04:31 (Vicuna)")
hiddens = np.load(cache_dir / "hiddens.npy")
lm_probs = np.load(cache_dir / "lm_probs.npy")
labels = np.load(cache_dir / "labels.npy")
texts = np.load(cache_dir / "texts.npy", allow_pickle=True)
model_name = "/mnt/ssd-2/nora/vicuna-original-13b"
n_shots = 10 // 2

In [44]:
# make a train/test split and keep them separate
# shuffled_idxs = np.random.permutation(np.arange(len(hiddens)))
# shuffled_hiddens = hiddens[shuffled_idxs]
# shuffled_labels = labels[shuffled_idxs]
# train_size = int(len(shuffled_hiddens) * 0.7)
# train_hiddens = shuffled_hiddens[:train_size]
# test_hiddens = shuffled_hiddens[train_size:]
# train_labels = shuffled_labels[:train_size]
# test_labels = shuffled_labels[train_size:]
# train_texts = texts[shuffled_idxs][:train_size]
# test_texts = texts[shuffled_idxs][train_size:]
# test_lm_probs = lm_probs[shuffled_idxs][train_size:]

# redo the above mess using sklearn
from sklearn.model_selection import train_test_split
# train_hiddens, test_hiddens, train_labels, test_labels, train_texts, test_texts, train_lm_probs, test_lm_probs = train_test_split(
#     hiddens, labels, texts, lm_probs, test_size=0.3, random_state=seed, stratify=labels
# )

# Now do the train test split so that the train and test don't use the same relationship types
train_rels, test_rels = train_test_split(list(q_templates.values()), test_size=0.3)
import re
is_text_in_train = lambda t: any(re.match(template.replace("{}", "*") , t.split("\n\n")[-2]) for template in train_rels)
train_mask = np.array([is_text_in_train(t) for t in texts])
test_mask = ~train_mask
train_hiddens = hiddens[train_mask]
test_hiddens = hiddens[test_mask]
train_labels = labels[train_mask]
test_labels = labels[test_mask]
train_texts = texts[train_mask]
test_texts = texts[test_mask]
train_lm_probs = lm_probs[train_mask]
test_lm_probs = lm_probs[test_mask]

# train a classifier on the hidden states
from sklearn.linear_model import LogisticRegressionCV
# use cross-validation to find the best hyperparameters
# use the best hyperparameters to train a final model
train_rels

['Who is the father of {}?',
 'Who was the producer of {}?',
 'In what country is {}?',
 'Who was the composer of {}?',
 'Who is the author of {}?',
 'In what city was {} born?',
 'Who was the director of {}?',
 'Who was the screenwriter for {}?',
 'Who is the mother of {}?',
 'What is the religion of {}?',
 'What genre is {}?']

In [45]:
sum(train_mask)

1772

In [46]:
texts[0].split("\n\n")[-2]

'Who was the director of Legion? Scott Stewart'

In [47]:
Cs = 10 ** np.linspace(-5, 5, 11)
n_layer = 48  # model.config.num_hidden_layers
layer = 24  # 29 * n_layer // 30 + 1  # the layer to use for classification, somewhat arbitrary but middle layers work better
reporter = LogisticRegressionCV(Cs=Cs, cv=2).fit(train_hiddens[:, layer, :], train_labels)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [48]:
# get reporter regularization parameters
print("best regularization parameter:", reporter.C_[0])
# get model predictions on the test set
test_preds = reporter.predict(test_hiddens[:, layer, :])
test_scores = reporter.predict_proba(test_hiddens[:, layer, :])[:, 1]
confidence_threshold = 0.85
reporter_confident = np.abs(test_scores - 0.5) > (confidence_threshold - 0.5)

is_correct = test_preds == test_labels
acc = np.mean(is_correct)
stderr = 0.5 / np.sqrt(len(test_labels))  # SE_prop = sqrt(p(1-p)/n)
print(f"Accuracy: {acc:.3f} ± {2 * stderr:.3f}")
correct_examples = test_texts[is_correct]
incorrect_examples = test_texts[~is_correct]

# train acc
train_preds = reporter.predict(train_hiddens[:, layer, :])
train_acc = np.mean(train_preds == train_labels)
print(f"Train accuracy: {train_acc:.3f}")

# analyze these examples to see what the reporter is getting right and wrong...

best regularization parameter: 0.01
Accuracy: 0.929 ± 0.031
Train accuracy: 0.958


In [49]:
sum(lm_probs > 0.5) / len(lm_probs)

0.4767857142857143

In [50]:
# lm accuracy
cal_thresh = np.quantile(test_lm_probs, test_labels.mean())  # the LM usually just guesses "no error", so we calibrate it to the true proportion of "no error" examples
lm_preds = test_lm_probs > cal_thresh
lm_acc = np.mean(lm_preds == test_labels)
lm_stderr = 0.5 / np.sqrt(len(test_labels))  # SE_prop = sqrt(p(1-p)/n).
print(f"LM Accuracy: {lm_acc:.3f} ± {2 * lm_stderr:.3f}")

LM Accuracy: 0.907 ± 0.031


In [51]:
def print_texts(texts, labels):
    for text, label in zip(texts, labels):
        # remove the few-shot prefix
        last = "\n\n".join(text.split("\n\n")[-2:])
        print(last)
        print(f"Label: {label}")
        print("-" * 50)

In [52]:
print(f"Results for {model_name} on {ds_name} with {2 * n_shots} shots:")
print(f"Example input:\n{texts[0]}")
print("-" * 50)
print()
print(f"LM accuracy: {lm_acc:.3f} ± {2 * lm_stderr:.3f}")
print(f"Train reporter accuracy (layer {layer}): {train_acc:.3f}")
print(f"Reporter accuracy (layer {layer}): {acc:.3f} ± {2 * stderr:.3f}")
print()

lm_correct = (test_lm_probs > 0.5) == test_labels
reporter_correct = test_preds == test_labels

lm_better = lm_correct & ~reporter_correct
reporter_better = ~lm_correct & reporter_correct
unequal = lm_correct != reporter_correct
print(f"The LM and reporter disagree on {unequal.sum()} examples out of {len(test_lm_probs)}")
print(f"The LM is better on {lm_better.sum()} examples")
print(f"The reporter is better on {reporter_better.sum()} examples")
print()
lm_confident = np.abs(test_lm_probs - 0.5) > (confidence_threshold - 0.5)
print(f"LM is confident on {sum(lm_confident)} examples out of {len(test_lm_probs)} (threshold score = {confidence_threshold:.2f})")
print(f"Reporter is confident on {sum(reporter_confident)} examples out of {len(test_lm_probs)}")
conf_disagree = lm_confident & reporter_confident & (lm_preds != test_preds)
print(f"LM and reporter confidently disagree on {sum(conf_disagree)} examples out of {len(test_lm_probs)}")
conf_disagree_reporter_correct = conf_disagree & is_correct
print(f"Reporter is correct on {sum(conf_disagree_reporter_correct)} out of {sum(conf_disagree)} examples where LM and reporter confidently disagree")
conf_disagree_lm_correct = conf_disagree & ~is_correct
print(f"LM is correct on {sum(conf_disagree_lm_correct)} out of {sum(conf_disagree)} examples where LM and reporter confidently disagree")

Results for /mnt/ssd-2/nora/vicuna-original-13b on NeelNanda/counterfact-tracing with 10 shots:
Example input:
<s>Who was the screenwriter for The Fly? John Landis

Is this true? No

Who was the producer of 19? Debra Hill

Is this true? No

What is the religion of Kumail Nanjiani? atheism

Is this true? Yes

Who is the mother of Otto von Habsburg? Sirikit

Is this true? No

Who was the screenwriter for The Fly? George Langelaan

Is this true? Yes

Who was the producer of 19? Jim Abbiss

Is this true? Yes

Who is the mother of Otto von Habsburg? Zita of Bourbon-Parma

Is this true? Yes

In what city was Rafael Reyes born? Cotija de la Paz

Is this true? Yes

In what city was Rafael Reyes born? Canaan

Is this true? No

What is the religion of Kumail Nanjiani? Episcopal Church

Is this true? No

Who was the director of Legion? Scott Stewart

Is this true?
--------------------------------------------------

LM accuracy: 0.907 ± 0.031
Train reporter accuracy (layer 24): 0.958
Reporter accu

In [53]:
antarctica = np.array(["antarctica" in text.lower() for text in test_texts])
sum(antarctica)

0

In [54]:
print("Examples where LM and reporter confidently disagree:")
print_texts(test_texts[conf_disagree], test_labels[conf_disagree])

Examples where LM and reporter confidently disagree:
What is Jerusalem the capital of? State of Palestine

Is this true?
Label: 1
--------------------------------------------------
What is Vienna the capital of? Habsburg Monarchy

Is this true?
Label: 1
--------------------------------------------------
What is London the capital of? Commonwealth of England

Is this true?
Label: 1
--------------------------------------------------
What color is ruby? pink

Is this true?
Label: 1
--------------------------------------------------
In what city was Steve Kerr born? Cotija de la Paz

Is this true?
Label: 0
--------------------------------------------------
What is the capital of Confederate States of America? Montgomery

Is this true?
Label: 1
--------------------------------------------------
What is Rome the capital of? Papal States

Is this true?
Label: 1
--------------------------------------------------
What is the capital of Haiti? Basse-Terre

Is this true?
Label: 0
----------------

In [37]:
print("Examples where LM and reporter agree:")
print_texts(test_texts[~unequal][:sum(conf_disagree)], test_labels[~unequal][:sum(conf_disagree)])

Examples where LM and reporter agree:
Cognos is headquartered in Ottawa

Is this true?
Label: 1
--------------------------------------------------
The Blue Nile, founded in Moscow

Is this true?
Label: 0
--------------------------------------------------
Paul Louis Courier, a native Dutch

Is this true?
Label: 0
--------------------------------------------------
The native language of Armand Lunel is Chinese

Is this true?
Label: 0
--------------------------------------------------
The law in South African Republic declares the language German

Is this true?
Label: 0
--------------------------------------------------
The native language of Dany Robin is French

Is this true?
Label: 1
--------------------------------------------------
In Turku, they understand English

Is this true?
Label: 0
--------------------------------------------------
Germaine Greer's expertise is chemistry

Is this true?
Label: 0
--------------------------------------------------
Kukla, Fran and Ollie premieres 

In [278]:
def generate(question):
    inputs = tokenizer(question, return_tensors="pt").to(device)
    # generate from model
    outputs = model.generate(**inputs, max_length=len(inputs["input_ids"][0]) + 20, do_sample=False, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
    pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return pred

In [238]:
pred = generate("""<s>Chrysler 200, developed by Nokia

Is this true? No

Layar is headquartered in Amsterdam

Is this true? Yes

The capital city of Baghdad Governorate is Tripoli

Is this true? No

Jon Larsen plays opera

Is this true? No

Chrysler 200, developed by Chrysler

Is this true? Yes

The official language of Italy is Korean

Is this true? No

Jon Larsen plays jazz

Is this true? Yes

The capital city of Baghdad Governorate is Baghdad

Is this true? Yes

Layar is headquartered in Canberra

Is this true? No

Peshtera Glacier is located in Antarctica.

Is this true? According to my knowledge,""")
print(pred)

Chrysler 200, developed by Nokia

Is this true? No

Layar is headquartered in Amsterdam

Is this true? Yes

The capital city of Baghdad Governorate is Tripoli

Is this true? No

Jon Larsen plays opera

Is this true? No

Chrysler 200, developed by Chrysler

Is this true? Yes

The official language of Italy is Korean

Is this true? No

Jon Larsen plays jazz

Is this true? Yes

The capital city of Baghdad Governorate is Baghdad

Is this true? Yes

Layar is headquartered in Canberra

Is this true? No

Peshtera Glacier is located in Antarctica.

Is this true? According to my knowledge, Peshtera Glacier is located in the Rila Mountains in Bulgaria.

The capital
