In [1]:
import os
# These variables are set in the very first cell, before torch is imported later in
# the notebook, so that they are already in the environment when CUDA starts up.
# `expandable_segments:True` is an allocator option intended to reduce fragmentation.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# NOTE(review): TORCH_USE_CUDA_DSA is normally a PyTorch *build-time* switch for
# device-side assertions; confirm that exporting it at runtime has any effect.
os.environ["TORCH_USE_CUDA_DSA"] = "1"

In [2]:
lcat_text_path = "./resources/images/lightfootcat/images/extracted_text_tess2.txt"


def read_file(file_path, encoding="utf-8"):
    """Read and return the full text content of a file.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that the result does not depend on the platform's locale encoding
            (the OCR text contains non-ASCII characters such as curly quotes).

    Returns:
        The decoded file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in the given encoding.
    """
    with open(file_path, "r", encoding=encoding) as file:
        return file.read()


lcat_text = read_file(lcat_text_path)

print("Lightfoot Cat text loaded successfully.")

Lightfoot Cat text loaded successfully.


In [3]:
import re

## Cleaning

#### Page numbers and Catalogue common headers

In [4]:
# Remove page numbers: lines that contain nothing but digits (and whitespace).
# NOTE(review): `\s` also matches newlines, so this pattern can swallow blank
# lines adjacent to the page number, not just the number's own line — confirm
# this does not remove paragraph breaks the header pattern below relies on.
lcat_text = re.sub(r"(?m)^\s*\d+\s*$\n?", r"", lcat_text)

# Remove running headers: any whole line that contains one of the strings in
# `common_headers` (case-insensitive). A line is only removed when it has a
# blank line both before and after it (the look-behind / look-ahead on "\n\n").
common_headers = ["John Lightfoot"]
alts = "|".join([re.escape(header) for header in common_headers])
common_headers_regex = rf"(?<=\n\n)[^\n]*?(?:{alts})[^\n]*?(?=\n\n)"
lcat_text = re.sub(common_headers_regex, r"", lcat_text, flags=re.IGNORECASE)



### Fix hyphens at line breaks

In [5]:
# Re-join words that were split over a line break with a trailing hyphen,
# e.g. "neigh-\nbourhood" -> "neighbourhood".
# NOTE(review): a genuinely hyphenated word that happens to wrap at its hyphen
# loses the hyphen as well — confirm that is acceptable for this catalogue.
lcat_text = re.sub(r"(\w+)-\s*\n\s*(\w+)", r"\1\2", lcat_text)

### Collapse extra newlines and normalise space

In [6]:
# Every run of two or more newlines becomes a single newline, i.e. all blank
# lines are removed from the text at this stage.
lcat_text = re.sub(r"\n{2,}", r"\n", lcat_text)  # Collapse multiple newlines

In [7]:
# Replace every run of two or more spaces/tabs with one space, then trim
# leading and trailing whitespace from the whole text.
lcat_text = re.sub(r"[ \t]{2,}", " ", lcat_text).strip()  # Collapse repeated spaces/tabs

In [8]:
print(lcat_text)

Dicotyledones
ACERACEAE
Acer campestre L.
1 folder. Acer campestre [TA]
Acer pseudoplatanus L.
2 folders. 07
Folder 1. Acer Pseudo-Platanus
[(G]. i. "Maple. Bulls: [Bulstrode]
Park” [JL]
Folder 2. Acer Pseudo-Platanus
[TA].
AMARANTHACEAE .
Amaranthus lividus L., Flora Europaea
1: 110 (1964)
1 folder. Amaranthus Blitum [TA].
i. Cites Ray's Syn. 1957. ii. '"Blite
Amaranth. Aug.! It is often found
on Dunghills in the neighbourhood
of London. [ gather'd this on a
Dunghill at Fulham near London"
UL]. iii, “Amaranthus Blitum.
Monoec: 5. and:" [JL]
ARALIACEAE
Hedera helix L.
1 folder. Hedera Helix [TA].
ARISTOLOCHIACEAE
Asarum europaeum L.
1 folder. Asarum curopaeum [TA].
i. "Asarum europaeum Lin: Asarabacca. Anglis. ex horto" [JL].
BALSAMINACEAE
Impatiens noli-tangere L..
1 folder. Impatiens Noli me tangere
[G].
BETULACEAE
Alnus glutinosa (L.) Gaertn.
1 folder. Betula Alnus [TA].
Betula nana L. ]
1 folder. Betula nana [G].
CAMPANULACEAE
Campanula latifolia L.
2 folders.
Folder 1. Companula l

In [9]:
# Family names that do not end in "ACEAE" and therefore have to be listed
# explicitly for the family pattern below.
legacy = [
    "COMPOSITAE", "GRAMINEAE", "LEGUMINOSAE", "PALMAE",
    "UMBELLIFERAE", "CRUCIFERAE", "LABIATAE", "GUTTIFERAE",
    "PAPILIONACEAE", "MIMOSACEAE", "CAESALPINIACEAE"
]
legacy_alts = "|".join([re.escape(alt) for alt in legacy])

# Matches a whole upper-case word ending in "ACEAE", or one of the legacy names.
# The pattern contains exactly one capture group (the family name); later cells
# interpolate `family_regex` into larger patterns and rely on that group.
family_regex = rf"\b([A-Z]+ACEAE|{legacy_alts})\b"


# Put each family heading in its own paragraph (blank line before and after).
# Anything that followed the heading on the same line (e.g. a stray OCR " .")
# is left behind as a separate line, as visible in the printed output.
lcat_text = re.sub(family_regex, r"\n\n\1\n\n", lcat_text)  # Isolate family headings
lcat_text = re.sub(r"\n{3,}", r"\n\n", lcat_text)  # Collapse 3+ newlines to one blank line

In [10]:
print(lcat_text)

Dicotyledones

ACERACEAE

Acer campestre L.
1 folder. Acer campestre [TA]
Acer pseudoplatanus L.
2 folders. 07
Folder 1. Acer Pseudo-Platanus
[(G]. i. "Maple. Bulls: [Bulstrode]
Park” [JL]
Folder 2. Acer Pseudo-Platanus
[TA].

AMARANTHACEAE

 .
Amaranthus lividus L., Flora Europaea
1: 110 (1964)
1 folder. Amaranthus Blitum [TA].
i. Cites Ray's Syn. 1957. ii. '"Blite
Amaranth. Aug.! It is often found
on Dunghills in the neighbourhood
of London. [ gather'd this on a
Dunghill at Fulham near London"
UL]. iii, “Amaranthus Blitum.
Monoec: 5. and:" [JL]

ARALIACEAE

Hedera helix L.
1 folder. Hedera Helix [TA].

ARISTOLOCHIACEAE

Asarum europaeum L.
1 folder. Asarum curopaeum [TA].
i. "Asarum europaeum Lin: Asarabacca. Anglis. ex horto" [JL].

BALSAMINACEAE

Impatiens noli-tangere L..
1 folder. Impatiens Noli me tangere
[G].

BETULACEAE

Alnus glutinosa (L.) Gaertn.
1 folder. Betula Alnus [TA].
Betula nana L. ]
1 folder. Betula nana [G].

CAMPANULACEAE

Campanula latifolia L.
2 folders.
Folder

In [11]:
import unicodedata

def normalize_unicode(text):
    """Return `text` in NFKD form with typographic punctuation made plain.

    After NFKD normalisation, curly single/double quotes become straight
    quotes, en and em dashes become hyphens, and the ellipsis character
    becomes three dots.
    """
    # (typographic character, plain replacement) pairs applied after NFKD.
    replacements = (
        ("‘", "'"),
        ("’", "'"),
        ("“", '"'),
        ("”", '"'),
        ("–", "-"),
        ("—", "-"),
        ("…", "..."),
    )

    result = unicodedata.normalize("NFKD", text)
    for fancy, plain in replacements:
        result = result.replace(fancy, plain)
    return result

lcat_text = normalize_unicode(lcat_text)

In [12]:
print(lcat_text)

Dicotyledones

ACERACEAE

Acer campestre L.
1 folder. Acer campestre [TA]
Acer pseudoplatanus L.
2 folders. 07
Folder 1. Acer Pseudo-Platanus
[(G]. i. "Maple. Bulls: [Bulstrode]
Park" [JL]
Folder 2. Acer Pseudo-Platanus
[TA].

AMARANTHACEAE

 .
Amaranthus lividus L., Flora Europaea
1: 110 (1964)
1 folder. Amaranthus Blitum [TA].
i. Cites Ray's Syn. 1957. ii. '"Blite
Amaranth. Aug.! It is often found
on Dunghills in the neighbourhood
of London. [ gather'd this on a
Dunghill at Fulham near London"
UL]. iii, "Amaranthus Blitum.
Monoec: 5. and:" [JL]

ARALIACEAE

Hedera helix L.
1 folder. Hedera Helix [TA].

ARISTOLOCHIACEAE

Asarum europaeum L.
1 folder. Asarum curopaeum [TA].
i. "Asarum europaeum Lin: Asarabacca. Anglis. ex horto" [JL].

BALSAMINACEAE

Impatiens noli-tangere L..
1 folder. Impatiens Noli me tangere
[G].

BETULACEAE

Alnus glutinosa (L.) Gaertn.
1 folder. Betula Alnus [TA].
Betula nana L. ]
1 folder. Betula nana [G].

CAMPANULACEAE

Campanula latifolia L.
2 folders.
Folder

In [13]:
# Top-level headings of the catalogue; the text is split on these first.
divisions = ["Dicotyledones", "Monocotyledones", "Pteridophyta", "Hepaticae", "Algae"]

divisions_alts = "|".join([re.escape(division) for division in divisions])
# Whole-word match of any division name, captured in a single group.
division_regex = rf"\b({divisions_alts})\b"

# Each match runs from one division heading up to (not including) the next
# heading or the end of the text. Because `division_regex` carries its own
# capture group and is interpolated twice, findall yields 3-tuples:
#   (whole chunk including its heading, division name, next division name or "").
# Later cells use element 0 as the chunk text and element 1 as the dictionary key.
# Text that precedes the first division heading is not part of any match.
division_chunk = re.findall(rf"({division_regex}.*?)(?={division_regex}|$)", lcat_text, re.DOTALL)

In [14]:
def get_family_chunks(x, pattern=None):
    """Split a division's text into per-family tuples.

    Args:
        x: Text of one division.
        pattern: Regex that matches a family heading. It must contain exactly
            one capture group (the family name). Defaults to the notebook-level
            `family_regex`.

    Returns:
        A list of ``(family, text, next_family)`` tuples, where ``text`` is
        everything between this heading and the next one (or the end of the
        input) and ``next_family`` is ``""`` for the last family. When no
        heading is found, a single tuple
        ``("No family found", x.strip(), "")`` is returned so that the family
        label is always element 0 and the text is always element 1.
    """
    if pattern is None:
        pattern = family_regex

    matches = re.findall(rf"{pattern}(.*?)(?={pattern}|$)", x, re.DOTALL)
    if matches:
        return matches

    # Same field order as a real match: (family, text, next_family).
    return [("No family found", x.strip(), "")]
# Map division name -> list of family tuples for that division.
# NOTE: this value is superseded further down, where `family_chunks` is rebuilt
# with split_by_families (list of dicts instead of tuples).
family_chunks = {chunk[1] : get_family_chunks(chunk[0]) for chunk in division_chunk}

In [15]:
def split_by_families(text: str, pattern=None):
    """Split text into one chunk per family heading.

    Args:
        text: Text of one division.
        pattern: Regex that matches a family heading. Defaults to the
            notebook-level `family_regex`; pass a pattern explicitly to use the
            function without that global.

    Returns:
        A list of ``{"family": ..., "text": ...}`` dicts in document order.
        ``text`` is everything after the heading up to the start of the next
        heading (or the end of the input). When no heading is found, a single
        entry ``{"family": "No family found", "text": text.strip()}`` is
        returned.
    """
    if pattern is None:
        pattern = family_regex

    headings = list(re.finditer(pattern, text))
    if not headings:
        return [{"family": "No family found", "text": text.strip()}]

    # A chunk ends where the next heading starts; `None` slices to the end of
    # the text for the last heading.
    chunk_ends = [heading.start() for heading in headings[1:]] + [None]

    text_chunks = []
    for heading, chunk_end in zip(headings, chunk_ends):
        # Strip punctuation/whitespace the heading match may carry around the name.
        family = re.sub(r"[.\n\t,]*\s*([A-Z]+)\s*[.\n\t,]*", r"\1", heading.group())
        text_chunks.append({"family": family, "text": text[heading.end():chunk_end]})
    return text_chunks

In [16]:
# Map division name -> list of {"family", "text"} dicts for that division.
# This replaces the tuple-based `family_chunks` built earlier. Because the
# division name is the dict key, a division heading that occurs more than once
# keeps only its last chunk.
family_chunks = {chunk[1] : split_by_families(chunk[0]) for chunk in division_chunk}

In [17]:
family_chunks

{'Dicotyledones': [{'family': 'ACERACEAE',
   'text': '\n\nAcer campestre L.\n1 folder. Acer campestre [TA]\nAcer pseudoplatanus L.\n2 folders. 07\nFolder 1. Acer Pseudo-Platanus\n[(G]. i. "Maple. Bulls: [Bulstrode]\nPark" [JL]\nFolder 2. Acer Pseudo-Platanus\n[TA].\n\n'},
  {'family': 'AMARANTHACEAE',
   'text': '\n\n .\nAmaranthus lividus L., Flora Europaea\n1: 110 (1964)\n1 folder. Amaranthus Blitum [TA].\ni. Cites Ray\'s Syn. 1957. ii. \'"Blite\nAmaranth. Aug.! It is often found\non Dunghills in the neighbourhood\nof London. [ gather\'d this on a\nDunghill at Fulham near London"\nUL]. iii, "Amaranthus Blitum.\nMonoec: 5. and:" [JL]\n\n'},
  {'family': 'ARALIACEAE',
   'text': '\n\nHedera helix L.\n1 folder. Hedera Helix [TA].\n\n'},
  {'family': 'ARISTOLOCHIACEAE',
   'text': '\n\nAsarum europaeum L.\n1 folder. Asarum curopaeum [TA].\ni. "Asarum europaeum Lin: Asarabacca. Anglis. ex horto" [JL].\n\n'},
  {'family': 'BALSAMINACEAE',
   'text': '\n\nImpatiens noli-tangere L..\n1 fold

In [18]:
# Rough size of the corpus: number of whitespace-separated words across all
# family chunks. This is a word count, not a count of model tokens.
total_tokens = sum(len(chunk["text"].split()) for chunks in family_chunks.values() for chunk in chunks)

In [19]:
total_tokens

41136

In [20]:
# Path to the YAML prompt file for this catalogue (a path, not the prompt text).
prompt = "resources/prompts/lightfootcat_prompt.yaml"

# Project-local model wrapper.
from lib.model.ocr_model import OCRModel

# Per the log output below, construction loads the prompt file and
# ./resources/settings.yaml and selects the "mistral7b" model configuration.
ocr_model = OCRModel(prompt)

2025-08-07 13:58:12,738 - lib.utils.promptLoader - INFO - Loading prompt from resources/prompts/lightfootcat_prompt.yaml
2025-08-07 13:58:12,803 - lib.utils.promptLoader - INFO - Loading prompt from ./resources/settings.yaml
2025-08-07 13:58:12,858 - lib.model.base_model - INFO - Loading model: mistral7b with batch size: 1, max tokens: 4096, temperature: 0.01


  from .autonotebook import tqdm as notebook_tqdm


2025-08-07 13:58:17,205 - lib.model - INFO - Importing model: mistral7b
2025-08-07 13:58:17,205 - lib.model - INFO - Model imported successfully: mistral7b


In [21]:
def prettyPrint(message):
    """Print `message` between two 50-character '=' rules, padded by blank lines."""
    rule = "=" * 50
    print(f"\n{rule}", message, f"{rule}\n", sep="\n")

In [None]:

# System prompt for the OCR clean-up model: task description, three rules and
# two few-shot input/output pairs taken from the start of the catalogue.
# NOTE(review): the prompt text is sent to the model verbatim, so the points
# below are flagged rather than silently edited (any change alters model output):
#   * typos: "denotes the how each speciemen" in the fourth sentence;
#   * the few-shot examples are wrapped in single quotes, and the recorded
#     output for the Amaranthus entry comes back wrapped in quotes as well;
#   * the first example's Output rewrites "Acer campestre [TA]" as
#     "Acer campestre L.", i.e. it demonstrates replacing the bracketed
#     annotation, and the recorded outputs show the same substitution on other
#     entries — confirm this is intended, since rule 3 only asks for cleaning.
system_prompt = (
    "You are an expert in cleaning OCR text\n"
    "You will be provided with a text containing botanical information from a historical botanical catalogue.\n"
    "The text contains botanical information, including family names, species names, and other relevant details.\n"
    "This information denotes the how each speciemen is stored in the catalogue.\n"
    # "You task is to list all ocr artefacts, grammatical errors, and formatting issues in the text.\n"
    # "You will not make any changes to the text.\n"
    # "Do not make any assumption about the text, if you are not sure about something, keep the original text.\n"
    # "Think step by step and provide a detailed analysis of the text.\n"
    # "Return a rating out of 10 for the overall quality of the text.\n"
    "Your task is to clean the text by following the rules:\n"
    "1. Find and clean any OCR artefacts, like missing spaces, incorrect characters, or formatting issues.\n"
    "2. Join any words that are split across lines, ensuring that the meaning is preserved. Ensure the lines joined are contextually appropriate.\n"
    "3. Only return the cleaned text, without any additional comments or explanations.\n"
    #"4. Compare and return an accuracy rating out of 10 between the original and cleaned text. Higher the rating, the more accurate. The returned rating should be at the end of the cleaned text following the strucutre: RATING: <rating>\n"
    "Few-Shot Examples:\n"
    "Input: 'Acer campestre L.\n1 folder. Acer campestre [TA]\n\nAcer pseudoplatanus L.\n2 folders. 07\nFolder 1. Acer Pseudo-Platanus\n[(G]. i. \"Maple. Bulls: [Bulstrode]\nPark\" [JL]\nFolder 2. Acer Pseudo-Platanus\n[TA].'\n"
    "Output: 'Acer campestre L.\n1 folder. Acer campestre L.\n\nAcer pseudoplatanus L.\n2 folders. 07\nFolder 1. Acer Pseudo-Platanus L. \"Maple. Bulls: Bulstrode Park\" JL\nFolder 2. Acer Pseudo-Platanus L. [TA]'\n"
    "Input: '.\n\nAmaranthus lividus L., Flora Europaea\n1: 110 (1964)\n1 folder. Amaranthus Blitum [TA].\ni. Cites Ray\'s Syn. 1957. ii. \"Blite\nAmaranth. Aug.! It is often found\non Dunghills in the neighbourhood\nof London. [ gather\'d this on a\nDunghill at Fulham near London\"\nUL]. iii, \"Amaranthus Blitum.\nMonoec: 5. and:\" [JL]'\n"
    "Output: 'Amaranthus lividus L., Flora Europaea\n1: 110 (1964)\n1 folder. Amaranthus Blitum [TA].\ni. Cites Ray\'s Syn. 1957. \nii. \"Blite Amaranth. Aug. It is often found on Dunghills in the neighbourhood of London. [gather\'d this on a Dunghill at Fulham near London]\" UL. \niii, \"Amaranthus Blitum. Monoec: 5. and:\" [JL]'"
)

def user_prompt(text):
    """Build the user message that asks the model to clean `text`.

    The instruction line is followed by a blank line, the OCR text itself,
    and a trailing newline.
    """
    instruction = "By following the rules cleaned the following OCR'd text:\n\n"
    return instruction + f"{text}\n"

# sample = family_chunks["Dicotyledones"][8][1][:1000]

# prettyPrint(f"Sample text for analysis:\n{sample}") 

# conversation = [{"role": "system", "content": system_prompt},
#                 {"role": "user", "content": user_prompt(sample)}]


# output = ocr_model.model(conversation=conversation)


# prettyPrint(f"Output from the model:\n{output[0]}")

In [23]:
from lib.data_processing.text_processing import TextProcessor

# Project-local text processor (implementation not visible in this notebook).
tp = TextProcessor()

# Judging by the output shown below, the call returns
# {division: [{"family", "text", "species": [chunk, ...]}, ...]}.
# NOTE(review): presumably `max_chunk_size` caps the size of each "species"
# chunk — confirm the unit (characters vs tokens) against TextProcessor.
chunks_tp = tp(lcat_text,divisions=divisions, max_chunk_size=1000)

Chunker is not loaded. Loading Chunker...


  deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(  # type: ignore[union-attr]


In [24]:
chunks_tp

{'Dicotyledones': [{'family': 'ACERACEAE',
   'text': '\n\nAcer campestre L.\n1 folder. Acer campestre [TA]\nAcer pseudoplatanus L.\n2 folders. 07\nFolder 1. Acer Pseudo-Platanus\n[(G]. i. "Maple. Bulls: [Bulstrode]\nPark" [JL]\nFolder 2. Acer Pseudo-Platanus\n[TA].\n\n',
   'species': ['Acer campestre L.\n1 folder. Acer campestre [TA]\n\nAcer pseudoplatanus L.\n2 folders. 07\nFolder 1. Acer Pseudo-Platanus\n[(G]. i. "Maple. Bulls: [Bulstrode]\nPark" [JL]\nFolder 2. Acer Pseudo-Platanus\n[TA].']},
  {'family': 'AMARANTHACEAE',
   'text': '\n\n .\nAmaranthus lividus L., Flora Europaea\n1: 110 (1964)\n1 folder. Amaranthus Blitum [TA].\ni. Cites Ray\'s Syn. 1957. ii. \'"Blite\nAmaranth. Aug.! It is often found\non Dunghills in the neighbourhood\nof London. [ gather\'d this on a\nDunghill at Fulham near London"\nUL]. iii, "Amaranthus Blitum.\nMonoec: 5. and:" [JL]\n\n',
   'species': ['.\n\nAmaranthus lividus L., Flora Europaea\n1: 110 (1964)\n1 folder. Amaranthus Blitum [TA].\ni. Cites Ra

In [25]:
# Flatten division -> family entries -> species chunks into one list of chunk
# strings, preserving document order.
all_species = [
    species_chunk
    for family_entries in chunks_tp.values()
    for family_entry in family_entries
    for species_chunk in family_entry["species"]
]

In [26]:
len(all_species)

349

In [27]:
# Inputs for the model run: every species chunk, grouped into fixed-size batches.
chunks = all_species
# Number of chunks sent to the model per call.
batch_size = 32
# NOTE(review): nothing in the visible cells ever appends to `cleaned_chunks`
# (the model results go to `outputs`), so the later cells that print it show
# either nothing or output left over from an earlier session.
cleaned_chunks = []
# Consecutive slices of `chunks`; the last batch may be shorter than batch_size.
batches = [chunks[i:i + batch_size] for i in range(0, len(chunks), batch_size)]

In [28]:
batches

[['Acer campestre L.\n1 folder. Acer campestre [TA]\n\nAcer pseudoplatanus L.\n2 folders. 07\nFolder 1. Acer Pseudo-Platanus\n[(G]. i. "Maple. Bulls: [Bulstrode]\nPark" [JL]\nFolder 2. Acer Pseudo-Platanus\n[TA].',
  '.\n\nAmaranthus lividus L., Flora Europaea\n1: 110 (1964)\n1 folder. Amaranthus Blitum [TA].\ni. Cites Ray\'s Syn. 1957. ii. \'"Blite\nAmaranth. Aug.! It is often found\non Dunghills in the neighbourhood\nof London. [ gather\'d this on a\nDunghill at Fulham near London"\nUL]. iii, "Amaranthus Blitum.\nMonoec: 5. and:" [JL]',
  'Hedera helix L.\n1 folder. Hedera Helix [TA].',
  'Asarum europaeum L.\n1 folder. Asarum curopaeum [TA].\ni. "Asarum europaeum Lin: Asarabacca. Anglis. ex horto" [JL].',
  'Impatiens noli-tangere L..\n\n1 folder. Impatiens Noli me tangere\n\n[G].',
  'Alnus glutinosa (L.) Gaertn.\n1 folder. Betula Alnus [TA].\n\nBetula nana L. ]\n1 folder. Betula nana [G].',
  'Campanula latifolia L.\n2 folders.\nFolder 1. Companula latifolia [TA].\nFolder 2. Compa

In [29]:
(batches[0][1])

'.\n\nAmaranthus lividus L., Flora Europaea\n1: 110 (1964)\n1 folder. Amaranthus Blitum [TA].\ni. Cites Ray\'s Syn. 1957. ii. \'"Blite\nAmaranth. Aug.! It is often found\non Dunghills in the neighbourhood\nof London. [ gather\'d this on a\nDunghill at Fulham near London"\nUL]. iii, "Amaranthus Blitum.\nMonoec: 5. and:" [JL]'

In [30]:
from tqdm import tqdm
import torch
# Build the two-message chat (system prompt + user prompt) for one text chunk.
conversation = lambda text: [{"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt(text)}]
# Cleaned text per input chunk, in the same order as `all_species`
# (the later cells index and join the elements as strings).
outputs = []
for batch in tqdm(batches, desc="Processing batches", unit="batch"):
    # Inference only: no gradients are needed.
    with torch.no_grad():
        conversations = [conversation(chunk) for chunk in batch]
        # NOTE(review): assumes ocr_model.model returns one result per
        # conversation, in input order — confirm against the model wrapper.
        outputs.extend(ocr_model.model(conversation=conversations))
    # Drop the batch inputs and release cached GPU memory between batches.
    del conversations
    torch.cuda.empty_cache()
    #context = cut_output[-context_size:] if len(cut_output) > context_size else cut_output


Processing batches:   0%|          | 0/11 [00:00<?, ?batch/s]

Loading model for [mistralai/Mistral-7B-Instruct-v0.3] to device [cuda]


Loading checkpoint shards: 100%|██████████| 3/3 [00:22<00:00,  7.49s/it]


Loading processor for [mistralai/Mistral-7B-Instruct-v0.3] to device [cuda:0]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Processing batches:   9%|▉         | 1/11 [02:30<25:02, 150.24s/batch]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Processing batches:  18%|█▊        | 2/11 [05:37<25:49, 172.13s/batch]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Processing batches:  27%|██▋       | 3/11 [09:55<28:10, 211.37s/batch]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Processing batches:  36%|███▋      | 4/11 [11:48<20:07, 172.50s/batch]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Processing batches:  45%|████▌     | 5/11 [17:02<22:20, 223.40s/batch]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Processing batches:  55%|█████▍    | 6/11 [19:27<16:23, 196.66s/batch]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Processing batches:  64%|██████▎   | 7/11 [25:09<16:17, 244.28s/batch]Setting `pad_token_id` to `eos_tok

In [35]:
(outputs[1])

'\'Amaranthus lividus L., Flora Europaea\n1: 110 (1964)\n1 folder. Amaranthus Blitum [TA].\ni. Cites Ray\'s Syn. 1957.\nii. "Blite Amaranth. Aug. It is often found on Dunghills in the neighbourhood of London. [gather\'d this on a Dunghill at Fulham near London]" UL.\niii. "Amaranthus Blitum. Monoec: 5. and:" [JL]\''

In [36]:
print("\n\n".join(outputs))

Acer campestre L.
1 folder. Acer campestre L.

Acer pseudoplatanus L.
2 folders. 07
Folder 1. Acer Pseudo-Platanus L. "Maple. Bulls: Bulstrode Park" JL
Folder 2. Acer Pseudo-Platanus L. [TA]

'Amaranthus lividus L., Flora Europaea
1: 110 (1964)
1 folder. Amaranthus Blitum [TA].
i. Cites Ray's Syn. 1957.
ii. "Blite Amaranth. Aug. It is often found on Dunghills in the neighbourhood of London. [gather'd this on a Dunghill at Fulham near London]" UL.
iii. "Amaranthus Blitum. Monoec: 5. and:" [JL]'

Hedera helix L.
1 folder. Hedera helix L.

Asarum europaeum L.
1 folder. Asarum europaeum L.

i. "Asarum europaeum Lin: Asarabacca. Anglis. ex horto" [JL]

Impatiens noli-tangere L.
1 folder. Impatiens noli-tangere L.

[G]

Alnus glutinosa (L.) Gaertn.
1 folder. Alnus glutinosa (L.) Gaertn.

Betula nana L.
1 folder. Betula nana L.

Campanula latifolia L.
2 folders. Folder 1. Campanula latifolia L.
Folder 2. Campanula latifolia [G].

Campanula patula L.
1 folder. Campanula patula L.

Campanula ra

In [33]:
print("\n".join(cleaned_chunks))




In [34]:
# NOTE(review): `tc` is not defined anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel (see the recorded output below).
# `cleaned_chunks` is also still empty at this point. Define/import `tc` and
# populate `cleaned_chunks`, or remove this cell.
tc.merge_sentences(cleaned_chunks)

NameError: name 'tc' is not defined

In [None]:
print(cleaned_chunks[0])

Arenaria serpyllifolia L.
Folder. Arenaria Serpyllifolia [TA].
Bufonia tenuifolia L., Flora Europaea.'
1: 133 (1964)
1 folder. Bufonia Tenuifolia [TA].
[Not a British plant, but from southern and eastern Europe.]
Cerastium alpinum L.
2 folders.
Folder 1. Cerastium latifolium [G].
i. Cites Ray's Syn. 349. ii. "Cerastium alpinum. This was gathered upon Snowdon at the top of the highest Rock call'd Clogwyn y Garnedh. June" [JL]. iii. "Top of Snowdon" DL]. iv. "Dr. Solander affirms this to be the true Cerastium alpinum. I had it from Snowdon. He found it at Terra del Fuego & named it C. hirtum with a mark of Dubitation, but now thinks them both one" [JL].
Folder 2. Cerastium alpinum [TA]; Cerastium - latifolium [G]. i "Clogwyn da" [JL]. ii. *Cerast: alpin: Ben Lomond" Ut].
Cerastium arcticum Lange
2 folders.
Folder 1. Cerastium alpinum [G].
i. "Anglesey. Cerastium alpinum. Q: an Var: aut nova species?" Fig: Cerast: alp: non quadrat: Flor: Dan: The Fig: in the fl. Dan: of Cerast: alpinum ag

In [None]:
cleaned_chunks

['Arenaria serpyllifolia L.\nfolder. Arenaria Serpyllifolia [TA].\nBufonia tenuifolia L., Flora Europaea.\'\n1: 133 (1964)\n1 folder. Bufonia Tenuifolia [TA].\n[Not a British plant, but from southern and eastern Europe.]\nCerastium alpinum L.\n2 folders.\nFolder 1. Cerastium latifolium [G].\ni. Cites Ray\'s Syn. 349. ii. "Cerastium alpinum. This was gathered upon Snowdon at the top of the highest Rock call\'d Clogwyn y Garnedh. June" [JL]. iii. "Top of Snowdon" DL]. iv. "Dr. Solander affirms this to be the true Cerastium alpinum. I had it from Snowdon. He found it at Terra del Fuego & named it C. hirtum with a mark of Dubitation, but now thinks them both one" [JL].\nFolder 2. Cerastium alpinum [TA]; Cerastium - latifolium [G]. i "Clogwyn da" [JL]. ii. *Cerast: alpin: Ben Lomond" Ut].\nCerastium arcticum Lange\n2 folders.\nFolder 1. Cerastium alpinum [G].\ni. "Anglesey. Cerastium alpinum. Q: an Var: aut nova species? Fig: Cerast: alp: non quadrat: Flor: Dan: The Fig: in the fl. Dan: of 