In [1]:
import polars as pl
import pandas as pd
import numpy as np

from settings import gen_dataset

from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

import sys
import os
import json

# Playing around .json formats

In [2]:
data_path = '/home/onyxia/work/HierarchicProtLM/data/'

In [3]:
train, validation, test = pd.read_json(data_path + 'ECPred40_train.json').drop('index',axis=1).rename(columns={"sequence": "AA_seq"}), pd.read_json(data_path + 'ECPred40_valid.json').rename(columns={"sequence": "AA_seq"}), pd.read_json(data_path + 'ECPred40_test.json').rename(columns={"sequence": "AA_seq"})

In [4]:
train

Unnamed: 0,Protein UniProt Acc.,EC Number,AA_seq
0,Q65GK1,2.5.1.61,MRNIIVGSRRSKLAMTQTKWVIKKLEELNPDFTFEIKEIVTKGDRI...
1,P16616,2.5.1.61,MMRTIKVGSRRSKLAMTQTKWVIQKLKEINPSFAFEIKEIVTKGDR...
2,Q1LU25,2.5.1.61,MLNNILKIATRQSPLAIWQANYVRNQLLSFYPTLLIELVPIVTSGD...
3,Q7VRM4,2.5.1.61,MQAKILRIATRKSPLAICQACYVCNKLKHYHPHIQTELIPIITTGD...
4,Q491Z6,2.5.1.61,MKNKILKIATRKSQLAICQAQYVHNELKHYHPTLSIELMPIVTTGD...
...,...,...,...
258022,Q8R121,0.0.0.0,MRVASSLFLPVLLTEVWLVTSFNLSSHSPEASVHLESQDYENQTWE...
258023,Q3URR7,0.0.0.0,MLAEPVPDALEQEHPGAVKLEEDEVGEEDPRLAESRPRPEVAHQLF...
258024,P54479,0.0.0.0,MNVQEALNLLKENGYKYTNKREDMLQLFADSDRYLTAKNVLSALND...
258025,Q9VA00,0.0.0.0,MSASANLANVYAELMRRCGESYTITYGAPPTYLVSMVGAAEAGKKI...


In [5]:
def split_and_create_columns(row):
    numbers = row['EC Number'].split('.')
    return pd.Series([numbers[0], '.'.join(numbers[:2]), '.'.join(numbers[:3]), '.'.join(numbers[:4])])

In [6]:
train[['ec_first_cat', 'ec_second_cat', 'ec_third_cat', 'ec_fourth_cat']] = train.apply(split_and_create_columns, axis=1)

In [7]:
# Specify the columns you want to concatenate
columns_to_concat = ['ec_first_cat', 'ec_second_cat', 'ec_third_cat', 'ec_fourth_cat']

# Create a new column with grouped numbers as lists
train['labels'] = train.apply(lambda x: list([x[col] for col in columns_to_concat]),axis=1)      


In [8]:
train

Unnamed: 0,Protein UniProt Acc.,EC Number,AA_seq,ec_first_cat,ec_second_cat,ec_third_cat,ec_fourth_cat,labels
0,Q65GK1,2.5.1.61,MRNIIVGSRRSKLAMTQTKWVIKKLEELNPDFTFEIKEIVTKGDRI...,2,2.5,2.5.1,2.5.1.61,"[2, 2.5, 2.5.1, 2.5.1.61]"
1,P16616,2.5.1.61,MMRTIKVGSRRSKLAMTQTKWVIQKLKEINPSFAFEIKEIVTKGDR...,2,2.5,2.5.1,2.5.1.61,"[2, 2.5, 2.5.1, 2.5.1.61]"
2,Q1LU25,2.5.1.61,MLNNILKIATRQSPLAIWQANYVRNQLLSFYPTLLIELVPIVTSGD...,2,2.5,2.5.1,2.5.1.61,"[2, 2.5, 2.5.1, 2.5.1.61]"
3,Q7VRM4,2.5.1.61,MQAKILRIATRKSPLAICQACYVCNKLKHYHPHIQTELIPIITTGD...,2,2.5,2.5.1,2.5.1.61,"[2, 2.5, 2.5.1, 2.5.1.61]"
4,Q491Z6,2.5.1.61,MKNKILKIATRKSQLAICQAQYVHNELKHYHPTLSIELMPIVTTGD...,2,2.5,2.5.1,2.5.1.61,"[2, 2.5, 2.5.1, 2.5.1.61]"
...,...,...,...,...,...,...,...,...
258022,Q8R121,0.0.0.0,MRVASSLFLPVLLTEVWLVTSFNLSSHSPEASVHLESQDYENQTWE...,0,0.0,0.0.0,0.0.0.0,"[0, 0.0, 0.0.0, 0.0.0.0]"
258023,Q3URR7,0.0.0.0,MLAEPVPDALEQEHPGAVKLEEDEVGEEDPRLAESRPRPEVAHQLF...,0,0.0,0.0.0,0.0.0.0,"[0, 0.0, 0.0.0, 0.0.0.0]"
258024,P54479,0.0.0.0,MNVQEALNLLKENGYKYTNKREDMLQLFADSDRYLTAKNVLSALND...,0,0.0,0.0.0,0.0.0.0,"[0, 0.0, 0.0.0, 0.0.0.0]"
258025,Q9VA00,0.0.0.0,MSASANLANVYAELMRRCGESYTITYGAPPTYLVSMVGAAEAGKKI...,0,0.0,0.0.0,0.0.0.0,"[0, 0.0, 0.0.0, 0.0.0.0]"


In [9]:
first_cat = list(set(train['ec_first_cat']))
second_cat = list(set(train['ec_second_cat']))
third_cat = list(set(train['ec_third_cat']))
fourth_cat = list(set(train['ec_fourth_cat']))

In [10]:
first_cat, second_cat, third_cat, fourth_cat

(['3', '0', '4', '6', '1', '2', '5'],
 ['1.18',
  '3.5',
  '1.14',
  '1.6',
  '3.4',
  '1.8',
  '1.3',
  '4.2',
  '1.13',
  '4.99',
  '2.7',
  '6.3',
  '5.2',
  '6.2',
  '1.11',
  '1.2',
  '1.1',
  '2.1',
  '5.4',
  '4.4',
  '3.1',
  '1.10',
  '3.2',
  '2.4',
  '1.15',
  '5.3',
  '6.1',
  '0.0',
  '4.1',
  '3.7',
  '3.3',
  '3.11',
  '2.2',
  '1.16',
  '5.99',
  '6.5',
  '4.6',
  '2.5',
  '1.5',
  '2.8',
  '1.97',
  '6.4',
  '1.9',
  '1.7',
  '2.9',
  '1.17',
  '5.1',
  '3.6',
  '2.6',
  '4.3',
  '1.4',
  '2.3'],
 ['3.5.2',
  '2.7.9',
  '3.5.4',
  '2.7.2',
  '1.5.1',
  '1.3.5',
  '2.1.2',
  '4.2.2',
  '3.4.19',
  '2.6.1',
  '1.16.3',
  '4.3.3',
  '6.4.1',
  '2.3.3',
  '4.1.99',
  '2.8.4',
  '3.6.3',
  '5.99.1',
  '1.10.9',
  '1.6.5',
  '4.1.3',
  '2.3.1',
  '1.9.3',
  '1.14.99',
  '3.1.21',
  '3.1.2',
  '2.9.1',
  '1.14.14',
  '5.1.3',
  '2.7.1',
  '2.7.12',
  '4.3.1',
  '3.4.21',
  '5.1.1',
  '2.7.4',
  '3.1.1',
  '1.7.2',
  '4.1.2',
  '3.6.5',
  '1.2.4',
  '3.1.27',
  '3.1.5',
  '4.4

In [11]:
all_cat = first_cat + second_cat + third_cat + fourth_cat

In [12]:
all_cat.sort()
len(all_cat)

828

828 différentes classes 

In [13]:
label2idx = {ec:i for i,ec in enumerate(all_cat)}

In [14]:
label2idx

{'0': 0,
 '0.0': 1,
 '0.0.0': 2,
 '0.0.0.0': 3,
 '1': 4,
 '1.1': 5,
 '1.1.1': 6,
 '1.1.1.1': 7,
 '1.1.1.103': 8,
 '1.1.1.17': 9,
 '1.1.1.18': 10,
 '1.1.1.205': 11,
 '1.1.1.23': 12,
 '1.1.1.25': 13,
 '1.1.1.261': 14,
 '1.1.1.262': 15,
 '1.1.1.267': 16,
 '1.1.1.27': 17,
 '1.1.1.290': 18,
 '1.1.1.34': 19,
 '1.1.1.37': 20,
 '1.1.1.38': 21,
 '1.1.1.42': 22,
 '1.1.1.44': 23,
 '1.1.1.49': 24,
 '1.1.1.8': 25,
 '1.1.1.85': 26,
 '1.1.1.86': 27,
 '1.1.1.94': 28,
 '1.1.5': 29,
 '1.1.5.3': 30,
 '1.1.5.4': 31,
 '1.10': 32,
 '1.10.2': 33,
 '1.10.2.2': 34,
 '1.10.3': 35,
 '1.10.3.2': 36,
 '1.10.3.9': 37,
 '1.10.9': 38,
 '1.10.9.1': 39,
 '1.11': 40,
 '1.11.1': 41,
 '1.11.1.15': 42,
 '1.11.1.21': 43,
 '1.11.1.6': 44,
 '1.11.1.7': 45,
 '1.11.1.9': 46,
 '1.13': 47,
 '1.13.11': 48,
 '1.13.11.11': 49,
 '1.13.11.5': 50,
 '1.13.11.54': 51,
 '1.13.11.6': 52,
 '1.14': 53,
 '1.14.13': 54,
 '1.14.13.9': 55,
 '1.14.14': 56,
 '1.14.14.1': 57,
 '1.14.14.18': 58,
 '1.14.14.5': 59,
 '1.14.99': 60,
 '1.14.99.46': 61,
 

In [16]:
def translate_list(lst):
    return [label2idx[item] for item in lst]

train['labels'] = train['labels'].apply(translate_list)

In [17]:
train

Unnamed: 0,Protein UniProt Acc.,EC Number,AA_seq,ec_first_cat,ec_second_cat,ec_third_cat,ec_fourth_cat,labels
0,Q65GK1,2.5.1.61,MRNIIVGSRRSKLAMTQTKWVIKKLEELNPDFTFEIKEIVTKGDRI...,2,2.5,2.5.1,2.5.1.61,"[156, 282, 283, 294]"
1,P16616,2.5.1.61,MMRTIKVGSRRSKLAMTQTKWVIQKLKEINPSFAFEIKEIVTKGDR...,2,2.5,2.5.1,2.5.1.61,"[156, 282, 283, 294]"
2,Q1LU25,2.5.1.61,MLNNILKIATRQSPLAIWQANYVRNQLLSFYPTLLIELVPIVTSGD...,2,2.5,2.5.1,2.5.1.61,"[156, 282, 283, 294]"
3,Q7VRM4,2.5.1.61,MQAKILRIATRKSPLAICQACYVCNKLKHYHPHIQTELIPIITTGD...,2,2.5,2.5.1,2.5.1.61,"[156, 282, 283, 294]"
4,Q491Z6,2.5.1.61,MKNKILKIATRKSQLAICQAQYVHNELKHYHPTLSIELMPIVTTGD...,2,2.5,2.5.1,2.5.1.61,"[156, 282, 283, 294]"
...,...,...,...,...,...,...,...,...
258022,Q8R121,0.0.0.0,MRVASSLFLPVLLTEVWLVTSFNLSSHSPEASVHLESQDYENQTWE...,0,0.0,0.0.0,0.0.0.0,"[0, 1, 2, 3]"
258023,Q3URR7,0.0.0.0,MLAEPVPDALEQEHPGAVKLEEDEVGEEDPRLAESRPRPEVAHQLF...,0,0.0,0.0.0,0.0.0.0,"[0, 1, 2, 3]"
258024,P54479,0.0.0.0,MNVQEALNLLKENGYKYTNKREDMLQLFADSDRYLTAKNVLSALND...,0,0.0,0.0.0,0.0.0.0,"[0, 1, 2, 3]"
258025,Q9VA00,0.0.0.0,MSASANLANVYAELMRRCGESYTITYGAPPTYLVSMVGAAEAGKKI...,0,0.0,0.0.0,0.0.0.0,"[0, 1, 2, 3]"


To do : 
- [X] concaténer les colonnes de label en une liste puis mapper les éléments de la liste à leur indice de label donné par le dictionnaire label2idx
- [] dataloader et optimizer donnés par huggingface, même si le modèle est maison?
- [] faire un petit training loop pour tester si ça marche en se connectant sur le cluster chilien


### Loading model and tokenizer to use weights and tokenizer from the paper.

In [20]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.40.1-py3-none-any.whl.metadata (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.0/138.0 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.19.3 (from transformers)
  Downloading huggingface_hub-0.23.0-py3-none-any.whl.metadata (12 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.4.28-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.20,>=0.19 (from transformers)
  Downloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading transformers-4.40.1-py3-none-any.whl (9.0 MB)


In [3]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

#tokenizer = AutoTokenizer.from_pretrained("ElnaggarLab/ankh-base")
#model = AutoModelForSeq2SeqLM.from_pretrained("ElnaggarLab/ankh-base")
model2 = AutoModelForSeq2SeqLM.from_pretrained("ElnaggarLab/ankh-base-encoder", from_flax=True)

All Flax model weights were used when initializing T5ForConditionalGeneration.

Some weights of T5ForConditionalGeneration were not initialized from the Flax model and are newly initialized: ['decoder.block.11.layer.0.SelfAttention.k.weight', 'decoder.block.14.layer.0.layer_norm.weight', 'decoder.block.8.layer.2.DenseReluDense.wo.weight', 'decoder.block.20.layer.1.EncDecAttention.k.weight', 'decoder.block.4.layer.1.layer_norm.weight', 'decoder.block.17.layer.2.layer_norm.weight', 'decoder.block.16.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.22.layer.2.layer_norm.weight', 'decoder.block.19.layer.0.SelfAttention.v.weight', 'decoder.block.23.layer.1.EncDecAttention.q.weight', 'decoder.block.10.layer.1.EncDecAttention.k.weight', 'decoder.block.11.layer.2.DenseReluDense.wo.weight', 'decoder.block.12.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.14.layer.1.EncDecAttention.v.weight', 'decoder.embed_tokens.weight', 'decoder.block.15.layer.2.DenseReluDense.wi_1.weight', 'decoder.b

In [8]:
dir(model2)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_assisted_decoding',
 '_auto_class',
 '_autoset_attn_implementation',
 '_backward_compatibility_gradient_checkpointing',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_beam_sample',
 '_beam_search',
 '_buffers',
 '_call_impl',
 '_check_and_enable_flash_attn_2',
 '_check_and_enable_sdpa',
 '_compiled_call_impl',
 '_constrained_beam_search',
 '_contrastive_search',
 '_convert_head_mask_to_5d',
 '_copy_lm_head_original_to_resized',
 '_create_repo',
 '_dispatch_accelerate_model',
 '_expand_inputs_for_generation',
 

In [17]:
model_state_dict = model.state_dict()
model_state_dict2 = model2.state_dict()

In [15]:
model_state_dict

OrderedDict([('shared.weight',
              tensor([[  1.7559,  -1.4321,   0.5260,  ...,  -0.4850,  -1.4547,  -0.2726],
                      [  4.8610, -15.1613,   8.2459,  ...,   8.1269,   1.6232,   6.6230],
                      [  0.7530,  -0.7841,   0.2071,  ...,   1.1303,  -0.3343,  -0.0358],
                      ...,
                      [ -1.8502,  -0.7035,   0.3488,  ...,  -3.1780,   1.2377,  -1.9930],
                      [ -1.1058,  -1.3720,   0.4057,  ...,  -3.7841,   2.7120,   1.2544],
                      [ -1.4967,  -1.5860,  -0.3208,  ...,   0.5593,  -0.3124,   0.6550]])),
             ('encoder.embed_tokens.weight',
              tensor([[  1.7559,  -1.4321,   0.5260,  ...,  -0.4850,  -1.4547,  -0.2726],
                      [  4.8610, -15.1613,   8.2459,  ...,   8.1269,   1.6232,   6.6230],
                      [  0.7530,  -0.7841,   0.2071,  ...,   1.1303,  -0.3343,  -0.0358],
                      ...,
                      [ -1.8502,  -0.7035,   0.3488,  ...

In [18]:
model_state_dict2

OrderedDict([('shared.weight',
              tensor([[  1.7559,  -1.4321,   0.5260,  ...,  -0.4850,  -1.4547,  -0.2726],
                      [  4.8610, -15.1613,   8.2459,  ...,   8.1269,   1.6232,   6.6230],
                      [  0.7530,  -0.7841,   0.2071,  ...,   1.1303,  -0.3343,  -0.0358],
                      ...,
                      [ -1.8502,  -0.7035,   0.3488,  ...,  -3.1780,   1.2377,  -1.9930],
                      [ -1.1058,  -1.3720,   0.4057,  ...,  -3.7841,   2.7120,   1.2544],
                      [ -1.4967,  -1.5860,  -0.3208,  ...,   0.5593,  -0.3124,   0.6550]])),
             ('encoder.embed_tokens.weight',
              tensor([[  1.7559,  -1.4321,   0.5260,  ...,  -0.4850,  -1.4547,  -0.2726],
                      [  4.8610, -15.1613,   8.2459,  ...,   8.1269,   1.6232,   6.6230],
                      [  0.7530,  -0.7841,   0.2071,  ...,   1.1303,  -0.3343,  -0.0358],
                      ...,
                      [ -1.8502,  -0.7035,   0.3488,  ...

In [6]:
model2.parameters

<bound method Module.parameters of T5ForConditionalGeneration(
  (shared): Embedding(144, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(144, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(64, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=768, out_features=3072, bias=False)
              (wi_1): Linear(in_features=768, out_features=3072, 

In [26]:
dir(model)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_assisted_decoding',
 '_auto_class',
 '_autoset_attn_implementation',
 '_backward_compatibility_gradient_checkpointing',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_beam_sample',
 '_beam_search',
 '_buffers',
 '_call_impl',
 '_check_and_enable_flash_attn_2',
 '_check_and_enable_sdpa',
 '_compiled_call_impl',
 '_constrained_beam_search',
 '_contrastive_search',
 '_convert_head_mask_to_5d',
 '_copy_lm_head_original_to_resized',
 '_create_repo',
 '_dispatch_accelerate_model',
 '_expand_inputs_for_generation',
 

In [27]:
model.load_state_dict

<bound method Module.load_state_dict of T5ForConditionalGeneration(
  (shared): Embedding(144, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(144, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(64, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=768, out_features=3072, bias=False)
              (wi_1): Linear(in_features=768, out_features=3

In [29]:
pytorch_total_params = sum(p.numel() for p in model.parameters())
pytorch_total_params

736453632

In [31]:
!pip install ankh

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting ankh
  Using cached ankh-1.10.0-py3-none-any.whl.metadata (18 kB)
Collecting biopython<2.0,>=1.80 (from ankh)
  Using cached biopython-1.83-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting datasets<3.0.0,>=2.7.1 (from ankh)
  Using cached datasets-2.19.0-py3-none-any.whl.metadata (19 kB)
Collecting sentencepiece<0.2.0,>=0.1.97 (from ankh)
  Using cached sentencepiece-0.1.99.tar.gz (2.6 MB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting pyarrow-hotfix (from datasets<3.0.0,>=2.7.1->ankh)
  Using cached pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)
Collecting xxhash (from datasets<3.0.0,>=2.7.1->ankh)
  Using cached xxhash-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets<3.0.0,>=2.7.1->ankh)
  Using cached multiprocess-0.70.16-py312-none-any.whl.metadata (7.2 kB)
Using cached ankh-1.10.0-py3-none-any.whl (31 kB)
Using cached biopython-1.83-cp312-cp312