In [29]:
import csv

import pandas as pd

csv1_filename = "funfam_members_plus_ec_terms.csv"

# EC-number prefix used by the filtering examples. The trailing dot matters:
# '3.1.1.' does not match a code such as '3.1.10.x', whereas '3.1.1' would.
EC_CODE = '3.1.1.'

# Read every data row of the CSV into a dict keyed by member_id.
# NOTE: a later row with the same member_id replaces the earlier one, so only
# one ec_code is kept per member -- check whether the file can repeat a member.
ffm_by_id = {}
# newline='' lets the csv module do its own line-ending handling.
with open(csv1_filename, newline='') as csv1_fh:
    # The first line is the column header, not data; kept as a stripped string.
    headers = csv1_fh.readline().strip()
    # csv.reader removes the quotes around fields and, unlike a plain
    # split(','), keeps a comma inside a quoted field as part of that field.
    for row in csv.reader(csv1_fh):
        if not row:
            # Blank line (e.g. at the end of the file): nothing to record.
            continue
        # Unpacking raises ValueError if a row does not have exactly six
        # columns, so a malformed file fails loudly instead of being misread.
        funfam_number, member_id, member_type, sequence_md5, uniprot_acc, ec_code = row
        # Placeholder: none of the six columns carries a superfamily, so
        # funfam_id values below differ only by funfam_number.
        superfamily_id = '?.?.?.?'
        ff_id = f'{superfamily_id}-ff-{funfam_number}'
        ffm_dict = {
            'funfam_number': funfam_number,
            'superfamily_id': superfamily_id,
            'member_id': member_id,
            'ec_code': ec_code,
            'uniprot_acc': uniprot_acc,
            'funfam_id': ff_id,
        }
        ffm_by_id[member_id] = ffm_dict

def print_first_entries(ffm_entries, limit=5):
    """Print the ``uniprot_acc`` value of the first ``limit`` entries.

    Args:
        ffm_entries: dict mapping member_id to a member dict that has a
            'uniprot_acc' key. Entries are taken in the dict's own order.
        limit: maximum number of entries to print. Defaults to 5, which is
            what this function printed before the parameter existed.

    Returns:
        None. One line per entry is written to stdout.

    NOTE(review): the printed label reads "EC:" although the value shown is
    ``uniprot_acc``. The text is left as-is so existing output does not
    change -- confirm which of the two was intended.
    """
    # zip() stops as soon as range(limit) is exhausted, so only the first few
    # values are visited instead of copying every key into a list first.
    for _, ffm_dict in zip(range(limit), ffm_entries.values()):
        print(f"EC: {ffm_dict['uniprot_acc']}")

# Example 1: filter ffm_by_id by looping over its keys and looking each
# value up by key.
print("EXAMPLE 1")
ffm_in_ec_code = {}
for member_id in ffm_by_id.keys():
    ffm_dict = ffm_by_id[member_id]
    # keep only the members whose ec_code begins with the EC_CODE prefix
    if ffm_dict['ec_code'].startswith(EC_CODE):
        ffm_in_ec_code[member_id] = ffm_dict
print_first_entries(ffm_in_ec_code)

# Example 2: the same filter as Example 1, but .items() hands back each key
# together with its value, so no second lookup by key is needed.
print("EXAMPLE 2")
# start from an empty dict again so this example does not reuse Example 1's result
ffm_in_ec_code = {}
for member_id, ffm_dict in ffm_by_id.items():
    if ffm_dict['ec_code'].startswith(EC_CODE):
        ffm_in_ec_code[member_id] = ffm_dict
print_first_entries(ffm_in_ec_code)

  
# For comparison, a list comprehension filters a sequence in one expression:
# mylist1 = ('a', 'b', 'c', 'd', 'c3')
# mylist2 = [v for v in mylist1 if v.startswith('c')]   # -> ['c', 'c3']

# Example 3: the same filter as Examples 1 and 2, written as a dict
# comprehension: {key: value for key, value in ... if condition}.
print("EXAMPLE 3 (dict comprehension)")
ffm_in_ec_code = {k: v for k, v in ffm_by_id.items() 
                  if v['ec_code'].startswith(EC_CODE)}
print_first_entries(ffm_in_ec_code)

EXAMPLE 1
EC: W5K377
EC: C1BAQ3
EC: A0A1E5N270
EC: A0A1M3WN83
EC: A0A1N5VPX9
EXAMPLE 2
EC: W5K377
EC: C1BAQ3
EC: A0A1E5N270
EC: A0A1M3WN83
EC: A0A1N5VPX9
EXAMPLE 3 (dict comprehension)
EC: W5K377
EC: C1BAQ3
EC: A0A1E5N270
EC: A0A1M3WN83
EC: A0A1N5VPX9


In [33]:
def uniq_funfam_ids_with_ec_code_long_version(ffm_d, ec_str):
    """Collect the distinct funfam_id values whose member EC code starts with ``ec_str``.

    This is the spelled-out, loop-based form.

    Args:
        ffm_d: dict mapping member_id to a member dict with 'ec_code' and
            'funfam_id' keys.
        ec_str: EC-code prefix to match with ``str.startswith``.

    Returns:
        A set of funfam_id strings; empty when nothing matches.
    """
    matching_ff_ids = set()
    # The member_id keys are not needed for the result, only the member dicts.
    for entry in ffm_d.values():
        if not entry['ec_code'].startswith(ec_str):
            continue
        # Adding to a set silently drops repeats of the same funfam_id.
        matching_ff_ids.add(entry['funfam_id'])
    return matching_ff_ids

# Equivalent to uniq_funfam_ids_with_ec_code_long_version above, written as a set comprehension
def uniq_funfam_ids_with_ec_code(ffm_d, ec_str):
    """Return the set of funfam_id values whose member EC code starts with ``ec_str``.

    Args:
        ffm_d: dict mapping member_id to a member dict with 'ec_code' and
            'funfam_id' keys.
        ec_str: EC-code prefix to match with ``str.startswith``.

    Returns:
        A set of funfam_id strings; empty when nothing matches.
    """
    # Braces around "key: value for ..." build a dict:
    #     {k: v for k, v in some_dict.items()}
    # Braces around a single expression build a set, which keeps each value once:
    #     {k for k in some_dict}
    return {
        entry['funfam_id']
        for entry in ffm_d.values()
        if entry['ec_code'].startswith(ec_str)
    }

# Count matching FunFams for progressively narrower EC prefixes.
# Each prefix is written once and used for both the lookup and the message,
# so the two cannot drift apart.
ec_prefixes = ('3.', '3.1.', '3.1.1.')
ff_ids_by_prefix = {
    prefix: uniq_funfam_ids_with_ec_code(ffm_by_id, prefix)
    for prefix in ec_prefixes
}
for prefix in ec_prefixes:
    print(f"Found {len(ff_ids_by_prefix[prefix])} FunFams with EC code matching '{prefix}'")

# Keep the per-prefix names bound, in the same order as ec_prefixes, for any
# code that refers to them.
ff_with_ec1, ff_with_ec2, ff_with_ec3 = (ff_ids_by_prefix[p] for p in ec_prefixes)


Found 48815 FunFams with EC code matching '3.'
Found 30828 FunFams with EC code matching '3.1.'
Found 25246 FunFams with EC code matching '3.1.1.'
