In [1]:
import os
import pandas as pd
import tmkit as tmk
from typing import List, Tuple
from tmkit.topo import from_pdbtm, from_tmhmm, from_phobius
from tests import dir_data, tmp_data, exp_data

try:
    from distutils.version import LooseVersion
except ImportError:
    # distutils is no longer part of the standard library on Python 3.12+;
    # fall back to the version parser that ships inside pandas itself.
    from pandas.util.version import Version as LooseVersion

# Parsed version of the installed pandas, kept as a comparable object.
# This statement must sit at column 0: a stray leading indent on a
# top-level line makes the whole cell fail with an IndentationError.
pd_version = LooseVersion(pd.__version__)


# Sequence

## Retrieve

### RCSB PDB file

In [None]:
# PDB identifiers to download; the PDBTM cells further down reuse this series.
prot_series = pd.Series(data=["6e3y", "1xqf"])

# Create the download folder up front (no error if it is already there).
dpath = os.path.join(tmp_data, 'pdb/')
os.makedirs(name=dpath, exist_ok=True)

tmk.seq.retrieve_pdb_from_rcsb(prot_series=prot_series, sv_fp=dpath)

### PDBTM PDB file

In [12]:
# Folder that receives the PDBTM PDB files; created if missing.
dpath = os.path.join(tmp_data, 'pdb/pdbtm/')
os.makedirs(name=dpath, exist_ok=True)

# `prot_series` is not defined in this cell; it has to exist already
# (an earlier cell must have been run first).
tmk.seq.retrieve_pdb_from_pdbtm(prot_series=prot_series, sv_fp=dpath)

===>No.1 protein name: 6e3y


===>No.2 protein name: 6rfq
===>No.3 protein name: 6t0b


### PDBTM XML file

In [None]:
# Folder that receives the PDBTM XML files; created if missing.
dpath = os.path.join(tmp_data, 'pdb/xml/')
os.makedirs(name=dpath, exist_ok=True)

# `prot_series` is not defined in this cell; it has to exist already
# (an earlier cell must have been run first).
tmk.seq.retrieve_xml_from_pdbtm(prot_series=prot_series, sv_fp=dpath)

===>No.0 protein name: 6e3y
===>No.1 protein name: 6rfq
===>No.2 protein name: 6t0b
===>No.3 protein name: 1xqf


0

### AlphaFold PDB file

In [None]:
# Identifiers passed to the AlphaFold retrieval call; this rebinds the
# notebook-wide `prot_series`.
prot_series = pd.Series(data=['P63092', 'Q9B6E8', 'P07256', 'P63027'])

# Folder that receives the AlphaFold PDB files; created if missing.
dpath = os.path.join(tmp_data, 'pdb/alphafold/')
os.makedirs(name=dpath, exist_ok=True)

tmk.seq.retrieve_pdb_alphafold(prot_series=prot_series, sv_fp=dpath)

## Read

In [None]:
# Sequence from a Fasta file

fin = os.path.join(dir_data, "1xqfA.fasta")

sequence = tmk.seq.read_from_fasta(fasta_fpn=fin)

# Get residue IDs from a FASTA file
seq_fasta_ids = tmk.seq.fasid(fasta_fpn=fin)

# A bare expression only renders when it is the last statement of a cell,
# so both results are shown together here instead of leaving a silent
# `sequence` line in the middle of the cell.
sequence, seq_fasta_ids


In [None]:
# Sequence from a PDB file

# The reader receives the folder, the protein name and the chain as
# separate arguments, so no full file path is assembled in this cell.
# NOTE(review): the structure is looked up under `dir_data`, not under the
# `tmp_data` folder the retrieval cells download into - confirm this is
# the intended location.
sequence = tmk.seq.read_from_pdb(
    pdb_fp=dir_data,
    prot_name='1xqf',
    seq_chain='A',
    file_chain='A',
)

sequence

'AVADKADNAFMMICTALVLFMTIPGIALFYGGLIRGKNVLSMLTQVTVTFALVCILWVVYGYSLAFGEGNNFFGNINWLMLKNIELTAVMGSIYQYIHVAFQGSFACITVGLIVGALAERIRFSAVLIFVVVWLTLSYIPIAHMVWGGGLLASHGALDFAGGTVVHINAAIAGLVGAYLPHNLPMVFTGTAILYIGWFGFNAGSAGTANEIAALAFVNTVVATAAAILGWIFGEWALRGKPSLLGACSGAIAGLVGVTPACGYIGVGGALIIGVVAGLAGLWGVTMPCDVFGVHGVCGIVGCIMTGIFAASSLGGVGFAEGVTMGHQLLVQLESIAITIVWSGVVAFIGYKLADLTVGLRVP'

In [None]:
# TODO: bug in the function
# Sequence from an XML file
# The keyword arguments are collected first and then unpacked into the call.
xml_query = {
    'xml_fp': dir_data,
    'xml_name': '1xqf',
    'seq_chain': 'A',
}
sequence = tmk.seq.read_from_xml(**xml_query)

# Feature

## Helix surface identification

In [3]:
import tmkit as tmk

# Value handed to the helix-surface readers as `fp`; the two following
# feature cells reuse it.
# NOTE(review): "lips-" looks like a file-name prefix rather than a
# directory - confirm against the reader's expectations.
fdir = os.path.join(dir_data, "lips-")

df = tmk.feature.read_helix_surf(fp=fdir, prot_name='1xqf', file_chain='A', id=1)

In [None]:
# The reader hands back four values; only the first one is kept.
# `fdir` is not defined in this cell and must already exist.
surf_fields = tmk.feature.read(fp=fdir, prot_name='1xqf', file_chain='A')
aa_surf_rank, _, _, _ = surf_fields

In [10]:
# Load the helix-surface table for chain A of 1xqf and display it.
# `fdir` is not defined in this cell and must already exist.
helix_query = {'fp': fdir, 'prot_name': '1xqf', 'file_chain': 'A'}
df = tmk.feature.read_helix_all_surf(**helix_query)

df


Unnamed: 0,surfs,lipos,ents,lxe
0,5,1.834,4.846,8.889
1,0,1.77,4.912,8.694
2,3,1.729,4.852,8.389
3,1,1.815,4.885,8.865
4,2,1.791,4.749,8.507
5,6,1.777,4.746,8.435
6,4,1.767,4.948,8.741


# CATH

In [11]:
import tmkit as tmk

res = tmk.cath.summary_by_id(id='1cukA01')

# The cell's value is the result of comparing the returned "domain" entry
# with the URL expected for this CATH domain.
expected_domain_url = "http://www.cathdb.info/version/v4_2_0/api/rest/domain_summary/1cukA01"
res["domain"] == expected_domain_url

{'domain': 'http://www.cathdb.info/version/v4_2_0/api/rest/domain_summary/1cukA01', 'funfam': 'http://www.cathdb.info/version/v4_2_0/api/rest/superfamily/1.10.8.10/funfam/1cukA01', 'superfamily': 'http://www.cathdb.info/version/v4_2_0/api/rest/superfamily/1cukA01'}


# MSA

In [19]:
import os
import tempfile

import tmkit as tmk
import pandas as pd

prot_series = pd.Series(['P63092', 'Q9B6E8'])

# Save into the platform's temporary directory instead of a hard-coded
# '/tmp', which only exists on Unix-like systems. Joining with '' appends
# a trailing separator, matching every other `sv_fp` in this notebook.
# NOTE(review): presumably the trailing separator matters if the library
# builds file names by plain string concatenation - confirm.
sv_dir = os.path.join(tempfile.gettempdir(), '')

tmk.seq.retrieve_pdb_alphafold(
    prot_series=prot_series,
    sv_fp=sv_dir,
)


# Collation

In [2]:
# PDBTM

# Two collation folders are defined; only the PDBTM one is used below.
# NOTE(review): `pdb_rcsb_fp` is never read in this cell.
pdb_pdbtm_fp = os.path.join(dir_data, 'pdb/collate/pdbtm/')
pdb_rcsb_fp = os.path.join(dir_data, 'pdb/collate/rcsb/')

# Print the chains reported for structure 6cxh.
chains = tmk.collate.chain(prot_name='6cxh', pdb_fp=pdb_pdbtm_fp)
print(chains)

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']


# Mapping

In [19]:

# Reference table passed to the mapper as `ref_fpn`.
ref_table = os.path.join(dir_data, 'map/pdb_chain_uniprot.csv')

# Map the PDB entry/chain identifier '101m.A' and print the result.
res = tmk.mapping.pdb2uniprot(id='101m.A', ref_fpn=ref_table)
print(res)

P02185


# Mutation

In [None]:
import tmkit as tmk

# Target folder for the downloaded database, under `exp_data`.
mutation_dir = os.path.join(exp_data, 'mutation')

tmk.mut.download_predmuthtp_db(sv_fp=mutation_dir)

# PPI

In [4]:
import tmkit as tmk

# Target folder for the downloaded database, under `exp_data`.
ppi_dir = os.path.join(exp_data, 'ppi')

# The call stays the last expression so its return value is shown as the
# cell output.
tmk.ppi.download_intact_db(version='current', sv_fp=ppi_dir)

===>The IntAct database of version current is being downloaded...
===>The database of version current is successfully downloaded!
===>The database of version current is being decompressed...
===>The database of version current is successfully decompressed!


'Finished!'

# Residue contact

In [None]:
import tmkit as tmk

# All five input folders live under `dir_data`; they are gathered here
# and unpacked into the reader call below.
rrc_dirs = {
    'fasta_fp': os.path.join(dir_data, 'fasta/'),
    'pdb_fp': os.path.join(dir_data, 'pdb/'),
    'xml_fp': os.path.join(dir_data, 'xml/'),
    'dist_fp': os.path.join(dir_data, 'rrc/'),
    'tool_fp': os.path.join(dir_data, 'rrc/tool/'),
}

# Read the 'membrain2' contact data for chain A of 1xqf, with a lower
# sequence-separation bound of 1 and no upper bound given.
df1 = tmk.rrc.read(
    prot_name='1xqf',
    seq_chain='A',
    **rrc_dirs,
    seq_sep_inferior=1,
    seq_sep_superior=None,
    tool='membrain2',
)