Skip to content

Commit

Permalink
🤿towards v0.3.3 (#16)
Browse files Browse the repository at this point in the history
* 💦opt for pisa

* lose time

* 🚋add export-interaction-mapping command

* pdb_start -> pdb_beg
  • Loading branch information
NatureGeorge authored May 20, 2021
1 parent 6df4d11 commit b3331fa
Show file tree
Hide file tree
Showing 11 changed files with 487 additions and 207 deletions.
2 changes: 1 addition & 1 deletion pdb_profiling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# @Author: ZeFeng Zhu
# @Last Modified: 2020-05-13 08:54:09 pm
# @Copyright (c) 2020 MinghuiGroup, Soochow University
__version__ = '0.3.2'
__version__ = '0.3.3'


def default_config(folder='./'):
Expand Down
29 changes: 26 additions & 3 deletions pdb_profiling/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class ResidueAnnotation(orm.Model):
chain_id = orm.String(max_length=10)
resource = orm.String(max_length=100, primary_key=True)
resource_id = orm.String(max_length=200, primary_key=True)
pdb_start = orm.Integer()
pdb_beg = orm.Integer()
pdb_end = orm.Integer()

class UniProtAnnotation(orm.Model):
Expand All @@ -118,7 +118,7 @@ class UniProtAnnotation(orm.Model):
UniProt = orm.String(max_length=50, primary_key=True)
resource = orm.String(max_length=100, primary_key=True)
resource_id = orm.String(max_length=200, primary_key=True)
unp_start = orm.Integer()
unp_beg = orm.Integer()
unp_end = orm.Integer()

class SMRModel(orm.Model):
Expand All @@ -145,7 +145,29 @@ class MappedMutation(orm.Model):
Ref = orm.String(max_length=3, primary_key=True)
Pos = orm.Integer(primary_key=True)
Alt = orm.String(max_length=3, primary_key=True)



class PI(orm.Model):
    """Protein-interface table: one row per residue range of a chain taking part in an interface."""
    __tablename__ = 'PI'
    # NOTE(review): this class is declared inside a method of the DB wrapper —
    # `self` here is the enclosing instance supplying metadata/database handles.
    __metadata__ = self.metadata
    __database__ = self.database

    UniProt = orm.String(max_length=50, primary_key=True)
    pdb_id = orm.String(max_length=4, primary_key=True)
    entity_id = orm.Integer(primary_key=True)
    struct_asym_id = orm.String(max_length=10, primary_key=True)
    chain_id = orm.String(max_length=10)
    assembly_id = orm.Integer(primary_key=True)
    model_id = orm.Integer()
    struct_asym_id_in_assembly = orm.String(max_length=10, primary_key=True)
    interface_id = orm.Integer(primary_key=True)
    css = orm.Float()  # presumably PISA complexation significance score — TODO confirm
    i_select_tag = orm.Boolean()
    i_select_rank = orm.Integer()
    pdb_beg = orm.Integer()  # start of the interface residue range on this chain
    pdb_end = orm.Integer()  # end of the interface residue range on this chain


self.AAThree2one = AAThree2one
self.UniProtSeq = UniProtSeq
self.Mutation = Mutation
Expand All @@ -156,3 +178,4 @@ class MappedMutation(orm.Model):
self.ResidueAnnotation = ResidueAnnotation
self.SMRModel = SMRModel
self.MappedMutation = MappedMutation
self.PI = PI
112 changes: 110 additions & 2 deletions pdb_profiling/commands/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def id_mapping(ctx, input, column, sep, chunksize, auto_assign, sleep):
if sleep:
tsleep(uniform(1, 10))


@Interface.command('check-muta-conflict')
@click.option('--chunksize', type=int, default=100000)
@click.pass_context
Expand Down Expand Up @@ -198,7 +199,7 @@ def get_unp_id(args):
Entry, isoform, is_canonical = args
return Entry if is_canonical else isoform

kwargs = dict(sub.split('=') for item in kwargs for sub in item.split(','))
kwargs = dict(sub.split('=') for item in kwargs for sub in item.split(';'))
if len(kwargs) > 0:
for key,value in kwargs.items():
kwargs[key] = eval(value)
Expand Down Expand Up @@ -305,7 +306,7 @@ def residue_mapping(ctx, input, chunksize, output, sleep):
sqlite_api.sync_insert(sqlite_api.ResidueMappingRange, res_mapping_df.to_dict('records'))
console.log(f'Done: {done}')
if sleep:
tsleep(uniform(0, 3))
tsleep(uniform(0, 2))


@Interface.command('insert-sele-mapping')
Expand Down Expand Up @@ -383,6 +384,52 @@ def expand_iso_range(res):
console.log(f'Done: {len(res)+chunksize*i}')


def pi2records(dfrm: DataFrame, usecols: list, pair_cols: list):
    """Yield interaction records for both partners of every interface row.

    `usecols[:6]` are interface-level columns shared by both partners;
    `usecols[6:13]` describe partner 1 and `usecols[13:]` partner 2. Each
    partner slice is renamed to the canonical `pair_cols` names and expanded
    into per-range records via `yield_interact_records`.
    """
    shared = usecols[:6]
    for partner in (usecols[6:13], usecols[13:]):
        renamed = dfrm[shared + partner].rename(columns=dict(zip(partner, pair_cols)))
        yield from yield_interact_records(renamed)

def yield_interact_records(dfrm: DataFrame):
    """Expand each interface row of *dfrm* into one record per residue range.

    Each row's ``interface_range`` column holds a string literal such as
    ``"[[1, 10], [20, 30]]"``; one dict is yielded per ``(beg, end)`` pair.
    When the frame has no ``UniProt`` column (non-protein partner), records
    are emitted with ``UniProt='NaN'`` so the PI table schema stays uniform.
    """
    # literal_eval is a safe replacement for eval() on file-sourced text:
    # it only accepts Python literals, never arbitrary expressions.
    from ast import literal_eval
    has_unp = 'UniProt' in dfrm.columns
    for row in dfrm.itertuples(index=False):
        unp = row.UniProt if has_unp else 'NaN'
        for beg, end in literal_eval(row.interface_range):
            yield dict(UniProt=unp, pdb_id=row.pdb_id, entity_id=row.entity_id,
                       struct_asym_id=row.struct_asym_id, chain_id=row.chain_id,
                       assembly_id=row.assembly_id, model_id=row.model_id,
                       struct_asym_id_in_assembly=row.struct_asym_id_in_assembly,
                       interface_id=row.interface_id, css=row.css,
                       i_select_tag=row.i_select_tag, i_select_rank=row.i_select_rank,
                       pdb_beg=beg, pdb_end=end)

@Interface.command('insert-interaction')
@click.option('-i', '--input', type=click.Path())
@click.option('--chunksize', type=int, help="the chunksize parameter", default=5000)
@click.option('--ppi/--no-ppi', is_flag=True, default=True)
@click.pass_context
def insert_interaction(ctx, input, chunksize, ppi):
    """Stream an interface TSV file into the custom DB's PI table, chunk by chunk."""
    db = ctx.obj['custom_db']
    shared_cols = ['pdb_id', 'assembly_id', 'interface_id', 'css', 'i_select_tag', 'i_select_rank']
    partner_cols = ['entity_id', 'struct_asym_id', 'chain_id', 'model_id', 'struct_asym_id_in_assembly', 'interface_range', 'UniProt']
    full_cols = shared_cols + [f'{col}_1' for col in partner_cols] + [f'{col}_2' for col in partner_cols]
    # non-PPI input lacks the trailing UniProt_2 column
    read_cols = full_cols if ppi else full_cols[:-1]
    total = 0
    with console.status("[bold green]inserting..."):
        for chunk in read_csv(input, sep='\t', keep_default_na=False, na_values=[''], chunksize=chunksize, usecols=read_cols):
            # drop rows explicitly marked as unselected (rank == -1) before expansion
            kept = chunk[chunk.i_select_rank.ne(-1)]
            db.sync_insert(db.PI, list(pi2records(kept, full_cols, partner_cols)))
            total += chunk.shape[0]
            console.log(f'Done: {total}')


@Interface.command('export-mutation-mapping')
@click.option('--with_id/--no-with_id', is_flag=True, default=False)
@click.option('--sele/--no-sele', is_flag=True, default=True)
Expand Down Expand Up @@ -446,6 +493,67 @@ def export_residue_remapping(ctx, with_id, sele, output):
console.log(f'result saved in {output_path}')


@Interface.command('export-interaction-mapping')
@click.option('--with_id/--no-with_id', is_flag=True, default=False)
@click.option('-o', '--output', type=str, help='filename of output file')
@click.pass_context
def export_interface_mapping(ctx, with_id, output):
    """Export mutation-to-interface residue mappings as a TSV file.

    Joins mutations against selected residue-mapping ranges and the PI
    (interface) table, computing for each mapped residue whether it falls
    inside an interface range (``is_interface_residue``). Results are
    appended chunk-wise to *output* under the context folder.
    """
    output_path = ctx.obj['folder']/output
    # The %s slot below is filled with 'Mutation.ftId,' only when --with_id
    # is passed; edUniProt resolves to the canonical Entry or the isoform.
    query = """
    SELECT DISTINCT
            %s
            CASE IDMapping.is_canonical
                WHEN 1
                THEN IDMapping.Entry
                ELSE IDMapping.isoform
            END edUniProt, Mutation.Ref, Mutation.Pos, Mutation.Alt,
            Mutation.Pos - ResidueMappingRange.unp_beg + ResidueMappingRange.pdb_beg AS residue_number,
            Mutation.Pos - ResidueMappingRange.unp_beg + ResidueMappingRange.auth_pdb_beg AS author_residue_number,
            ResidueMappingRange.author_insertion_code,
            ResidueMappingRange.observed_ratio,
            ResidueMappingRange.pdb_id,
            ResidueMappingRange.entity_id,
            ResidueMappingRange.struct_asym_id,
            ResidueMappingRange.chain_id,
            PI.assembly_id,
            PI.model_id,
            PI.struct_asym_id_in_assembly,
            PI.interface_id,
            PI.css,
            PI.i_select_tag,
            PI.i_select_rank,
            (Mutation.Pos - ResidueMappingRange.unp_beg + ResidueMappingRange.pdb_beg >= PI.pdb_beg AND Mutation.Pos - ResidueMappingRange.unp_beg + ResidueMappingRange.pdb_beg <= PI.pdb_end) AS is_interface_residue
    FROM Mutation,ResidueMappingRange
    INNER JOIN IDMapping ON Mutation.ftId = IDMapping.ftId
    INNER JOIN UniProtSeq ON UniProtSeq.isoform = IDMapping.isoform
                            AND UniProtSeq.Pos = Mutation.Pos
                            AND UniProtSeq.Ref = Mutation.Ref
    INNER JOIN SelectedMappingMeta ON SelectedMappingMeta.UniProt = ResidueMappingRange.UniProt
                            AND SelectedMappingMeta.pdb_id = ResidueMappingRange.pdb_id
                            AND SelectedMappingMeta.struct_asym_id = ResidueMappingRange.struct_asym_id
    INNER JOIN PI ON PI.UniProt = ResidueMappingRange.UniProt
                            AND PI.pdb_id = ResidueMappingRange.pdb_id
                            AND PI.struct_asym_id = ResidueMappingRange.struct_asym_id
    WHERE ResidueMappingRange.UniProt = edUniProt
          AND Mutation.Pos >= ResidueMappingRange.unp_beg
          AND Mutation.Pos <= ResidueMappingRange.unp_end
          AND ResidueMappingRange.conflict_code IS NULL
          AND ResidueMappingRange.observed_ratio > 0
          AND (ResidueMappingRange.residue_name = '' OR ResidueMappingRange.residue_name IN (SELECT three_letter_code FROM AAThree2one))
          AND SelectedMappingMeta.select_rank != -1
    ;
    """
    query = query % ('Mutation.ftId,' if with_id else '')
    with console.status("[bold green]query..."):
        # stream results in chunks to bound memory; append each chunk to a
        # single TSV, writing the header only when the file does not yet exist
        dfs = read_sql_query(query, ctx.obj['custom_db'].engine, chunksize=10000)
        for df in dfs:
            if df.shape[0] == 0:
                continue
            df.rename(columns={'edUniProt': 'UniProt'}).to_csv(
                output_path, index=False, mode='a+', sep='\t', header=not output_path.exists())
    console.log(f'result saved in {output_path}')


@Interface.command('insert-sele-mutation-mapping')
@click.option('-i', '--input', type=click.Path())
@click.option('--chunksize', type=int, help="the chunksize parameter", default=10000)
Expand Down
4 changes: 3 additions & 1 deletion pdb_profiling/processors/pdbe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class profile_id(orm.Model):


class PISAInterfaceDict(orm.Model):
__tablename__ = 'PISAInterface'
__tablename__ = 'PISAInterfaceDict'
__metadata__ = self.metadata
__database__ = self.database
entity_id_1 = orm.Integer()
Expand All @@ -127,6 +127,8 @@ class PISAInterfaceDict(orm.Model):
interface_id = orm.Integer(primary_key=True)
use_au = orm.Boolean()
css = orm.Float()
is_polymer_1 = orm.Boolean()
is_polymer_2 = orm.Boolean()

self.ResidueMapping = ResidueMapping
self.StatsProteinEntitySeq = StatsProteinEntitySeq
Expand Down
5 changes: 1 addition & 4 deletions pdb_profiling/processors/pdbe/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,10 +401,7 @@ def yieldPISAInterfaceList(data: Dict):
for record in records:
flatten_dict(record, 'structure_1')
flatten_dict(record, 'structure_2')
flatten_dict(data[pdb], 'page_title', False)
cols = sorted(i for i in data[pdb].keys()
if i != 'interfaceentries')
yield records, cols, tuple(data[pdb][col] for col in cols)
yield records, ('pdb_id', 'assembly_id'), (pdb, data[pdb]['page_title']['assemble_code'])

@staticmethod
@dispatch_on_set('api/pisa/interfacedetail/')
Expand Down
Loading

0 comments on commit b3331fa

Please sign in to comment.