From 802ecaaa2ec7263946f9f726e65786e606efb6a4 Mon Sep 17 00:00:00 2001 From: Jared Adolf-Bryfogle Date: Fri, 14 Sep 2018 16:14:01 -0500 Subject: [PATCH] add a few more xml scripts. Most are just examples. --- apps/public/pdb_utils/fix_space_chain.py | 34 ++++++++ apps/public/pyrosetta/get_phi_psi.py | 43 ++++++++++ apps/public/rosetta/score_analysis.py | 17 ++-- database/rosetta/xml_scripts/GTR.xml | 14 ++++ .../analyze_interface_and_rmsd.xml | 24 ++++++ .../xml_scripts/design_focus_basic.xml | 54 +++++++++++++ .../xml_scripts/design_focus_cart_rel.xml | 71 ++++++++++++++++ .../xml_scripts/design_focus_rel_ab_scan.xml | 73 +++++++++++++++++ .../rosetta/xml_scripts/glycosylate_model.xml | 19 +++++ .../model_glycans_simple_substituted.xml | 11 +++ .../rosetta/xml_scripts/rs_relax_cdr_csts.xml | 20 +++++ .../run_rabd_with_H1_H2_metrics.xml | 25 ++++++ .../xml_scripts/run_rabd_with_metrics.xml | 27 +++++++ jade/basic/plotting/MakeFigure.py | 1 - jade/rosetta_jade/ScoreFiles.py | 81 ++++++++++++------- setup.py | 10 +-- 16 files changed, 484 insertions(+), 40 deletions(-) create mode 100644 apps/public/pdb_utils/fix_space_chain.py create mode 100644 apps/public/pyrosetta/get_phi_psi.py create mode 100644 database/rosetta/xml_scripts/GTR.xml create mode 100644 database/rosetta/xml_scripts/analyze_interface_and_rmsd.xml create mode 100644 database/rosetta/xml_scripts/design_focus_basic.xml create mode 100644 database/rosetta/xml_scripts/design_focus_cart_rel.xml create mode 100644 database/rosetta/xml_scripts/design_focus_rel_ab_scan.xml create mode 100644 database/rosetta/xml_scripts/glycosylate_model.xml create mode 100644 database/rosetta/xml_scripts/model_glycans_simple_substituted.xml create mode 100644 database/rosetta/xml_scripts/rs_relax_cdr_csts.xml create mode 100644 database/rosetta/xml_scripts/run_rabd_with_H1_H2_metrics.xml create mode 100644 database/rosetta/xml_scripts/run_rabd_with_metrics.xml diff --git a/apps/public/pdb_utils/fix_space_chain.py b/apps/public/pdb_utils/fix_space_chain.py new file mode 100644 index 0000000..b22026c --- /dev/null +++ b/apps/public/pdb_utils/fix_space_chain.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +import os,sys + +def insert (source_str, insert_str, pos): + return source_str[:pos]+insert_str+source_str[pos:] + +if __name__ == "__main__": + + if len(sys.argv) == 1: + sys.exit("Fixes symmetry perl script issue with chains that are spaces. First argument is the pdb file") + input_pdb_file = sys.argv[1] + + lines = open(input_pdb_file, 'r').readlines() + new_lines = [] + for line in lines: + #print line + new_line = line + if len(line) > 4: + print line[0:6] + if line[0:6] == "HETATM": + chain_column = 22-1 + chain = line[chain_column] + print chain + if chain == ' ' or len(line.strip()) == 77: + new_line = insert(new_line, " ", chain_column) + + + new_lines.append(new_line) + + + OUTFILE = open("test_pdb.pdb", 'w') + for line in new_lines: + OUTFILE.write(line) + OUTFILE.close() diff --git a/apps/public/pyrosetta/get_phi_psi.py b/apps/public/pyrosetta/get_phi_psi.py new file mode 100644 index 0000000..546f329 --- /dev/null +++ b/apps/public/pyrosetta/get_phi_psi.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python + +#Simple Script to get Phi/Psi using PyRosetta. +from __future__ import print_function +from argparse import ArgumentParser + +import rosetta +from pyrosetta import * +pyrosetta.init() + + + + + + + +if __name__ == "__main__": + parser = ArgumentParser("Get Phi/Psi of all residues in protein or a range of residues") + + + parser.add_argument('-s', help="Input Structure", required=True) + parser.add_argument('--start', help = "Starting resnum (pose/PDB - EX:24L)") + parser.add_argument('--span', help = "Number of residues to print from start") + + + options = parser.parse_args() + + + p = pose_from_pdb(options.s) + + start = 1 + if options.start: + start = rosetta.core.pose.parse_resnum(options.start, p) + + end = p.total_residue() + if options.span: + end = start+int(options.span) + + out = "res resPDB phi psi" + print(out) + for i in range(start, end+1): + out = str(i)+" "+p.pdb_info().pose2pdb(i)+" "+str(180+p.phi(i))+" "+str(180+p.psi(i)) + print(out) diff --git a/apps/public/rosetta/score_analysis.py b/apps/public/rosetta/score_analysis.py index 8b1a2cd..bf970ac 100755 --- a/apps/public/rosetta/score_analysis.py +++ b/apps/public/rosetta/score_analysis.py @@ -15,7 +15,7 @@ from jade.rosetta_jade.ScoreFiles import ScoreFile from jade.basic.plotting.MakeFigure import * -import shutil +import os ######################################################################## @@ -27,10 +27,15 @@ def get_parser(): parser = ArgumentParser( description="This utility parses and extracts data from score files in JSON format") - parser.add_argument("scorefiles", nargs='*', help="A list of scorefiles") + #parser.add_argument("scorefiles", nargs='*', help="A list of scorefiles") - parser.add_argument("-s", "--scoretypes", - default=["dSASA_int", "delta_unsatHbonds", "hbonds_int", "total_score", "dG_separated", "top_n_by_10"], + parser.add_argument("-s", "--scorefiles", + help = "Scorefiles to use", + nargs="*", + required = True) + + parser.add_argument("--scoretypes", + default=["total_score"], help="List of score terms to extract", nargs='*') @@ -376,14 +381,14 @@ def main(): for i in range(0, options.top_n): print get_decoy_path(top_by_n_decoys[i][1]) - shutil.copy(get_decoy_path(top_by_n_decoys[i][1]), options.outdir) + os.system("cp " +get_decoy_path(top_by_n_decoys[i][1]) +" "+options.outdir) else: ordered = sf.get_ordered_decoy_list(scoreterm, top_n=int(options.top_n), decoy_names=decoy_names) top_decoys = [[o[0], pdb_dir + "/" + o[1]] for o in ordered] for i in range(0, options.top_n): print get_decoy_path(top_decoys[i][1]) - shutil.copy(get_decoy_path(top_decoys[i][1]), options.outdir) + os.system("cp "+ get_decoy_path(top_decoys[i][1])+" "+ options.outdir) ######################################################################## diff --git a/database/rosetta/xml_scripts/GTR.xml b/database/rosetta/xml_scripts/GTR.xml new file mode 100644 index 0000000..24451c2 --- /dev/null +++ b/database/rosetta/xml_scripts/GTR.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/database/rosetta/xml_scripts/analyze_interface_and_rmsd.xml b/database/rosetta/xml_scripts/analyze_interface_and_rmsd.xml new file mode 100644 index 0000000..f94fe78 --- /dev/null +++ b/database/rosetta/xml_scripts/analyze_interface_and_rmsd.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/database/rosetta/xml_scripts/design_focus_basic.xml b/database/rosetta/xml_scripts/design_focus_basic.xml new file mode 100644 index 0000000..80f3294 --- /dev/null +++ b/database/rosetta/xml_scripts/design_focus_basic.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Run Packer on input as a CONTROL for score comparisons + + + + + Run Packer to do design + + + + + + \ No newline at end of file diff --git a/database/rosetta/xml_scripts/design_focus_cart_rel.xml b/database/rosetta/xml_scripts/design_focus_cart_rel.xml new file mode 100644 index 0000000..e008923 --- /dev/null +++ b/database/rosetta/xml_scripts/design_focus_cart_rel.xml @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Run Cartesian on input PDB as a SCORE CONTROL + + + + + Make sure Coordinate Constraints are still there after ramp. + + + + Get original pose and run design. + + + + + + + \ No newline at end of file diff --git a/database/rosetta/xml_scripts/design_focus_rel_ab_scan.xml b/database/rosetta/xml_scripts/design_focus_rel_ab_scan.xml new file mode 100644 index 0000000..82d4356 --- /dev/null +++ b/database/rosetta/xml_scripts/design_focus_rel_ab_scan.xml @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + We are not going to do a general way here. I need to refactor this to be able to set a specific set of CDRs + For now, we manually do it. + + + + + + + + + + + + Get original pose and run design. + + + + + + \ No newline at end of file diff --git a/database/rosetta/xml_scripts/glycosylate_model.xml b/database/rosetta/xml_scripts/glycosylate_model.xml new file mode 100644 index 0000000..47cde29 --- /dev/null +++ b/database/rosetta/xml_scripts/glycosylate_model.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/database/rosetta/xml_scripts/model_glycans_simple_substituted.xml b/database/rosetta/xml_scripts/model_glycans_simple_substituted.xml new file mode 100644 index 0000000..eb25e73 --- /dev/null +++ b/database/rosetta/xml_scripts/model_glycans_simple_substituted.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/database/rosetta/xml_scripts/rs_relax_cdr_csts.xml b/database/rosetta/xml_scripts/rs_relax_cdr_csts.xml new file mode 100644 index 0000000..1682922 --- /dev/null +++ b/database/rosetta/xml_scripts/rs_relax_cdr_csts.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/database/rosetta/xml_scripts/run_rabd_with_H1_H2_metrics.xml b/database/rosetta/xml_scripts/run_rabd_with_H1_H2_metrics.xml new file mode 100644 index 0000000..69cbcd9 --- /dev/null +++ b/database/rosetta/xml_scripts/run_rabd_with_H1_H2_metrics.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/database/rosetta/xml_scripts/run_rabd_with_metrics.xml b/database/rosetta/xml_scripts/run_rabd_with_metrics.xml new file mode 100644 index 0000000..eedaa7a --- /dev/null +++ b/database/rosetta/xml_scripts/run_rabd_with_metrics.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/jade/basic/plotting/MakeFigure.py b/jade/basic/plotting/MakeFigure.py index 836fea6..8af6d1f 100644 --- a/jade/basic/plotting/MakeFigure.py +++ b/jade/basic/plotting/MakeFigure.py @@ -119,7 +119,6 @@ def plot_general_pandas(df, title, outpath, plot_type, x, y = None, z = None, to pad_single_title(ax) - ax.set_axis_bgcolor('white') fig = ax.get_figure() fig.savefig(outpath, dpi=300) return ax diff --git a/jade/rosetta_jade/ScoreFiles.py b/jade/rosetta_jade/ScoreFiles.py index 59102bd..4e48f89 100644 --- a/jade/rosetta_jade/ScoreFiles.py +++ b/jade/rosetta_jade/ScoreFiles.py @@ -16,8 +16,21 @@ ##Forked by Jared Adolf-Bryfogle. ##Has been completely refactored to work with pandas Dataframes +def get_dataframe(filename, match=""): + """ + Convert a Rosetta Score file directly to a dataframe. + + :param filename: path to file + :return: pandas.DataFrame + """ + sc = ScoreFile(filename, match) + df = sc.get_Dataframe() + df = df.set_index('decoy') + df = df.reindex(sorted(df.columns), axis=1) + return df + class ScoreFile: - def __init__(self, filename): + def __init__(self, filename, match=""): self.filename = filename if re.search("score_", filename): @@ -36,34 +49,42 @@ def __init__(self, filename): lines = file(filename).readlines() header = lines[0] - headerSP = lines[1].split() + headerSP = "" + if len(lines) > 1: + headerSP = lines[1].split() + #print repr(headerSP) for line in lines: - try: - o = json.loads(line.replace("nan", "NaN")) - # print o[self.decoy_field_name] - # print repr(o) - self.decoys.append(o) - except Exception as e: - ##Store as defaultdict instead of JSON. - - #print "Cannot load as regular JSON file! Parsing as old-school scorefile instead: "+ str(e) - d = defaultdict() - values = line.split() - if len(values) != len(headerSP): - if len(values) == 1 and values[0] =="SEQUENCE:": continue - print >> sys.stderr, "Failed to parse JSON object or as regular score file; skipping line:\n", line - else: - for i in range(0, len(values)): - k = headerSP[ i ] - if k == "description": - k = "decoy" - - if values[i] == "SCORE:": continue - - d[ k ] = deduce_str_type(values[i]) - - self.decoys.append(d) + if (match): + if re.search(match, line): + pass + else: + continue + try: + o = json.loads(line.replace("nan", "NaN")) + # print o[self.decoy_field_name] + # print repr(o) + self.decoys.append(o) + except Exception as e: + ##Store as defaultdict instead of JSON. + + #print "Cannot load as regular JSON file! Parsing as old-school scorefile instead: "+ str(e) + d = defaultdict() + values = line.split() + if len(values) != len(headerSP): + if len(values) == 1 and values[0] =="SEQUENCE:": continue + print >> sys.stderr, "Failed to parse JSON object or as regular score file; skipping line:\n", line + else: + for i in range(0, len(values)): + k = headerSP[ i ] + if k == "description": + k = "decoy" + + if values[i] == "SCORE:": continue + + d[ k ] = deduce_str_type(values[i]) + + self.decoys.append(d) #print repr(self.decoys) @@ -198,7 +219,11 @@ def get_Dataframe(self, scoreterms=None, order_by="total_score", top_n=-1, rever df = detect_numeric(df) #df.to_csv("debugging.csv", sep=",") - df = df.sort_values(order_by, ascending=reverse)[0:top_n] + df = df.sort_values(order_by, ascending=reverse) + + if (top_n!=-1): + df = df.head(top_n) + if scoreterms: df = get_columns(df, scoreterms) df.name = self.name diff --git a/setup.py b/setup.py index 2a3135f..303ea71 100644 --- a/setup.py +++ b/setup.py @@ -75,23 +75,23 @@ def get_all_scripts_to_install(public_dir='apps/public', pilot_dir='apps/pilot') all_scripts = [] for outer in [public_dir, pilot_dir]: for app_dir in (sorted([ d for d in glob.glob(os.path.join(outer, "*")) if os.path.isdir(d)])): - #print "reading " + app_dir + print "reading " + app_dir f = glob.glob(app_dir+"/"+"*.py") - #print(f) + print(f) for script in f: if not re.search("__init__", script): all_scripts.append(script) - #print all_scripts + print all_scripts print "Found scripts:" print "\n\t".join(all_scripts) print "\n\n" return all_scripts def find_all_packages(): - #print "Finding Packages." + print "Finding Packages." p = ['jade/'+ sub for sub in find_packages('jade')] - #print repr(p) + print repr(p) return p setup(name='bio-jade',