# SCRATCH

Installation: https://github.com/nmih/ssbio/wiki/Software-Installations#scratch

### 1. Prepare FASTA files
- This can be a file containing one or more amino acid sequences

In [1]:
# Input FASTA file for this walkthrough.
# NOTE: this is an absolute path on the author's machine -- change it to
# wherever the ssbio test files live in your own checkout.
test_fasta = (
    '/home/nathan/projects_unsynced/ssbio/ssbio/'
    'test/test_files/scratch/test.fasta'
)

In [6]:
!head $test_fasta

>pdb,4i8h,A,#1
IVGGYTCGANTVPYQVSLNSGYHFCGGSLINSQWVVSAAHCYKSGIQVRLGEDNINVVEGNEQFISASKSIVHPSYNSNTLNNDIMLIKLKSAASLNSRVASISLPTSCASAGTQCLISGWGNTKSSGTSYPDVLKCLKAPILSDSSCKSAYPGQITSNMFCAGYLEGGKDSCQGDSGGPVVCSGKLQGIVSWGSGCAQKNKPGVYTKVCNYVSWIKQTIASN
>pdb,3w5h,A,#1
STPAITLENPDIKYPLRLIDKEVVNHDTRRFRFALPSPEHILGLPVGQHIYLSARIDGNLVIRPYTPVSSDDDKGFVDLVIKVYFKDTHPKFPAGGKMSQYLESMKIGDTIEFRGPNGLLVYQGKGKFAIRPDKKSSPVIKTVKSVGMIAGGTGITPMLQVIRAIMKDPDDHTVCHLLFANQTEKDILLRPELEELRNEHSARFKLWYTVDRAPEAWDYSQGFVNEEMIRDHLPPPEEEPLVLMCGPPPMIQYACLPNLERVGHPKERCFAF
>pdb,4i8g,A,#1
IVGGYTCGANTVPYQVSLNSGYHFCGGSLINSQWVVSAAHCYKSGIQVRLGEDNINVVEGNEQFISASKSIVHPSYNSNTLNNDIMLIKLKSAASLNSRVASISLPTSCASAGTQCLISGWGNTKSSGTSYPDVLKCLKAPILSDSSCKSAYPGQITSNMFCAGYLEGGKDSCQGDSGGPVVCSGKLQGIVSWGSGCAQKNKPGVYTKVCNYVSWIKQTIASN
>pdb,4eic,A,#1
ADAAAGAQVFAANCAACHAGGNNAVMPTKTLKADALKTYLAGYKDGSKSLEEAVAYQVTNGQGAMPAFGGRLSDADIANVAAYIADQAENNKW


### 2. Run SCRATCH
- `path_to_scratch` should be the path to the `run_SCRATCH-1D_predictors.sh` script
- You can also run directly from the command line like so:

        /path/to/scratch/bin/run_SCRATCH-1D_predictors.sh  input_fasta output_prefix [num_threads]


In [5]:
from ssbio.sequence.properties.scratch import SCRATCH

In [3]:
# Build the SCRATCH helper for the FASTA file defined above.
test_scratch = SCRATCH(
    seq_file=test_fasta,
    project_name='test2',
)

In [4]:
# Launch the SCRATCH predictors script on the sequences.
# NOTE: both paths below are specific to the author's machine -- point
# path_to_scratch at your own run_SCRATCH-1D_predictors.sh and outdir at a
# directory you can write to.
test_scratch.run_scratch(
    path_to_scratch='/home/nathan/software/SCRATCH-1D_1.1/bin/run_SCRATCH-1D_predictors.sh',
    outdir='/home/nathan/Desktop/',
    num_cores=4,
)


###################################
#                                 #
#  SCRATCH-1D release 1.1 (2015)  #
#                                 #
###################################

[SCRATCH-1D_predictions.pl] 4 protein sequence(s) found
[SCRATCH-1D_predictions.pl] generating sequence profiles...
[SCRATCH-1D_predictions.pl] running SCRATCH-1D predictors...
[SCRATCH-1D_predictions.pl] running homology analysis...
[SCRATCH-1D_predictions.pl] writing SSpro predictions...
[SCRATCH-1D_predictions.pl] writing SSpro8 predictions...
[SCRATCH-1D_predictions.pl] writing ACCpro predictions...
[SCRATCH-1D_predictions.pl] writing ACCpro20 predictions...
[SCRATCH-1D_predictions.pl] job successfully completed!



### 3. Parse results

##### Predicted secondary structure, 3 class

In [5]:
# Summarize the SSpro results: the output below is a dict keyed by sequence
# ID, each mapping the classes C, E and H to a fraction.
test_scratch.sspro_summary()

{'pdb,3w5h,A,#1': {'C': 0.43014705882352944,
  'E': 0.35294117647058826,
  'H': 0.21691176470588236},
 'pdb,4eic,A,#1': {'C': 0.41935483870967744,
  'E': 0.021505376344086023,
  'H': 0.5591397849462365},
 'pdb,4i8g,A,#1': {'C': 0.5560538116591929,
  'E': 0.34080717488789236,
  'H': 0.1031390134529148},
 'pdb,4i8h,A,#1': {'C': 0.5560538116591929,
  'E': 0.34080717488789236,
  'H': 0.1031390134529148}}

##### Predicted secondary structure, 8 class

In [6]:
# Summarize the SSpro8 results: the output below is a dict keyed by sequence
# ID, each mapping the classes B, C, E, G, H, I, S and T to a fraction.
test_scratch.sspro8_summary()

{'pdb,3w5h,A,#1': {'B': 0.007352941176470588,
  'C': 0.21323529411764705,
  'E': 0.34558823529411764,
  'G': 0.04411764705882353,
  'H': 0.17279411764705882,
  'I': 0.022058823529411766,
  'S': 0.08088235294117647,
  'T': 0.11397058823529412},
 'pdb,4eic,A,#1': {'B': 0.021505376344086023,
  'C': 0.13978494623655913,
  'E': 0.0,
  'G': 0.03225806451612903,
  'H': 0.5268817204301075,
  'I': 0.0,
  'S': 0.10752688172043011,
  'T': 0.17204301075268819},
 'pdb,4i8g,A,#1': {'B': 0.017937219730941704,
  'C': 0.2556053811659193,
  'E': 0.32286995515695066,
  'G': 0.03139013452914798,
  'H': 0.07174887892376682,
  'I': 0.0,
  'S': 0.15246636771300448,
  'T': 0.14798206278026907},
 'pdb,4i8h,A,#1': {'B': 0.017937219730941704,
  'C': 0.2556053811659193,
  'E': 0.32286995515695066,
  'G': 0.03139013452914798,
  'H': 0.07174887892376682,
  'I': 0.0,
  'S': 0.15246636771300448,
  'T': 0.14798206278026907}}

##### Predicted relative solvent accessibility (RSA), 25% cutoff

In [8]:
# Summarize the ACCpro results: the output below is a dict keyed by sequence
# ID, each giving a 'buried' and an 'exposed' fraction.
test_scratch.accpro_summary()

{'pdb,3w5h,A,#1': {'buried': 0.4963235294117647,
  'exposed': 0.5036764705882353},
 'pdb,4eic,A,#1': {'buried': 0.3655913978494624,
  'exposed': 0.6344086021505376},
 'pdb,4i8g,A,#1': {'buried': 0.5560538116591929,
  'exposed': 0.4439461883408072},
 'pdb,4i8h,A,#1': {'buried': 0.5560538116591929,
  'exposed': 0.4439461883408072}}

##### Predicted relative solvent accessibility (RSA), variable cutoff

In [9]:
# Summarize the ACCpro20 results as 'buried' / 'exposed' fractions per
# sequence ID, using a caller-chosen cutoff.
# NOTE(review): cutoff=35 is presumably a percent RSA threshold -- confirm
# against the accpro20_summary docstring.
test_scratch.accpro20_summary(cutoff=35)

{'pdb,3w5h,A,#1': {'buried': 0.6654411764705882,
  'exposed': 0.33455882352941174},
 'pdb,4eic,A,#1': {'buried': 0.5591397849462365,
  'exposed': 0.44086021505376344},
 'pdb,4i8g,A,#1': {'buried': 0.6771300448430493,
  'exposed': 0.32286995515695066},
 'pdb,4i8h,A,#1': {'buried': 0.6771300448430493,
  'exposed': 0.32286995515695066}}