In [4]:
'''
****************************
* master file for FFAT analysis of proteomes
* simple arrangement
* - select 'live' files - ie true proteomes, or test ones.
* - select the particular file(s) for test
* - select the particular analysis required.
*
* as analyses and files are added these are catered for in the modules called here
* so I see little substantial change here
* 15 Sep 2018
*
* here is the full (current) structure
* lists of available files are kept in the following two files:
* - proteomes.txt = 'live' versions
* - proteomes_test_files.txt = test versions
* Proteome Core file contains the class ProteomeReader for retrieving the files into a standard format
*   and also contains ProteomeActivityLogger to keep track of what happened when
* Proteome Scorer contains class SequenceAnalysis which does the basic work on the protein amino acid sequences
* Proteome Analyser contains ProteomeAnalyser which does the high level analysis
* this module allows the user to specify which files are to be analysed, and retrieves the proteomes,
*   and then allows the user to specify the analysis. It then passes the proteomes
*   into ProteomeAnalyser for the specified analysis to take place
*
* Proteome Test Harness runs specific functional and full regression tests
*
* 29 Dec 2018
* extra functionality both here and in Proteome Analyser
* following request to add ability to add generation of proteome files and top ten for randomised proteomes
****************************
'''

'''
* - select the individual files to run - prod or test.
* - normally should be core release versions
'''

#%run 'Proteome Core Release.ipynb'
%run 'Proteome Core Dev.ipynb'
%run 'Proteome Scorer Dev.ipynb'
%run 'Proteome Analyser Dev.ipynb'

def getnumberlist(strnos):
    '''
    * turn the user's typed selection into a list of ints
    * accepts whitespace separated numbers and inclusive ranges, in any mix
    *   '0 2 5'     -> [0, 2, 5]
    *   '3-6'       -> [3, 4, 5, 6]
    *   '0 2-4 7'   -> [0, 2, 3, 4, 7]
    * spaces around the dash are tolerated ('3 - 6' is the same as '3-6')
    * a range whose end is below its start gives no numbers
    * always returns a list so the caller can index it, take its length or reuse it
    * raises ValueError if any piece is not a whole number
    '''
    # close up the spaces around every dash so that '3 - 6' becomes the single token '3-6'
    compact = '-'.join(piece.strip() for piece in strnos.split('-'))

    numbers = []
    for token in compact.split():
        if '-' in token:
            first, _, last = token.partition('-')
            # + 1 because the range typed by the user includes its end point
            numbers.extend(range(int(first), int(last) + 1))
        else:
            numbers.append(int(token))
    return numbers
    
def _prompt_int(prompt):
    '''
    * ask the user a question and return the reply as an int
    * returns None when the reply is not a whole number (blank, 'q', '1.5', ...)
    '''
    try:
        return int(raw_input(prompt))
    except ValueError:
        return None


def RunAnalysis():
    '''
    * interactive driver: repeatedly asks for
    *   - live (0) or test (1) mode - any other reply, numeric or not, ends the session
    *   - the files to read (numbers as listed, parsed by getnumberlist)
    *   - the analysis to run (number as listed)
    *   - for analyses that offer it, full proteome file generation (1) or just a histogram
    * then reads the chosen files with ProteomeReader and hands them to
    *   ProteomeAnalyser.run_analysis
    * one ProteomeActivityLogger is created per mode on first use and reused for
    *   later passes in the same session
    * raises ValueError if the file list or the analysis number is not numeric
    '''
    bprintmode = True
    loggers = {}  # keyed by btestmode, so live and test each keep their own logger

    while True:

        itestmode = _prompt_int('Enter 0 for live or 1 for test (anything else exits) ')
        if itestmode not in (0, 1):
            # the prompt promises that anything else exits, including non-numeric replies
            break

        btestmode = (itestmode == 1)
        if btestmode not in loggers:
            loggers[btestmode] = ProteomeActivityLogger(btestmode, bprintmode)
        mylogger = loggers[btestmode]

        myreader = ProteomeReader(btestmode)
        myscorer = SequenceAnalysis()
        myanalysis = ProteomeAnalyser(mylogger, myscorer, btestmode)

        print('reader version = ' + myreader.versionid)
        print('scorer version = ' + myscorer.versionid)
        print('analyser version = ' + myanalysis.versionid)

        mylogger.write_log_entry('core reader version = ' + myreader.versionid)

        print('available files')
        print('---------------')
        availablefiles = myreader.showlibrary()
        for i, fileentry in enumerate(availablefiles):
            print('%s %s' % (i, fileentry[0]))

        print(' ')
        fn = raw_input('enter files to test (no commas) ')
        myfilenos = getnumberlist(fn)

        availableanalyses = myanalysis.available_analysis()
        print(' ')
        print('available analyses')
        print('---------------')
        for i, analysisentry in enumerate(availableanalyses):
            print('%s %s %s' % (i, analysisentry[1], analysisentry[2]))

        idx = int(raw_input('Enter analysis to test '))
        print(' ')

        bfullproteome = False # need as now an argument of run_analysis for some options
        if myanalysis.has_generatedproteome_option(idx):
            # any reply other than 1 (including a non-numeric one) means histogram only
            bfullproteome = (1 == _prompt_int('Enter 1 for full proteome file generation, anything else for just a histogram '))

        myproteomes = myreader.read_files(myfilenos)

        myanalysis.run_analysis(idx, myproteomes, bfullproteome)

        print(' ')
        print('finished analysis')
        print(' ')

    print('terminated')
        
'''
****************************
* run RunAnalysis for a directed route to analysis
****************************
'''

# start the interactive session as soon as this cell is run;
# it waits on console input until the user exits at the live/test prompt
RunAnalysis()

Enter 0 for live or 1 for test (anything else exits) 0
New activity log version 1.2 Candidate release **
0: 20190214 19:27. scorer version = Dev on 1.0, logger version = Dev on 1.0
1: 20190214 19:27. random seed set to 1550172476
reader version = 1.1 Candidate release
scorer version = Dev on 1.0
analyser version = Dev on 1.0
2: 20190214 19:27. core reader version = 1.1 Candidate release
available files
---------------
0 orf_trans
1 uniprot_proteome_yeastx6049_fasta
2 HumanProteomeFasta
3 Plasmodium_3D7_proteome_fasta
4 Albugo_proteome_fasta
5 uniprot_Arabidopsis_proteome_fasta
6 uniprot_C_elegans_proteome_fasta
7 uniprot_Drosophila_proteome_fasta
8 UP000025675_M_tuberculosis_proteome_x3978_fasta
9 UP000008816_S_aureus_x2889_fasta
10 UP000051952_Bodo_saltans_x18124_fasta
11 UP000009168_Tetrahymena_proteome_x26976_fasta
12 Ubr1x1331 ipr003126(NOTpf02617pf13764pf12937+ident05fasta
13 Rqc1=Tcf25x545identity05fasta
14 Mcm3x196identity05fasta
15 Seg2_blits3_x23
16 Spironucleus_proteome_AUP00