In [14]:
from datetime import datetime
import json
import sys

def createAlignmentPage(p1,p2):
    """Build the JSONWP page dict for the structure alignment of p1 and p2.

    p1 -- query protein id; its structure is read from the ``esmfold/all``
          folder of the maize-study GitHub repository.
    p2 -- homolog id. When it contains an underscore its structure is read
          from the repository's ``pdb`` folder, otherwise from the
          AlphaFold DB file URL built from the id.

    Returns a dict with a caption, a markdown info line linking p2 to UniProt,
    and an ``alnpdb:test`` entry holding both structure URLs.
    """
    # Only the location of the second structure depends on the id format.
    homologPdb = (
        f"https://raw.githubusercontent.com/MesihK/maize-study/master/pdb/{p2}.pdb"
        if '_' in p2
        else f"https://alphafold.ebi.ac.uk/files/AF-{p2}-F1-model_v4.pdb"
    )
    page = {}
    page["h2:caption"] = f"{p1} & {p2} Alignment"
    page["md:info"] = f"Alignment of {p1} and it's homolog [{p2}](https://www.uniprot.org/uniprotkb/{p2}) found by PROST"
    page["alnpdb:test"] = {
        "pdb1":f"https://raw.githubusercontent.com/MesihK/maize-study/master/esmfold/all/{p1}.pdb",
        "pdb2":homologPdb,
        "alnpdb":"",
        "lineLen":120
    }
    return page

# Static markdown shown on the "about" navigation page of every generated site.
_ABOUT_MD = '''### PROST Method
The Protein Language Search Tool (**PROST**) is a highly accurate and efficient homology search tool designed for remote homology prediction tasks. In comparison to the current state-of-the-art tools, such as CS-BLAST or PHMMER, PROST outperforms them in terms of accuracy and speed. PROST utilizes a protein language model and quantization technique to represent proteins in a numerical format that retains their biophysical, biochemical, and evolutionary information. PROST calculates the distances of all proteins in the database to the user's query protein and performs a statistical test based on the Z-Score of the distance distribution over the entire database. The results are presented with an expected value (e-value) that estimates the likelihood of a match occurring by chance. This value is calculated from the CDF of the z-score and is corrected for multiple testing using the Bonferroni method.

### Automatic GO Enrichment Analysis
The GO annotation enrichment pipeline in PROST allows for the selection of a different e-value cutoff. This threshold determines the level of significance for the enrichment analysis. To assess the significance of the enriched GO terms, contingency tables are constructed by comparing the frequency of individual GO terms in homologs and the Swissprot database. A term-specific p-value is then calculated by subjecting the contingency table to the Chi-square test. Subsequently, Bonferroni multiple p-test corrections are applied to correct the p-values for each term. Any GO term with a p-value greater than 0.001 is removed from the analysis. The remaining GO terms are evaluated based on their source proteins' e-values, which are combined using Stouffer’s method. The resulting GO terms are subjected to another round of multiple test correction, and the enriched terms are reported..

### Manuscript
PROST manuscript can be accessed form this [link](https://www.pnas.org/doi/10.1073/pnas.2211823120). Please cite if you used PROST for finding homologs.

### Python Package
PROST [python package](https://github.com/MesihK/prost) can be used to generate this result webpage with the help of JSONWP visualizer. Please cite both of the work (PROST and JSONWP)

### Citations

``` 
@article{kilinc2023improved,
  title={Improved global protein homolog detection with major gains in function identification},
  author={Kilinc, Mesih and Jia, Kejue and Jernigan, Robert L},
  journal={Proceedings of the National Academy of Sciences},
  volume={120},
  number={9},
  pages={e2211823120},
  year={2023},
  publisher={National Acad Sciences}
}
```
'''

# Static text shown on the "disclaimer" navigation page of every generated site.
_DISCLAIMER_TEXT = 'For documents and software available from this server, we do not warrant or assume any legal liability or responsibility for the accuracy, completeness, or usefulness of any information, product, or process disclosed. We do not endorse or recommend any commercial products, processes, or services. Some pages may provide links to other Internet sites for the convenience of users. We are not responsible for the availability or content of these external sites, nor do we endorse, warrant, or guarantee the products, services, or information described or offered at these other Internet sites. Information that is created by this site is within the public domain. It is not the intention to provide specific medically related advice but rather to provide users with information for better understanding. However, it is requested that in any subsequent use of this work, PROST be given appropriate acknowledgment. We do not collect any personally identifiable information (PII) about visitors to our Web sites.'

def toJSONWP(queryDB,targetDB,thr,gothr,info,align,goList,homologList,prots,out,maxSize=33500000):
    """Write one JSONWP result file and report how many proteins it covers.

    Parameters
    ----------
    queryDB, targetDB : names shown in the header; the sentence is only added
        when both are not None.
    thr, gothr : homology e-value threshold and GO enrichment threshold shown
        in the header; the sentence is only added when both are not None.
    info : free text placed in the ``md:info`` header entry.
    align : when truthy, an alignment page is created for every
        (query, homolog) pair through ``createAlignmentPage``.
    goList : dict mapping a query id to a list of rows ``[GO id, ...]``;
        the remaining items of each row are copied into the GO table as-is.
    homologList : dict mapping a query id to a list of rows whose item 0 is
        the homolog id, items 1-4 are copied into the homolog table as-is and
        item 5 is shown as the e-value link.
    prots : sequence of query ids to write, in order.
    out : output path without extension; ``.json`` is appended.
    maxSize : serialized size (characters of JSON text) at which the file is
        considered full. The check only runs after every 20th protein, so the
        written file can exceed this value.

    Returns
    -------
    int : number of leading entries of ``prots`` that were written, so a
        caller can continue with ``prots[returned:]`` in the next file.

    ``goList`` and ``homologList`` are only read, never modified.
    """
    # Timestamp shown in the page header.
    now = datetime.now()
    time_of_day = now.strftime("%H:%M:%S")  # hours:minutes:seconds
    date = now.strftime("%Y-%m-%d")  # year-month-day
    cnt = 0

    # Key insertion order is the order of the entries in the written JSON.
    jsonwp = {
        'md:caption':'## [PROST](https://www.pnas.org/doi/10.1073/pnas.2211823120) v0.2.15 Search Results',
        'md:info':f'{info}',
        'md:info2':f'This search was conducted at {time_of_day} on {date}.'
        }
    if queryDB is not None and targetDB is not None:
        jsonwp['md:info1'] = f'The query database is **{queryDB}** and the target database is **{targetDB}**.'
    if thr is not None and gothr is not None:
        jsonwp['md:info3'] = f'The e-value threshold for homology detection is **{thr}** and the threshold for GO annotation enrichment is **{gothr}**'
    jsonwp['h3:tablecap'] = 'Query Database Proteins'
    jsonwp['md:info4'] = 'Please click the link under the "Query Protein" column to access results for each query protein listed'
    proteinRows = []
    jsonwp['table:proteinList'] = {'columns':['l:Query Protein','# Homologs','# GO','Best Homolog','Best H. e-val'],'rows':proteinRows}
    jsonwp['navpage:about'] = {'md:info5':_ABOUT_MD}
    jsonwp['navpage:disclaimer'] = {'p:disc':_DISCLAIMER_TEXT}

    for queryP in prots:
        # .get() keeps the caller's dicts untouched when a protein has no rows.
        goRows = goList.get(queryP, [])
        homRows = homologList.get(queryP, [])

        homTableRows = []
        jsonwp[f'page:{queryP}'] = {
            'h2:caption':f'PROST Results for {queryP}',
            'h3:goCaption':'GO Annotations',
            'md:info1':'Link under "GO Term" column directs user to amigo geneontology website for a detailed inspection of the GO term',
            'table:goList':{
                'columns':['l:GO Term','Description','Confidence'],
                'rows':[[f'http://amigo.geneontology.org/amigo/term/{go[0]}@{go[0]}']+go[1:] for go in goRows]
            },
            'h3:homCaption':'Putative Homologs',
            'md:info2':'Links under "Uniprot" column directs user to Uniprot website for a detailed inspection. Links under "e-value" column opens sequence alignment and protein structure visualization page',
            'table:homList':{'columns':['l:Uniprot','Name','Type','Organim','Distance','l:e-value'],'rows':homTableRows}
        }
        for hom in homRows:
            # The e-value cell links to the alignment page key '<query>-<homolog>'.
            homTableRows.append([f'https://www.uniprot.org/uniprot/{hom[0]}@{hom[0]}']+hom[1:5]+
                                [f'{queryP}-{hom[0]}@{hom[5]}'])
            if align: jsonwp[f'page:{queryP}-{hom[0]}'] = createAlignmentPage(queryP,hom[0])

        # Summary row: the first homolog row is reported as the best hit.
        if homRows:
            bestId, bestEval = homRows[0][0], homRows[0][5]
        else:
            bestId, bestEval = '', ''
        proteinRows.append([f'{queryP}@{queryP}',len(homRows),len(goRows),bestId,bestEval])

        cnt += 1
        # Serializing the whole document is expensive, so the size is only
        # checked after every 20th protein. len() of the JSON text is used
        # rather than sys.getsizeof(), which measures the in-memory object.
        if cnt % 20 == 0 and len(json.dumps(jsonwp)) >= maxSize:
            break

    # Write the dictionary to a JSON file
    with open(out+'.json', "w") as f:
        json.dump(jsonwp, f)
    return cnt

In [10]:
def _readGroupedRows(path):
    '''Read a tab-separated file and group its rows by their first column.

    Returns a dict mapping each first-column value to the list of remaining
    columns of every row that starts with it, in file order. Blank lines are
    skipped.
    '''
    grouped = {}
    with open(path,'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A stray blank line would otherwise add a '' key holding an
                # empty row, which breaks the page builder downstream.
                continue
            fields = line.split('\t')
            grouped.setdefault(fields[0], []).append(fields[1:])
    return grouped

def tojsonwp(tsv, go, align, info, out):
    '''Convert search results into webpage(s) using the JSONWP tool.

    tsv   -- tab-separated homolog hits; column 0 is the query protein id.
    go    -- tab-separated GO annotations; column 0 is the query protein id.
    align -- forwarded to toJSONWP; enables the per-homolog alignment pages.
    info  -- free text forwarded to toJSONWP for the page header.
    out   -- output prefix; files are written as "<out>.<n>.json", n = 0, 1, ...

    Only proteins that appear in the tsv file are written. toJSONWP reports
    how many proteins it put into each file, and the remaining ones go into
    the next file.
    '''
    print(f'Read {tsv} file.')
    homologList = _readGroupedRows(tsv)
    goList = _readGroupedRows(go)

    prots = list(homologList.keys())
    lprots = len(prots)
    cur = 0
    i =  0
    while cur < lprots:
        print(f'Create {i+1}.th json file.')
        cur += toJSONWP(None,None,None,None,info,align,goList,homologList,prots[cur:],out+f'.{i}')
        i += 1

In [16]:
# Build the result site for the maize study, with alignment pages enabled.
tojsonwp(
    tsv='uniprot07.pdb08.05.prost.res.tsv',
    go='uniprot07.pdb08.prost.e05.top5.small.tsv',
    align=True,
    info='Jernigan lab maize study.',
    out='JerniganLab-MaizeStudy',
)

Read uniprot07.pdb08.05.prost.res.tsv file.
Create 1.th json file.
Create 2.th json file.
