This step is only an aggregation done in Spark. It is independent of the downstream processing, which is done entirely in pandas:

In [9]:
import json
import time
from typing import Optional

import pandas as pd
import pyspark.sql.functions as f
import requests
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType, StringType, IntegerType

# Start (or reuse) a local Spark session running on all local cores.
spark = SparkSession.builder.master('local[*]').getOrCreate()

# Collapse the PLIP output to one row per (structure, compound, residue),
# gathering the distinct interaction types seen for that residue.
df = (
    spark.read
    .options(sep=',', header=True)
    .csv('/home/dsuveges/marine/plip_output.csv')
    # The input contains duplicated rows; drop them before grouping:
    .distinct()
    .groupBy(
        'pdb_structure_id', 'compound_id', 'prot_residue_number',
        'prot_chain_id', 'prot_residue_type',
    )
    .agg(f.collect_set('interaction_type').alias('interaction_types'))
)

# Spark writes a directory of JSON part files; the next cell concatenates them.
df.write.json('/home/dsuveges/marine/plip_output_aggregated.json')

Concatenating the partitions

In [10]:
%%bash 

# Abort on the first failure (including a failure inside the pipeline), so the
# partition directory is only removed after the archive was written successfully.
set -euo pipefail

aggregated='/home/dsuveges/marine/plip_output_aggregated.json'

# Concatenate the JSON part files into a single gzip-compressed file:
cat "${aggregated}"/*json \
    | gzip > "${aggregated}.gz"

# The partition directory is redundant once the archive exists:
rm -rf "${aggregated}"

In [29]:
import pandas as pd
from functools import reduce

# Load the aggregated PLIP interactions (gzip-compressed, one JSON record per line).
# Row counts noted by the author: 319408 -> unique 265603
df = pd.read_json(
    '/home/dsuveges/marine/plip_output_aggregated.json.gz',
    lines=True,
    orient='records',
)

df.head()

Unnamed: 0,pdb_structure_id,compound_id,prot_residue_number,prot_chain_id,prot_residue_type,interaction_types
0,1abi,DPN,53,I,ASN,[hbond]
1,1agn,ZN,174,C,CYS,[metal_complex]
2,1ai0,ZN,10,L,HIS,[metal_complex]
3,1am4,GNP,660,F,LEU,[hbond]
4,1an0,GDP,118,A,ASP,[saltbridge]


In [122]:
# One group of interaction rows per PDB structure:
grouped = df.groupby(by='pdb_structure_id')

# Pick a single structure to experiment with:
test_df = grouped.get_group('3e7g')
test_df.head()




Unnamed: 0,pdb_structure_id,compound_id,prot_residue_number,prot_chain_id,prot_residue_type,interaction_types
332,3e7g,H4B,479,C,GLU,[hydroph_interaction]
5304,3e7g,AT2,373,C,TYR,[hbond]
5305,3e7g,ZN,110,A,CYS,[metal_complex]
12503,3e7g,AT2,347,B,TYR,[hbond]
26321,3e7g,AT2,350,A,PRO,[hydroph_interaction]


In [243]:
def get_pdb_sifts_mapping(pdb_id: str, max_retries: int = 5, timeout: float = 30.0) -> pd.DataFrame:
    """Fetch the residue-level PDB -> UniProt mapping of one structure from the PDBe graph API.

    Every mapped segment returned under the ``Ensembl`` key is expanded to one row
    per UniProt position (``unp_start`` .. ``unp_end``, inclusive). The author residue
    number of a position is derived by applying the offset observed at the start of the
    segment (``start.author_residue_number - unp_start``) to the UniProt position.

    Args:
        pdb_id: PDB structure identifier, used both in the URL and as the key in the response.
        max_retries: Number of attempts before giving up on the request.
        timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        De-duplicated frame with the columns ``accession``, ``prot_chain_id``,
        ``uniprot_position`` and ``prot_residue_number``. The frame is empty (but has
        these columns) when the response holds no Ensembl mapping for the structure.

    Raises:
        RuntimeError: if no parsable response was obtained within ``max_retries`` attempts.
    """
    URL = f'https://www.ebi.ac.uk/pdbe/graph-api/mappings/ensembl/{pdb_id}'
    output_columns = ['accession', 'prot_chain_id', 'uniprot_position', 'prot_residue_number']

    # Bounded retry: only network/decoding problems are retried, so the loop always
    # terminates and can still be interrupted from the keyboard.
    last_error = None
    for attempt in range(max_retries):
        try:
            data = requests.get(URL, timeout=timeout).json()
            break
        except (requests.exceptions.RequestException, ValueError) as error:
            last_error = error
            if attempt + 1 < max_retries:
                # Back off a little more after every failed attempt.
                time.sleep(min(2 ** attempt, 30))
    else:
        raise RuntimeError(
            f'Could not fetch the mapping of {pdb_id} after {max_retries} attempts'
        ) from last_error

    # A structure without mapping must not crash a full-scale run: treat a missing
    # structure key or a missing "Ensembl" section as "no mappings".
    ensembl_entries = data.get(pdb_id, {}).get('Ensembl', {}) if isinstance(data, dict) else {}
    mappings = [mapping for entry in ensembl_entries.values() for mapping in entry['mappings']]

    # One record per UniProt position of every mapped segment:
    records = [
        {
            'accession': mapping['accession'],
            'prot_chain_id': mapping['chain_id'],
            'uniprot_position': position,
            'prot_residue_number': position + offset,
        }
        for mapping in mappings
        # Offset between author numbering and UniProt numbering for this segment:
        for offset in [mapping['start']['author_residue_number'] - mapping['unp_start']]
        for position in range(mapping['unp_start'], mapping['unp_end'] + 1)
    ]

    return pd.DataFrame(records, columns=output_columns).drop_duplicates()


def map2uniprot(plip_df: pd.DataFrame) -> pd.DataFrame:
    """Annotate the interactions of one PDB structure with UniProt positions.

    The structure identifier is read from the first row of ``plip_df``; its residue
    mapping is fetched with ``get_pdb_sifts_mapping`` and left-joined on chain id and
    residue number, so interaction rows without a mapping are kept.
    """
    structure_id = plip_df['pdb_structure_id'].iloc[0]
    residue_mapping = get_pdb_sifts_mapping(structure_id)

    join_keys = ['prot_chain_id', 'prot_residue_number']
    return pd.merge(plip_df, residue_mapping, how='left', on=join_keys)


# test_df = grouped.get_group('3e7g')
# map2uniprot(test_df)
# get_pdb_sifts_mapping('3e7g')

# grouped = (
#     df
#     .query('pdb_structure_id == "3e7g" or pdb_structure_id == "13gs" ')
#     .groupby('pdb_structure_id')
# )


# grouped.apply(map2uniprot).reset_index(drop=True)



In [126]:
# Left join: keep every interaction row, even when no mapping is found for it.
pd.merge(
    test_df,
    sifts_df,
    how='left',
    on=['prot_chain_id', 'prot_residue_number'],
)

Unnamed: 0,pdb_structure_id,compound_id,prot_residue_number,prot_chain_id,prot_residue_type,interaction_types,accession,uniprot_position
0,3e7g,H4B,479,C,GLU,[hydroph_interaction],PRO_0000170930,479
1,3e7g,H4B,479,C,GLU,[hydroph_interaction],P35228,479
2,3e7g,H4B,479,C,GLU,[hydroph_interaction],P35228-2,440
3,3e7g,AT2,373,C,TYR,[hbond],P35228-2,334
4,3e7g,AT2,373,C,TYR,[hbond],PRO_0000170930,373
...,...,...,...,...,...,...,...,...
115,3e7g,ZN,115,C,CYS,[metal_complex],PRO_0000170930,115
116,3e7g,ZN,115,C,CYS,[metal_complex],P35228,115
117,3e7g,AT2,377,C,GLU,[hbond],P35228-2,338
118,3e7g,AT2,377,C,GLU,[hbond],PRO_0000170930,377


## Testing

We are seeing some discrepancies in the mapping. We need to work out which of the mappings is off.

In [127]:
# Reference row to verify (tab-separated header, followed by its values):
# accession	ensemblProtId	pdb_structure_id	compound_id	prot_residue_number	prot_chain_id	prot_residue_type	interaction_types	uniprot_position	author_start	author_end	unp_start	unp_end	pos1	pos2	pos3	geneId	chr	strand
# 39	P35228	ENSP00000327251	3e7g	H4B	462	A	ILE	[hbond]	462	428	492	428	492	27767749	27767748	27767747	ENSG00000007171	17	-

pdb_id = '3e7g'
chain_id = 'A'
residue_no = 462

# Interaction rows of the selected structure:
test_df = grouped.get_group(pdb_id)

# Residue-level mapping for the whole structure:
sifts_df = get_pdb_sifts_mapping(pdb_id)

# Attach the mapping to the interactions, keeping unmapped rows:
mapped_df = pd.merge(
    test_df,
    sifts_df,
    how='left',
    on=['prot_chain_id', 'prot_residue_number'],
)

mapped_df.head()

Unnamed: 0,pdb_structure_id,compound_id,prot_residue_number,prot_chain_id,prot_residue_type,interaction_types,accession,uniprot_position
0,3e7g,H4B,479,C,GLU,[hydroph_interaction],PRO_0000170930,479
1,3e7g,H4B,479,C,GLU,[hydroph_interaction],P35228,479
2,3e7g,H4B,479,C,GLU,[hydroph_interaction],P35228-2,440
3,3e7g,AT2,373,C,TYR,[hbond],P35228-2,334
4,3e7g,AT2,373,C,TYR,[hbond],PRO_0000170930,373


### Get uniprot -> ensembl mapping

We are using the human ID-mapping file provided by UniProt.

In [129]:
%%bash

# Download the human UniProt ID-mapping table into the project data directory:
wget -O /Users/dsuveges/project_data/marine/HUMAN_9606_idmapping.dat.gz \
    https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz

# List the data directory to confirm the file is there:
ls -lah /Users/dsuveges/project_data/marine/

total 223392
drwxr-xr-x    7 dsuveges  EBI\Domain Users   224B  5 May 14:06 .
drwxrwxr-x   34 dsuveges  EBI\Domain Users   1.1K  4 May 21:38 ..
-rw-r--r--    1 dsuveges  EBI\Domain Users    36M  2 Mar 15:00 HUMAN_9606_idmapping.dat.gz
-rw-r--r--    1 dsuveges  EBI\Domain Users   8.9M 27 Apr 21:25 plip_output.csv
drwxr-xr-x  404 dsuveges  EBI\Domain Users    13K  3 May 21:54 plip_output_aggregated.json
-rw-r--r--    1 dsuveges  EBI\Domain Users   2.9M  3 May 21:55 plip_output_aggregated.json.gz
-rw-r--r--    1 dsuveges  EBI\Domain Users    61M 27 Apr 21:25 residue_genomic_position.json


--2022-05-05 14:06:20--  https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz
Resolving ftp.uniprot.org (ftp.uniprot.org)... 128.175.240.195
Connecting to ftp.uniprot.org (ftp.uniprot.org)|128.175.240.195|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 37402727 (36M) [application/x-gzip]
Saving to: ‘/Users/dsuveges/project_data/marine/HUMAN_9606_idmapping.dat.gz’

     0K .......... .......... .......... .......... ..........  0%  218K 2m47s
    50K .......... .......... .......... .......... ..........  0%  559K 1m56s
   100K .......... .......... .......... .......... ..........  0% 39.8M 78s
   150K .......... .......... .......... .......... ..........  0%  571K 74s
   200K .......... .......... .......... .......... ..........  0% 20.1M 60s
   250K .......... .......... .......... .......... ..........  0% 65.5M 50s
   300K .......... .......... .......... .......... ..........  0% 6

In [139]:
# UniProt accession -> Ensembl protein (translation) identifier lookup.
# Column names are supplied here because the file is read without a header row.
id_mappings = (
    pd.read_csv(
        '/Users/dsuveges/project_data/marine/HUMAN_9606_idmapping.dat.gz',
        sep='\t',
        names=['accession', 'source', 'identifier'],
    )
    # Only the Ensembl protein cross-references are needed:
    .loc[lambda mapping: mapping['source'] == 'Ensembl_PRO']
    .drop(columns='source')
    .rename(columns={'identifier': 'translation_id'})
)

In [141]:
# Inner join: keep only rows whose accession has an Ensembl protein identifier.
mapped_w_ensp = pd.merge(mapped_df, id_mappings, how='inner', on='accession')

In [142]:
# Preview the interactions annotated with Ensembl translation ids:
mapped_w_ensp.head()

Unnamed: 0,pdb_structure_id,compound_id,prot_residue_number,prot_chain_id,prot_residue_type,interaction_types,accession,uniprot_position,translation_id
0,3e7g,H4B,479,C,GLU,[hydroph_interaction],P35228,479,ENSP00000327251
1,3e7g,AT2,373,C,TYR,[hbond],P35228,373,ENSP00000327251
2,3e7g,ZN,110,A,CYS,[metal_complex],P35228,110,ENSP00000327251
3,3e7g,AT2,347,B,TYR,[hbond],P35228,347,ENSP00000327251
4,3e7g,AT2,350,A,PRO,[hydroph_interaction],P35228,350,ENSP00000327251


In [149]:
def fetch_residue(row: pd.Series) -> Optional[str]:
    """Fetch the amino acid at a protein position from the Ensembl REST API.

    Args:
        row: Record providing ``translation_id`` (Ensembl protein identifier) and
            ``uniprot_position`` (used as both start and end of the requested range).

    Returns:
        The response body of the sequence endpoint as text, or ``None`` when the
        request fails or the server answers with an HTTP error status.
    """
    pos = row['uniprot_position']
    tid = row['translation_id']

    URL = f'https://rest.ensembl.org/sequence/id/{tid}?content-type=text/plain&start={pos}&end={pos}'
    try:
        response = requests.get(URL, timeout=30)
        # Without this check the body of an error response would be returned
        # as if it was an amino acid.
        response.raise_for_status()
    except requests.exceptions.RequestException:
        return None

    return response.text
    
# Look up the Ensembl amino acid for every row; `fetch_residue` issues one HTTP
# request per row. The new column is added to `mapped_w_ensp` in place.
mapped_w_ensp['aa_check'] = mapped_w_ensp.apply(fetch_residue, axis=1)
mapped_w_ensp.head()

Unnamed: 0,pdb_structure_id,compound_id,prot_residue_number,prot_chain_id,prot_residue_type,interaction_types,accession,uniprot_position,translation_id,aa_check
0,3e7g,H4B,479,C,GLU,[hydroph_interaction],P35228,479,ENSP00000327251,E
1,3e7g,AT2,373,C,TYR,[hbond],P35228,373,ENSP00000327251,Y
2,3e7g,ZN,110,A,CYS,[metal_complex],P35228,110,ENSP00000327251,C
3,3e7g,AT2,347,B,TYR,[hbond],P35228,347,ENSP00000327251,Y
4,3e7g,AT2,350,A,PRO,[hydroph_interaction],P35228,350,ENSP00000327251,P


In [152]:
# Distinct pairs of the PLIP residue type and the amino acid fetched from Ensembl;
# the two columns of every pair should name the same amino acid.
mapped_w_ensp[['prot_residue_type', 'aa_check']].drop_duplicates()

Unnamed: 0,prot_residue_type,aa_check
0,GLU,E
1,TYR,Y
2,CYS,C
4,PRO,P
5,TRP,W
7,ARG,R
9,PHE,F
10,VAL,V
19,ILE,I


The above example shows that the mapping from PDB chain/position to UniProt position works. That's great. What about the mapping from protein position to genome position?

In [204]:
# Joining with genome mappings:
genome_mappings = (
    spark.read.csv('/Users/dsuveges/project_data/marine/generated_mappings.tsv.gz', sep='\t', header=True)
    # Align the join column names with the pandas frame:
    .withColumnRenamed('protein_id', 'translation_id')
    .withColumnRenamed('amino_acid_position', 'uniprot_position')
)

# Small table left-joined with the large one. Timing noted by the author: 2m 12s
# (the equivalent right join, large <- small, took 2m 17s).
mapped_w_genome = (
    spark.createDataFrame(mapped_w_ensp)
    .join(genome_mappings, on=['translation_id', 'uniprot_position'], how='left')
)

# Materialise the result as pandas: this cell and the following ones work on
# `mapped_w_genome_pd`, which was previously only defined in a commented-out line.
mapped_w_genome_pd = mapped_w_genome.toPandas()
mapped_w_genome_pd.count()


translation_id         80
uniprot_position       80
pos1                   80
pos2                   80
pos3                   80
gene_id                80
chr                    80
strand                 80
pdb_structure_id       80
compound_id            80
prot_residue_number    80
prot_chain_id          80
prot_residue_type      80
interaction_types      80
accession              80
aa_check               80
dtype: int64

In [196]:
# Display the genome-mapped interaction table:
mapped_w_genome_pd

Unnamed: 0,translation_id,uniprot_position,pdb_structure_id,compound_id,prot_residue_number,prot_chain_id,prot_residue_type,interaction_types,accession,aa_check,pos1,pos2,pos3,gene_id,chr,strand
0,ENSP00000327251,352,3e7g,AT2,352,C,VAL,[hydroph_interaction],P35228,V,27779007,27779006,27779005,ENSG00000007171,17,-
1,ENSP00000327251,352,3e7g,AT2,352,B,VAL,[hydroph_interaction],P35228,V,27779007,27779006,27779005,ENSG00000007171,17,-
2,ENSP00000327251,115,3e7g,ZN,115,A,CYS,[metal_complex],P35228,C,27787802,27787801,27787800,ENSG00000007171,17,-
3,ENSP00000327251,115,3e7g,ZN,115,B,CYS,[metal_complex],P35228,C,27787802,27787801,27787800,ENSG00000007171,17,-
4,ENSP00000327251,115,3e7g,ZN,115,D,CYS,[metal_complex],P35228,C,27787802,27787801,27787800,ENSG00000007171,17,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,ENSP00000327251,377,3e7g,AT2,377,B,GLU,[hbond],P35228,E,27778932,27778931,27778930,ENSG00000007171,17,-
76,ENSP00000327251,377,3e7g,AT2,377,D,GLU,[hbond],P35228,E,27778932,27778931,27778930,ENSG00000007171,17,-
77,ENSP00000327251,377,3e7g,AT2,377,A,GLU,[hbond],P35228,E,27778932,27778931,27778930,ENSG00000007171,17,-
78,ENSP00000327251,377,3e7g,AT2,377,C,GLU,[hbond],P35228,E,27778932,27778931,27778930,ENSG00000007171,17,-


Checking the genomic positions for every amino acid in the list:

In [197]:
# Unique residue -> genomic position combinations, ignoring chain and compound:
mapped_w_genome_pd.loc[
    :,
    ['translation_id', 'uniprot_position', 'prot_residue_type', 'aa_check',
     'pos1', 'pos2', 'pos3', 'chr'],
].drop_duplicates()

Unnamed: 0,translation_id,uniprot_position,prot_residue_type,aa_check,pos1,pos2,pos3,chr
0,ENSP00000327251,352,VAL,V,27779007,27779006,27779005,17
2,ENSP00000327251,115,CYS,C,27787802,27787801,27787800,17
6,ENSP00000327251,110,CYS,C,27787817,27787816,27787815,17
10,ENSP00000327251,347,TYR,Y,27779022,27779021,27779020,17
13,ENSP00000482291,110,CYS,C,27787817,27787816,27787815,17
17,ENSP00000482291,342,ARG,R,27778920,27778919,27778918,17
21,ENSP00000482291,338,GLU,E,27778932,27778931,27778930,17
25,ENSP00000327251,463,TRP,W,27774346,27774345,27774344,17
29,ENSP00000327251,381,ARG,R,27778920,27778919,27778918,17
33,ENSP00000482291,424,TRP,W,27774346,27774345,27774344,17


In [198]:
%%bash 

residue_start=115
residue_end=115
translation='ENSP00000327251'
ensembl='https://rest.ensembl.org'

# Amino acid(s) at the requested protein position:
prot_sequence=$(curl -s "${ensembl}/sequence/id/${translation}?content-type=text/plain&start=${residue_start}&end=${residue_end}")

# Genomic region of that protein position, formatted as region:start..end:strand
coordinates=$(
    curl -s "${ensembl}/map/translation/${translation}/${residue_start}..${residue_end}?content-type=application/json" \
        | jq -r '.mappings[] | "\(.seq_region_name):\(.start)..\(.end):\(.strand)"'
)

# Nucleotide sequence of that region:
dna_sequence=$(curl -s "${ensembl}/sequence/region/human/${coordinates}?content-type=text/plain")

echo "Protein sequence: ${prot_sequence}"
echo "Coordinates of the protein: ${coordinates}"
echo "DNA sequence: ${dna_sequence}"

Protein sequence: C
Coordinates of the protein: 17:27787800..27787802:-1
DNA sequence: TGC


## Final run

I have double-checked everything and it all looks good, so let's run at full scale!

In [None]:
# NOTE(review): `pandarallel` is not imported in any cell visible here -- presumably
# `from pandarallel import pandarallel` has to run before this cell; confirm.
pandarallel.initialize(progress_bar=True)


# Generate all possible protein mappings:
# `map2uniprot` is applied once per PDB structure, and each call fetches that
# structure's mapping over the network.
processed = df.groupby('pdb_structure_id').parallel_apply(map2uniprot)



In [240]:
# Structures that occur in exactly one interaction row:
single_interaction_structures = (
    df['pdb_structure_id']
    .value_counts()
    .pipe(lambda counts: counts[counts == 1])
    .index
    .tolist()
)

# The interaction rows of those structures, re-indexed from zero:
subset = (
    df[df['pdb_structure_id'].isin(single_interaction_structures)]
    .reset_index(drop=True)
)

In [246]:
# Try the mapping on the structures found in the first five rows of `subset`:
subset.head().groupby('pdb_structure_id').apply(map2uniprot)

Unnamed: 0_level_0,Unnamed: 1_level_0,pdb_structure_id,compound_id,prot_residue_number,prot_chain_id,prot_residue_type,interaction_types,accession,uniprot_position
pdb_structure_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1x68,0,1x68,ZN,32,A,HIS,[metal_complex],PRO_0000075742,246.0
1x68,1,1x68,ZN,32,A,HIS,[metal_complex],Q5TD97,246.0
3vfk,0,3vfk,CCS,77,A,LYS,[hbond],,
4pok,0,4pok,COM,12,A,GLN,[hbond],P10599-2,12.0
4pok,1,4pok,COM,12,A,GLN,[hbond],PRO_0000120005,12.0
4pok,2,4pok,COM,12,A,GLN,[hbond],P10599,12.0
6gt6,0,6gt6,CYS,47,B,ILE,[hbond],PRO_0000027835,404.0
6gt6,1,6gt6,CYS,47,B,ILE,[hbond],P00748,404.0
7ad6,0,7ad6,CYS,700,A,TYR,[hbond],PRO_0000005987,700.0
7ad6,1,7ad6,CYS,700,A,TYR,[hbond],PRO_0000005988,700.0


In [247]:
# Empty frame carrying the same four columns that `get_pdb_sifts_mapping` returns:
pd.DataFrame(columns=['accession', 'uniprot_position', 'prot_chain_id', 'prot_residue_number'])

Unnamed: 0,accession,uniprot_position,prot_chain_id,prot_residue_number


In [254]:
# Add a constant tissue label, give the cell-line column a generic name and
# save the table under a new file name (`to_csv` returns None, so nothing is displayed).
(
    pd.read_csv('~/project_data/validation_lab/2022.05/PPP_OTVL_May2022/CO_Biomarkers.txt', sep='\t')
    .assign(tissue='CO')
    .rename(columns={'CO_line': 'cell_line'})
    .to_csv(
        '~/project_data/validation_lab/2022.05/PPP_OTVL_May2022/CO_Biomarkers_updated.txt',
        index=False,
        sep='\t',
    )
)

In [261]:
%%bash

# Preview the first lines of the updated table as aligned columns:
head ~/project_data/validation_lab/2022.05/PPP_OTVL_May2022/CO_Biomarkers_updated.txt | column -t

cell_line  MS_status  CRIS_subtype  KRAS_status  TP53_status  APC_status  BRAF_status  tissue
SW626      MSS        ?             mut          mut          mut         wt           CO
HT29       MSS        B             wt           mut          mut         wt           CO
SW837      MSS        B             mut          mut          mut         mut          CO
MDST8      MSS        D             wt           wt           mut         wt           CO
HCT116     MSI        D             mut          wt           wt          mut          CO
KM12       MSI        A             wt           mut          mut         mut          CO
RKO        MSI        ?             wt           wt           wt          wt           CO
LS180      MSI        A             mut          wt           wt          mut          CO


In [260]:
# Render a small example table of biomarker hypotheses for the ADSL gene as markdown.
print(
    pd.DataFrame(
        [
            {
                'gene': 'ADSL',
                'biomarker': biomarker,
                'status': status,
                'hypotheisStatus': True,
            }
            for biomarker, status in [
                ('MS_status', 'MSS'),
                ('MS_status', 'MSS'),
                ('KRAS_status', 'mut'),
                ('KRAS_status', 'wt'),
            ]
        ]
    ).to_markdown(index=False)
)

| gene   | biomarker   | status   | hypotheisStatus   |
|:-------|:------------|:---------|:------------------|
| ADSL   | MS_status   | MSS      | True              |
| ADSL   | MS_status   | MSS      | True              |
| ADSL   | KRAS_status | mut      | True              |
| ADSL   | KRAS_status | wt       | True              |


In [274]:
# Compound / variant / disease table, trimmed and renamed to the columns of interest.
# The frame is bound to `pdb_data` because the following cells refer to it by that name.
pdb_data = (
    spark.read.parquet('/Users/dsuveges/project_data/marine/pdb2variants2disease')
    .withColumnRenamed('pdbCompound', 'compoundId')
    .withColumnRenamed('symbol', 'targetSymbol')
    .select(
        'compoundId',
        'drugName',
        'variantIds',
        'diseaseName',
        f.col('name').alias('targetname'),
        'targetSymbol'
    )
    # Cache the result: it is counted here and displayed again in later cells.
    .persist()
)

# Keep the name this cell originally assigned, so code using `pdb` still works.
pdb = pdb_data

pdb.count()

33

In [267]:
# List up to 40 compound id / drug name pairs without truncating long names:
pdb_data.select('compoundId', 'drugName').show(40, truncate=False)

+----------+-------------------------------------------+
|compoundId|drugName                                   |
+----------+-------------------------------------------+
|DDF       |LOMETREXOL                                 |
|FAD       |FLAVIN ADENIN DINUCLEOTIDE                 |
|K81       |LXH254                                     |
|SM5       |CHEMBL526479                               |
|K81       |LXH254                                     |
|AGS       |ATPGAMMAS                                  |
|032       |VEMURAFENIB                                |
|NCC       |CHEMBL1234647                              |
|MR6       |2-(3,5-Dimethylphenyl)Benzo[D]Oxazole      |
|ABU       |GAMMA-AMINOBUTYRIC ACID                    |
|GDP       |CHEMBL384759                               |
|GP2       |PHOSPHOMETHYLPHOSPHONIC ACID GUANOSYL ESTER|
|AEE       |AEE-788                                    |
|IRE       |GEFITINIB                                  |
|GTP       |GUANOSINE TRIPHOSPH

In [268]:
# Number of rows in the compound id / target name projection:
pdb_data.select('compoundId', 'targetName').count()

33

In [310]:
# Toy single-row frame with an id and two dictionary-valued columns that share some keys.
t = spark.createDataFrame(
    [
        dict(
            id=1,
            a=dict(t1=1, t2=2, t3=3, t4=4),
            b=dict(t5=5, t2=6, t3=7, t4=8),
        )
    ]
).persist()

In [311]:
# Print the frame without truncating the dictionary-valued columns:
t.show(truncate=False)

+------------------------------------+------------------------------------+---+
|a                                   |b                                   |id |
+------------------------------------+------------------------------------+---+
|{t4 -> 4, t1 -> 1, t2 -> 2, t3 -> 3}|{t4 -> 8, t5 -> 5, t2 -> 6, t3 -> 7}|1  |
+------------------------------------+------------------------------------+---+



In [12]:
# NOTE(review): this cell fails with the UnicodeDecodeError shown in its output.
# The path ends in `.tar.gz`, so presumably it is a tar archive rather than a plain
# gzip-compressed table, and its content does not decode as UTF-8 -- TODO confirm the
# archive layout and the encoding before fixing the read.
# Also note that a successful read would rebind `df`, the PLIP frame used above.
df = pd.read_csv('/Users/dsuveges/project_data/marine/interesting_drug_repurpose.tar.gz',
                 encoding='utf-8', sep='\t')
df

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x97 in position 2056: invalid start byte

In [18]:
# Release metadata record; printing the keys view lists the field names in insertion order.
d = dict(
    datasourceId="ot_crispr_validation",
    datatypeId="ot_validation_lab",
    releaseDate="2022-05-09",
    releaseVersion="v1.0",
)
print(d.keys())

dict_keys(['datasourceId', 'datatypeId', 'releaseDate', 'releaseVersion'])
