In [1]:
!pip install PyBioMed

Collecting PyBioMed
  Downloading PyBioMed-1.0.tar.gz (11.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: PyBioMed
  Building wheel for PyBioMed (setup.py) ... [?25l[?25hdone
  Created wheel for PyBioMed: filename=PyBioMed-1.0-py3-none-any.whl size=11266895 sha256=a5f791fa032e0a57a0c46f94dd4d25e3cde985a00c6c0ff5ea22568cef2a4cff
  Stored in directory: /root/.cache/pip/wheels/03/c8/be/74f7e3ad04bbbb4a2977551bc19e784919a025b97ba6da1a17
Successfully built PyBioMed
Installing collected packages: PyBioMed
Successfully installed PyBioMed-1.0


In [2]:
!pip install Pybel
!pip install RDkit

Collecting Pybel
  Downloading pybel-0.15.5-py3-none-any.whl (387 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m387.8/387.8 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Collecting bel-resources>=0.0.3 (from Pybel)
  Downloading bel_resources-0.0.3-py3-none-any.whl (17 kB)
Collecting requests-file (from Pybel)
  Downloading requests_file-1.5.1-py2.py3-none-any.whl (3.7 kB)
Collecting bioregistry (from Pybel)
  Downloading bioregistry-0.10.113-py3-none-any.whl (10.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ratelimit (from Pybel)
  Downloading ratelimit-2.2.1.tar.gz (5.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pystow>=0.1.2 (from Pybel)
  Downloading pystow-0.5.2-py3-none-any.whl (31 kB)
Collecting psycopg2-binary (from Pybel)
  Downloading psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K     

In [4]:
import os
import PyBioMed

from PyBioMed.PyProtein.AAComposition import (
    CalculateAAComposition,
    CalculateDipeptideComposition,
    GetSpectrumDict,
)
from PyBioMed.PyProtein.AAIndex import GetAAIndex1, GetAAIndex23
from PyBioMed.PyProtein.Autocorrelation import (
    CalculateEachGearyAuto,
    CalculateEachMoranAuto,
    CalculateEachNormalizedMoreauBrotoAuto,
    CalculateGearyAutoTotal,
    CalculateMoranAutoTotal,
    CalculateNormalizedMoreauBrotoAutoTotal,
)
from PyBioMed.PyProtein.ConjointTriad import CalculateConjointTriad
from PyBioMed.PyProtein.CTD import CalculateCTD
from PyBioMed.PyProtein.GetSubSeq import GetSubSequence
from PyBioMed.PyProtein.PseudoAAC import GetAPseudoAAC, GetPseudoAAC, _GetPseudoAAC
from PyBioMed.PyProtein.QuasiSequenceOrder import (
    GetQuasiSequenceOrder,
    GetQuasiSequenceOrderp,
    GetSequenceOrderCouplingNumberp,
    GetSequenceOrderCouplingNumberTotal,
)


class PyProtein:
    """
    This GetProDes class aims at collecting all descriptor calcualtion modules into a simple class.

    """

    AALetter = [
        "A",
        "R",
        "N",
        "D",
        "C",
        "E",
        "Q",
        "G",
        "H",
        "I",
        "L",
        "K",
        "M",
        "F",
        "P",
        "S",
        "T",
        "W",
        "Y",
        "V",
    ]

    Version = 1.0

    def __init__(self, ProteinSequence=""):
        """
        input a protein sequence
        """
        if len(ProteinSequence) == 0:
            print(
                "You must input a protein sequence when constructing a object. It is a string!"
            )
        else:
            self.ProteinSequence = ProteinSequence

    def GetAAComp(self):
        """
        amino acid compositon descriptors (20)

        Usage:

        result = GetAAComp()
        """
        res = CalculateAAComposition(self.ProteinSequence)
        return res

    def GetDPComp(self):
        """
        dipeptide composition descriptors (400)

        Usage:

        result = GetDPComp()
        """
        res = CalculateDipeptideComposition(self.ProteinSequence)
        return res

    def GetTPComp(self):
        """
        tri-peptide composition descriptors (8000)

        Usage:

        result = GetTPComp()
        """
        res = GetSpectrumDict(self.ProteinSequence)
        return res

    def GetMoreauBrotoAuto(self):
        """
        Normalized Moreau-Broto autocorrelation descriptors (240)

        Usage:

        result = GetMoreauBrotoAuto()
        """
        res = CalculateNormalizedMoreauBrotoAutoTotal(self.ProteinSequence)
        return res

    def GetMoranAuto(self):
        """
        Moran autocorrelation descriptors (240)

        Usage:

        result = GetMoranAuto()
        """
        res = CalculateMoranAutoTotal(self.ProteinSequence)
        return res

    def GetGearyAuto(self):
        """
        Geary autocorrelation descriptors (240)

        Usage:

        result = GetGearyAuto()
        """
        res = CalculateGearyAutoTotal(self.ProteinSequence)
        return res

    def GetCTD(self):
        """
        Composition Transition Distribution descriptors (147)

        Usage:

        result = GetCTD()
        """
        res = CalculateCTD(self.ProteinSequence)
        return res

    def GetPAAC(self, lamda=10, weight=0.05):
        """
        Type I Pseudo amino acid composition descriptors (default is 30)

        Usage:

        result = GetPAAC(lamda=10,weight=0.05)

        lamda factor reflects the rank of correlation and is a non-Negative integer, such as 15.

        Note that (1)lamda should NOT be larger than the length of input protein sequence;

        (2) lamda must be non-Negative integer, such as 0, 1, 2, ...; (3) when lamda =0, the

        output of PseAA server is the 20-D amino acid composition.

        weight factor is designed for the users to put weight on the additional PseAA components

        with respect to the conventional AA components. The user can select any value within the

        region from 0.05 to 0.7 for the weight factor.
        """
        res = _GetPseudoAAC(self.ProteinSequence, lamda=lamda, weight=weight)
        return res

    def GetPAACp(self, lamda=10, weight=0.05, AAP=[]):
        """
        Type I Pseudo amino acid composition descriptors for the given properties (default is 30)

        Usage:

        result = GetPAACp(lamda=10,weight=0.05,AAP=[])

        lamda factor reflects the rank of correlation and is a non-Negative integer, such as 15.

        Note that (1)lamda should NOT be larger than the length of input protein sequence;

        (2) lamda must be non-Negative integer, such as 0, 1, 2, ...; (3) when lamda =0, the

        output of PseAA server is the 20-D amino acid composition.

        weight factor is designed for the users to put weight on the additional PseAA components

        with respect to the conventional AA components. The user can select any value within the

        region from 0.05 to 0.7 for the weight factor.

        AAP is a list form containing the properties, each of which is a dict form.
        """
        res = GetPseudoAAC(self.ProteinSequence, lamda=lamda, weight=weight, AAP=AAP)
        return res

    def GetAPAAC(self, lamda=10, weight=0.5):
        """
        Amphiphilic (Type II) Pseudo amino acid composition descriptors

        default is 30

        Usage:

        result = GetAPAAC(lamda=10,weight=0.5)

        lamda factor reflects the rank of correlation and is a non-Negative integer, such as 15.

        Note that (1)lamda should NOT be larger than the length of input protein sequence;

        (2) lamda must be non-Negative integer, such as 0, 1, 2, ...; (3) when lamda =0, the

        output of PseAA server is the 20-D amino acid composition.

        weight factor is designed for the users to put weight on the additional PseAA components

        with respect to the conventional AA components. The user can select any value within the

        region from 0.05 to 0.7 for the weight factor.

        """
        res = GetAPseudoAAC(self.ProteinSequence, lamda=lamda, weight=weight)
        return res

    def GetSOCN(self, maxlag=45):
        """
        Sequence order coupling numbers  default is 45

        Usage:

        result = GetSOCN(maxlag=45)

        maxlag is the maximum lag and the length of the protein should be larger

        than maxlag. default is 45.
        """
        res = GetSequenceOrderCouplingNumberTotal(self.ProteinSequence, maxlag=maxlag)
        return res

    def GetSOCNp(self, maxlag=45, distancematrix={}):
        """
        Sequence order coupling numbers  default is 45

        Usage:

        result = GetSOCN(maxlag=45)

        maxlag is the maximum lag and the length of the protein should be larger

        than maxlag. default is 45.

        distancematrix is a dict form containing 400 distance values
        """
        res = GetSequenceOrderCouplingNumberp(
            self.ProteinSequence, maxlag=maxlag, distancematrix=distancematrix
        )
        return res

    def GetQSO(self, maxlag=30, weight=0.1):
        """
        Quasi sequence order descriptors  default is 50

        result = GetQSO(maxlag=30, weight=0.1)

        maxlag is the maximum lag and the length of the protein should be larger

        than maxlag. default is 45.
        """
        res = GetQuasiSequenceOrder(self.ProteinSequence, maxlag=maxlag, weight=weight)
        return res

    def GetQSOp(self, maxlag=30, weight=0.1, distancematrix={}):
        """
        Quasi sequence order descriptors  default is 50

        result = GetQSO(maxlag=30, weight=0.1)

        maxlag is the maximum lag and the length of the protein should be larger

        than maxlag. default is 45.

        distancematrix is a dict form containing 400 distance values
        """
        res = GetQuasiSequenceOrderp(
            self.ProteinSequence,
            maxlag=maxlag,
            weight=weight,
            distancematrix=distancematrix,
        )
        return res

    def GetMoreauBrotoAutop(self, AAP={}, AAPName="p"):
        """
        Normalized Moreau-Broto autocorrelation descriptors for the given property (30)

        Usage:

        result = GetMoreauBrotoAutop(AAP={},AAPName='p')

        AAP is a dict containing physicochemical properities of 20 amino acids
        """
        res = CalculateEachNormalizedMoreauBrotoAuto(
            self.ProteinSequence, AAP=AAP, AAPName=AAPName
        )
        return res

    def GetMoranAutop(self, AAP={}, AAPName="p"):
        """
        Moran autocorrelation descriptors for the given property (30)

        Usage:

        result = GetMoranAutop(AAP={},AAPName='p')

        AAP is a dict containing physicochemical properities of 20 amino acids
        """
        res = CalculateEachMoranAuto(self.ProteinSequence, AAP=AAP, AAPName=AAPName)
        return res

    def GetGearyAutop(self, AAP={}, AAPName="p"):
        """
        Geary autocorrelation descriptors for the given property (30)

        Usage:

        result = GetGearyAutop(AAP={},AAPName='p')

        AAP is a dict containing physicochemical properities of 20 amino acids
        """
        res = CalculateEachGearyAuto(self.ProteinSequence, AAP=AAP, AAPName=AAPName)
        return res

    def GetSubSeq(self, ToAA="S", window=3):
        """
        obtain the sub sequences wit length 2*window+1, whose central point is ToAA

        Usage:

        result = GetSubSeq(ToAA='S',window=3)

        ToAA is the central (query point) amino acid in the sub-sequence.

        window is the span.
        """
        res = GetSubSequence(self.ProteinSequence, ToAA=ToAA, window=window)
        return res

    def GetTriad(self):
        """
        Calculate the conjoint triad features from protein sequence.

        Useage:

        res = GetTriad()

        Output is a dict form containing all 343 conjoint triad features.
        """
        res = CalculateConjointTriad(self.ProteinSequence)
        return res

    def GetALL(self):
        """
        Calcualte all descriptors except tri-peptide descriptors
        """
        res = {}
        res.update(self.GetAAComp())
        res.update(self.GetDPComp())
        res.update(self.GetTPComp())
        res.update(self.GetMoreauBrotoAuto())
        res.update(self.GetMoranAuto())
        res.update(self.GetGearyAuto())
        res.update(self.GetCTD())
        res.update(self.GetPAAC())
        res.update(self.GetAPAAC())
        res.update(self.GetSOCN())
        res.update(self.GetQSO())
        res.update(self.GetTriad())
        return res

    def GetAAindex1(self, name, path="."):
        """
        Get the amino acid property values from aaindex1

        Usage:

        result=GetAAIndex1(name)

        Input: name is the name of amino acid property (e.g., KRIW790103)

        Output: result is a dict form containing the properties of 20 amino acids
        """

        return GetAAIndex1(name, path=path)

    def GetAAindex23(self, name, path="."):
        """
        Get the amino acid property values from aaindex2 and aaindex3

        Usage:

        result=GetAAIndex23(name)

        Input: name is the name of amino acid property (e.g.,TANS760101,GRAR740104)

        Output: result is a dict form containing the properties of 400 amino acid pairs
        """
        return GetAAIndex23(name, path=path)

In [6]:
protein = "MGQPGNGSAFLLAPNGSHAPDHDVTQERDEVWVVGMGIVMSLIVLAIVFGNVLVITAIAKFERLQTVTNYFITSLACADLVMGLAVVPFGAAHILMKMWTFGNFWCEFWTSIDVLCVTASIETLCVIAVDRYFAITSPFKYQSLLTKNKARVIILMVWIVSGLTSFLPIQMHWYRATHQEAINCYANETCCDFFTNQAYAIASSIVSFYVPLVIMVFVYSRVFQEAKRQLQKIDKSEGRFHVQNLSQVEQDGRTGHGLRRSSKFCLKEHKALKTLGIIMGTFTLCWLPFFIVNIVHVIQDNLIRKEVYILLNWIGYVNSGFNPLIYCRSPDFRIAFQELLCLRRSSLKAYGNGYSSNGNTGEQSGYHVEQEKENKLLCEDLPGTEDFVGHQGTVPSDNIDSQGRNCSTNDSLL"
cds = PyProtein(protein)

{'A': 6.053, 'R': 4.116, 'N': 5.327, 'D': 3.874, 'C': 3.148, 'E': 4.6, 'Q': 4.6, 'G': 6.78, 'H': 2.663, 'I': 7.506, 'L': 9.443, 'K': 3.874, 'M': 2.421, 'F': 6.053, 'P': 2.906, 'S': 7.022, 'T': 5.569, 'W': 1.937, 'Y': 3.39, 'V': 8.717, 'AA': 0.24, 'AR': 0.24, 'AN': 0.24, 'AD': 0.24, 'AC': 0.24, 'AE': 0.0, 'AQ': 0.0, 'AG': 0.0, 'AH': 0.24, 'AI': 1.21, 'AL': 0.24, 'AK': 0.49, 'AM': 0.0, 'AF': 0.49, 'AP': 0.49, 'AS': 0.49, 'AT': 0.24, 'AW': 0.0, 'AY': 0.49, 'AV': 0.49, 'RA': 0.24, 'RR': 0.49, 'RN': 0.24, 'RD': 0.24, 'RC': 0.0, 'RE': 0.0, 'RQ': 0.24, 'RG': 0.0, 'RH': 0.0, 'RI': 0.24, 'RL': 0.24, 'RK': 0.24, 'RM': 0.0, 'RF': 0.24, 'RP': 0.0, 'RS': 0.73, 'RT': 0.24, 'RW': 0.0, 'RY': 0.24, 'RV': 0.49, 'NA': 0.0, 'NR': 0.0, 'NN': 0.0, 'ND': 0.24, 'NC': 0.49, 'NE': 0.24, 'NQ': 0.24, 'NG': 0.97, 'NH': 0.0, 'NI': 0.49, 'NL': 0.49, 'NK': 0.49, 'NM': 0.0, 'NF': 0.24, 'NP': 0.24, 'NS': 0.24, 'NT': 0.24, 'NW': 0.24, 'NY': 0.24, 'NV': 0.24, 'DA': 0.0, 'DR': 0.24, 'DN': 0.49, 'DD': 0.0, 'DC': 0.0, 'DE':

In [7]:
import pandas as pd
res2 = cds.GetALL()

df = pd.DataFrame(list(res2.items()), columns=['Key', 'Value'])

In [8]:
print(df)

      Key  Value
0       A  6.053
1       R  4.116
2       N  5.327
3       D  3.874
4       C  3.148
...   ...    ...
9875  773  0.000
9876  774  0.000
9877  775  0.000
9878  776  1.000
9879  777  0.000

[9880 rows x 2 columns]


In [9]:
df = df[df['Key'].str.len() > 3]
df.reset_index(drop=True, inplace=True)

In [10]:
df.info

<bound method DataFrame.info of                                   Key     Value
0     MoreauBrotoAuto_Hydrophobicity1  0.173000
1     MoreauBrotoAuto_Hydrophobicity2  0.171000
2     MoreauBrotoAuto_Hydrophobicity3  0.174000
3     MoreauBrotoAuto_Hydrophobicity4  0.171000
4     MoreauBrotoAuto_Hydrophobicity5  0.169000
...                               ...       ...
1112                       QSOgrant46  0.033635
1113                       QSOgrant47  0.033115
1114                       QSOgrant48  0.030667
1115                       QSOgrant49  0.032042
1116                       QSOgrant50  0.033809

[1117 rows x 2 columns]>

In [11]:
df

Unnamed: 0,Key,Value
0,MoreauBrotoAuto_Hydrophobicity1,0.173000
1,MoreauBrotoAuto_Hydrophobicity2,0.171000
2,MoreauBrotoAuto_Hydrophobicity3,0.174000
3,MoreauBrotoAuto_Hydrophobicity4,0.171000
4,MoreauBrotoAuto_Hydrophobicity5,0.169000
...,...,...
1112,QSOgrant46,0.033635
1113,QSOgrant47,0.033115
1114,QSOgrant48,0.030667
1115,QSOgrant49,0.032042


In [12]:
df['Key'] = df['Key'].str.replace('\d+', '')
df['Key_prefix'] = df['Key'].str[:-1]
aggregated_df = df.groupby('Key_prefix').agg({'Key': 'first', 'Value': 'mean'}).reset_index(drop=True)



print(aggregated_df)

                               Key       Value
0                            APAAC    2.663850
1          GearyAuto_AvFlexibility    0.945033
2             GearyAuto_FreeEnergy    1.001967
3         GearyAuto_Hydrophobicity    0.950500
4             GearyAuto_Mutability    1.009167
5         GearyAuto_Polarizability    0.993067
6             GearyAuto_ResidueASA    0.995300
7             GearyAuto_ResidueVol    0.979967
8                 GearyAuto_Steric    0.967600
9          MoranAuto_AvFlexibility    0.045200
10            MoranAuto_FreeEnergy   -0.001933
11        MoranAuto_Hydrophobicity    0.049433
12            MoranAuto_Mutability   -0.005133
13        MoranAuto_Polarizability    0.004133
14            MoranAuto_ResidueASA   -0.003500
15            MoranAuto_ResidueVol    0.010467
16                MoranAuto_Steric    0.007167
17   MoreauBrotoAuto_AvFlexibility    0.060267
18      MoreauBrotoAuto_FreeEnergy    0.038533
19  MoreauBrotoAuto_Hydrophobicity    0.166267
20      Morea

  df['Key'] = df['Key'].str.replace('\d+', '')


In [17]:
mol_des = pd.read_csv('b2ar_no_pr.csv')

In [18]:
n = 0
for i in aggregated_df['Key']:
  mol_des[i] = aggregated_df['Value'][n]
  n += 1
print(aggregated_df)

                               Key       Value
0                            APAAC    2.663850
1          GearyAuto_AvFlexibility    0.945033
2             GearyAuto_FreeEnergy    1.001967
3         GearyAuto_Hydrophobicity    0.950500
4             GearyAuto_Mutability    1.009167
5         GearyAuto_Polarizability    0.993067
6             GearyAuto_ResidueASA    0.995300
7             GearyAuto_ResidueVol    0.979967
8                 GearyAuto_Steric    0.967600
9          MoranAuto_AvFlexibility    0.045200
10            MoranAuto_FreeEnergy   -0.001933
11        MoranAuto_Hydrophobicity    0.049433
12            MoranAuto_Mutability   -0.005133
13        MoranAuto_Polarizability    0.004133
14            MoranAuto_ResidueASA   -0.003500
15            MoranAuto_ResidueVol    0.010467
16                MoranAuto_Steric    0.007167
17   MoreauBrotoAuto_AvFlexibility    0.060267
18      MoreauBrotoAuto_FreeEnergy    0.038533
19  MoreauBrotoAuto_Hydrophobicity    0.166267
20      Morea

In [19]:
mol_des

Unnamed: 0,ID,Smiles,Target Name,Ki,exactmw,amw,lipinskiHBA,lipinskiHBD,NumRotatableBonds,NumHBD,...,QSOgrant,_ChargeC,_HydrophobicityC,_NormalizedVDWVC,_PolarityC,_PolarizabilityC,_SecondaryStrC,_SolventAccessibilityC,taugrant,tausw
0,CHEMBL1083364,C=CCc1ccccc1OCC(O)CN1CCC(N2C(=O)c3cccc4cccc(c3...,Beta-2 adrenergic receptor,40.00000,470.220557,470.569,6.0,1.0,8.0,1.0,...,0.02,37.833095,36.176619,35.391381,37.505714,35.393905,36.280238,36.269476,125.990844,125.990844
1,CHEMBL565547,Cc1nc2c3c(nc(Cc4ccccc4)n2n1)CCNCC3,Beta-2 adrenergic receptor,7600.00000,293.164046,293.374,5.0,1.0,2.0,1.0,...,0.02,37.833095,36.176619,35.391381,37.505714,35.393905,36.280238,36.269476,125.990844,125.990844
2,CHEMBL1257914,Oc1ccc(OCC(O)CNC2CCN(c3ncnc4scc(-c5ccccc5)c34)...,Beta-2 adrenergic receptor,81.00000,476.188212,476.602,7.0,3.0,8.0,3.0,...,0.02,37.833095,36.176619,35.391381,37.505714,35.393905,36.280238,36.269476,125.990844,125.990844
3,CHEMBL229429,N#Cc1cc(F)cc(C#Cc2csc(CF)n2)c1,Beta-2 adrenergic receptor,10000.00000,260.021976,260.268,2.0,0.0,1.0,0.0,...,0.02,37.833095,36.176619,35.391381,37.505714,35.393905,36.280238,36.269476,125.990844,125.990844
4,CHEMBL1632174,O=C(O)CCCCCN1CCC(CNC(=O)c2c3n(c4ccccc24)CCCO3)CC1,Beta-2 adrenergic receptor,10000.00000,427.247107,427.545,7.0,2.0,9.0,2.0,...,0.02,37.833095,36.176619,35.391381,37.505714,35.393905,36.280238,36.269476,125.990844,125.990844
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
816,CHEMBL4866806,CCN(CCCNC[C@H](O)c1ccc(O)c2[nH]c(=O)ccc12)C(=O...,Beta-2 adrenergic receptor,0.05012,849.410149,850.029,12.0,5.0,17.0,5.0,...,0.02,37.833095,36.176619,35.391381,37.505714,35.393905,36.280238,36.269476,125.990844,125.990844
817,CHEMBL4857743,O=C(N[C@H](c1ccccc1)c1cccc(OCCCCCCNC[C@H](O)c2...,Beta-2 adrenergic receptor,0.03981,654.341735,654.808,10.0,5.0,15.0,5.0,...,0.02,37.833095,36.176619,35.391381,37.505714,35.393905,36.280238,36.269476,125.990844,125.990844
818,CHEMBL5200887,COc1cc(C(=O)OCCCCNC[C@H](O)c2ccc(O)c3[nH]c(=O)...,Beta-2 adrenergic receptor,0.02512,790.357779,790.914,13.0,5.0,17.0,5.0,...,0.02,37.833095,36.176619,35.391381,37.505714,35.393905,36.280238,36.269476,125.990844,125.990844
819,CHEMBL4789047,Cc1ccc(CN(CC(O)COc2cccc3[nH]ccc23)C(C)(C)C)cc1,Beta-2 adrenergic receptor,1300.00000,366.230728,366.505,4.0,2.0,7.0,2.0,...,0.02,37.833095,36.176619,35.391381,37.505714,35.393905,36.280238,36.269476,125.990844,125.990844


In [20]:
mol_des.to_csv('all_descriptors.csv')