# List of the 20 standard amino acids in peptides



In [13]:
#!pip install tabulate
from tabulate import tabulate

# Property table for the 20 standard amino acids.
# Each record is (one-letter code, three-letter code, name) followed by three
# 0/1 flags, in the order of the last three table headers below.
# NOTE(review): Arg is flagged 0 and Cys 1 under "Resonance Structure" —
# confirm these against the definition this table is meant to encode.
amino_acids = [
    list(record)
    for record in (
        ("A", "Ala", "Alanine", 0, 0, 0),
        ("R", "Arg", "Arginine", 1, 0, 0),
        ("N", "Asn", "Asparagine", 1, 0, 1),
        ("D", "Asp", "Aspartic acid", 1, 0, 1),
        ("C", "Cys", "Cysteine", 0, 1, 1),
        ("E", "Glu", "Glutamic acid", 1, 0, 1),
        ("Q", "Gln", "Glutamine", 1, 0, 1),
        ("G", "Gly", "Glycine", 0, 0, 0),
        ("H", "His", "Histidine", 1, 0, 1),
        ("I", "Ile", "Isoleucine", 0, 0, 0),
        ("L", "Leu", "Leucine", 0, 0, 0),
        ("K", "Lys", "Lysine", 1, 0, 0),
        ("M", "Met", "Methionine", 0, 1, 0),
        ("F", "Phe", "Phenylalanine", 0, 0, 1),
        ("P", "Pro", "Proline", 0, 0, 0),
        ("S", "Ser", "Serine", 1, 0, 0),
        ("T", "Thr", "Threonine", 1, 0, 0),
        ("W", "Trp", "Tryptophan", 1, 0, 1),
        ("Y", "Tyr", "Tyrosine", 1, 0, 1),
        ("V", "Val", "Valine", 0, 0, 0),
    )
]

# Column titles, one per field of a record.
property_headers = [
    "One-Letter Code",
    "Three-Letter Code",
    "Name",
    "Hydrogen Bonding",
    "Sulfur",
    "Resonance Structure",
]

print(tabulate(amino_acids, headers=property_headers, tablefmt="fancy_grid"))

╒═══════════════════╤═════════════════════╤═══════════════╤════════════════════╤══════════╤═══════════════════════╕
│ One-Letter Code   │ Three-Letter Code   │ Name          │   Hydrogen Bonding │   Sulfur │   Resonance Structure │
╞═══════════════════╪═════════════════════╪═══════════════╪════════════════════╪══════════╪═══════════════════════╡
│ A                 │ Ala                 │ Alanine       │                  0 │        0 │                     0 │
├───────────────────┼─────────────────────┼───────────────┼────────────────────┼──────────┼───────────────────────┤
│ R                 │ Arg                 │ Arginine      │                  1 │        0 │                     0 │
├───────────────────┼─────────────────────┼───────────────┼────────────────────┼──────────┼───────────────────────┤
│ N                 │ Asn                 │ Asparagine    │                  1 │        0 │                     1 │
├───────────────────┼─────────────────────┼───────────────┼─────────────

In [9]:
import pandas as pd

# Load the database and pull out the peptide-sequence column.
file_path = 'PA_Database.csv'
df = pd.read_csv(file_path)

# Only the leading rows are analysed.
# NOTE(review): presumably rows past this point carry no peptide sequence —
# confirm against the CSV.
N_PEPTIDE_ROWS = 579

# Select the column by name instead of by position (previously column 5) so a
# reordered or extended CSV cannot silently hand back a different column; this
# also matches how the rest of the notebook addresses it.
peptide_seq = df['Pep Seq'].iloc[:N_PEPTIDE_ROWS]

# Show both ends of the selection as a quick sanity check.
print(peptide_seq.head(10))
print(peptide_seq.tail(10))

0    GG
1    GG
2    AG
3    AG
4    AA
5    AA
6    LG
7    LG
8    FG
9    FG
Name: Pep Seq, dtype: object
569           GANPNAAG
570      AAAAGGGEIKVAV
571      AAAAGGGEIKVAV
572             KKLLAK
573         RGDSKKLLAK
574    AAAAGGGLRKKLGKA
575    AAAAGGGLLGARKKK
576            AAAAAAD
577            AAAAAAK
578           GAAVILRR
Name: Pep Seq, dtype: object


In [10]:
#!pip install biopython
from Bio.SeqUtils import ProtParam

def calculate_hydrophobicity(sequence):
    """Return the GRAVY hydrophobicity score of a peptide sequence.

    Args:
        sequence: Peptide sequence in one-letter amino-acid codes.

    Returns:
        The value of ``ProteinAnalysis(sequence).gravy()``, or NaN when
        ``sequence`` is missing (``None`` or a float NaN, which is how pandas
        represents a blank cell) or empty.
    """
    # Missing values must not reach the analyser: they are not sequences.
    # ``x != x`` is true only for NaN, so no extra import is needed.
    if sequence is None or (isinstance(sequence, float) and sequence != sequence):
        return float("nan")
    # An empty sequence has no residues to average over.
    if len(sequence) == 0:
        return float("nan")

    analyzed_seq = ProtParam.ProteinAnalysis(sequence)
    return analyzed_seq.gravy()

# Score every selected peptide, then attach the scores to the full frame.
# The assignment aligns on the index, so rows that are not in peptide_seq
# end up as NaN in the new column.
hydrophobicities = peptide_seq.apply(calculate_hydrophobicity)
df['Hydrophobicity'] = hydrophobicities

# Persist the augmented table next to the original CSV.
new_file_path = 'Modified_PA_Database.csv'
df.to_csv(new_file_path, index=False)

# Preview the first ten rows and rows 569-578 of the two relevant columns.
preview_columns = ['Pep Seq', 'Hydrophobicity']
print(df[preview_columns].head(10))
print(df[preview_columns].iloc[569:579])

  Pep Seq  Hydrophobicity
0      GG            -0.4
1      GG            -0.4
2      AG             0.7
3      AG             0.7
4      AA             1.8
5      AA             1.8
6      LG             1.7
7      LG             1.7
8      FG             1.2
9      FG             1.2
             Pep Seq  Hydrophobicity
569         GANPNAAG       -0.500000
570    AAAAGGGEIKVAV        1.023077
571    AAAAGGGEIKVAV        1.023077
572           KKLLAK       -0.383333
573       RGDSKKLLAK       -1.150000
574  AAAAGGGLRKKLGKA       -0.080000
575  AAAAGGGLLGARKKK       -0.080000
576          AAAAAAD        1.042857
577          AAAAAAK        0.985714
578         GAAVILRR        0.837500


![Amino Acids Image](https://www.reagent.co.uk/wp-content/uploads/comm/n-/common-amino-acids.webp)

In [11]:
headers = ["Amino Acid", "Functional Group", "Donors (Hydrogen Bonding)", "Acceptors (Hydrogen Bonding)", "Notes", "Total H-Bond"]

# Row labels for the table, in display order.
amino_acid_labels = [
    "Alanine (A)",
    "Arginine (R)",
    "Asparagine (N)",
    "Aspartic Acid (D)",
    "Cysteine (C)",
    "Glutamine (Q)",
    "Glutamic Acid (E)",
    "Glycine (G)",
    "Histidine (H)",
    "Isoleucine (I)",
    "Leucine (L)",
    "Lysine (K)",
    "Methionine (M)",
    "Phenylalanine (F)",
    "Proline (P)",
    "Serine (S)",
    "Threonine (T)",
    "Tryptophan (W)",
    "Tyrosine (Y)",
    "Valine (V)",
]

# Every column after the label holds the placeholder text "None" for now;
# each row is its own list, so cells can be filled in independently.
data = [[label] + ["None"] * (len(headers) - 1) for label in amino_acid_labels]

table = tabulate(data, headers=headers, tablefmt="fancy_grid")
print(table)



╒═══════════════════╤════════════════════╤═════════════════════════════╤════════════════════════════════╤═════════╤════════════════╕
│ Amino Acid        │ Functional Group   │ Donors (Hydrogen Bonding)   │ Acceptors (Hydrogen Bonding)   │ Notes   │ Total H-Bond   │
╞═══════════════════╪════════════════════╪═════════════════════════════╪════════════════════════════════╪═════════╪════════════════╡
│ Alanine (A)       │ None               │ None                        │ None                           │ None    │ None           │
├───────────────────┼────────────────────┼─────────────────────────────┼────────────────────────────────┼─────────┼────────────────┤
│ Arginine (R)      │ None               │ None                        │ None                           │ None    │ None           │
├───────────────────┼────────────────────┼─────────────────────────────┼────────────────────────────────┼─────────┼────────────────┤
│ Asparagine (N)    │ None               │ None                      

In [14]:
#!pip install rdkit
from rdkit import Chem
from rdkit.Chem import Crippen

def calculate_logP(smiles):
    """Compute the Crippen LogP (hydrophobicity) for a SMILES string.

    Returns the value of ``Crippen.MolLogP`` for the parsed molecule, or
    ``None`` when ``Chem.MolFromSmiles`` gives back a falsy result.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if not parsed:
        return None
    return Crippen.MolLogP(parsed)

# Example: compute LogP for a hydrophobic tail given as SMILES (C16 alkyl chain).
hydrophobic_tail_smiles = "CCCCCCCCCCCCCCCC"  # Example for C16: one "C" per carbon atom
tail_logP = calculate_logP(hydrophobic_tail_smiles)  # None if the SMILES could not be turned into a molecule

print(f"LogP of the hydrophobic tail: {tail_logP}")

ModuleNotFoundError: No module named 'rdkit'

In [24]:
import pandas as pd
import requests

def get_logP(smiles, timeout=10):
    """Fetch the XLogP value PubChem reports for a SMILES string.

    Args:
        smiles: SMILES string to look up. Anything that is not a non-blank
            string (``None``, NaN from a blank CSV cell, ``""``) is treated as
            missing and never sent to PubChem.
        timeout: Seconds to wait for PubChem before giving up on the request.

    Returns:
        The ``XLogP`` entry of the first property record, or ``None`` when the
        input is missing, the request fails or times out, the HTTP status is
        not 200, or the response carries no ``XLogP`` value.
    """
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    # The SMILES goes in as a query argument rather than a path segment:
    # characters such as "/", "\\", "#" and "+" are legal SMILES but would
    # otherwise be read as URL syntax. ``params`` percent-encodes the value.
    url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/property/XLogP/JSON"
    try:
        # Without a timeout a stalled connection blocks the caller forever.
        response = requests.get(url, params={"smiles": smiles.strip()}, timeout=timeout)
    except requests.RequestException:
        # Same "no value available" contract as a non-200 reply, so one bad
        # lookup does not abort a whole DataFrame.apply run.
        return None

    if response.status_code != 200:
        return None

    try:
        properties = response.json()["PropertyTable"]["Properties"]
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or not shaped like a property table.
        return None
    if not properties:
        return None
    return properties[0].get("XLogP", None)

# Load the CSV file (make sure it's in the same folder as the notebook).
df = pd.read_csv("PA_Database.csv")

# Every lookup is a network round-trip, so query each distinct SMILES once and
# map the results back onto the rows instead of issuing one request per row.
# Rows with a missing SMILES are skipped and get NaN.
unique_smiles = df["N-Term SMILES"].dropna().unique()
logp_by_smiles = {smiles: get_logP(smiles) for smiles in unique_smiles}
df["N-Term LogP"] = df["N-Term SMILES"].map(logp_by_smiles)

# Print only the first rows as a preview.
PREVIEW_ROWS = 20
print(df[["N-Term SMILES", "N-Term LogP"]].head(PREVIEW_ROWS))

# Save the leading rows with a single write. Writing two slices to the same
# path, as before, left only the second slice on disk.
# NOTE(review): this is the same file the hydrophobicity cell writes, rebuilt
# here from a freshly loaded frame — confirm that replacing it is intended.
ROWS_TO_SAVE = 580
output_path = "Modified_PA_Database.csv"
df.head(ROWS_TO_SAVE).to_csv(output_path, index=False)

print(f" First {PREVIEW_ROWS} rows printed. First {ROWS_TO_SAVE} rows saved to {output_path}")

KeyboardInterrupt: 