In [None]:
import pandas as pd

In [None]:
# Path of the fixed-width PDB coordinate file parsed in this cell.
data_file = "Darsh Suhas Ambade - atom_cord.pdb.txt"

# Half-open (start, end) character ranges of the fields read from each line.
colspecs = [
    (0, 6),   # Record type (ATOM)
    (6, 11),  # Atom serial number
    (12, 16), # Atom name
    (17, 20), # Residue name
    (21, 22), # Chain ID
    (22, 26), # Residue sequence number (RESID)
    (30, 38), # X coordinate
    (38, 46), # Y coordinate
    (46, 54)  # Z coordinate
]

column_names = ["Record", "AtomSerial", "AtomName", "ResName", "ChainID", "ResID", "X", "Y", "Z"]

# Read through the `data_file` variable so the path is defined in exactly one place.
df = pd.read_fwf(data_file, colspecs=colspecs, names=column_names)

# Keep only ATOM records: other record types (e.g. TER/END lines) have blank
# name/coordinate fields that would otherwise end up in the dictionary or
# break the string handling below.
atom_rows = df[df["Record"] == "ATOM"]

# pdb_dict maps residue number -> {"resname": <residue name>, <atom name>: [x, y, z], ...}
# NOTE(review): residues are keyed by ResID alone; ChainID is read but not used,
# so residues that share a number across chains would be merged into one entry.
pdb_dict = {}

for row in atom_rows.itertuples(index=False):
    # Store plain Python numbers so keys and coordinates do not depend on the
    # pandas/numpy scalar types chosen while parsing.
    resid = int(row.ResID)
    atom_name = row.AtomName.strip()
    res_name = row.ResName.strip()
    coordinates = [float(row.X), float(row.Y), float(row.Z)]

    # The first atom seen for a residue creates its entry and records the residue name.
    residue = pdb_dict.setdefault(resid, {"resname": res_name})
    residue[atom_name] = coordinates

print(pdb_dict)

print("Test 1", pdb_dict[5]['resname'])
print("Test 2", pdb_dict[5]['CA'])

Test 1 LEU
Test 2 [52.675, -23.892, 93.371]


In [None]:
# Tally how many residues of each residue name are present in pdb_dict.
residue_composition = {}

for residue in pdb_dict.values():
    resname = residue["resname"]
    # .get supplies 0 the first time a residue name is encountered.
    residue_composition[resname] = residue_composition.get(resname, 0) + 1

print(residue_composition)

{'LEU': 27, 'SER': 17, 'ALA': 32, 'GLU': 28, 'ASP': 26, 'LYS': 30, 'VAL': 19, 'ARG': 18, 'MET': 8, 'ILE': 26, 'ASN': 15, 'GLY': 17, 'THR': 20, 'GLN': 13, 'HIS': 6, 'TYR': 13, 'CYS': 9, 'PHE': 19, 'TRP': 3, 'PRO': 4}


In [None]:
# Atoms per residue. Each pdb_dict entry holds one "resname" key next to its
# atom-name keys, so that key is subtracted from the length.
residue_atom_counts = {resid: len(entry) - 1 for resid, entry in pdb_dict.items()}

# All residues share one atom count exactly when the set of counts has a single member.
same_atom_count = len(set(residue_atom_counts.values())) == 1
if not same_atom_count:
    print("Residues have different numbers of atoms")

    # Histogram: atom count -> number of residues with that many atoms.
    atom_count_distribution = {}
    for n_atoms in residue_atom_counts.values():
        atom_count_distribution[n_atoms] = atom_count_distribution.get(n_atoms, 0) + 1

    print("Distribution of atom counts:")
    for n_atoms, n_residues in atom_count_distribution.items():
        print(n_atoms, "atoms:", n_residues, "residues")
else:
    print("All residues have the same number of atoms:", list(residue_atom_counts.values())[0])

# Overall mean number of atoms per residue.
total_atoms = sum(residue_atom_counts.values())
total_residues = len(residue_atom_counts)
average_atoms = total_atoms / total_residues
print("Average number of atoms per residue:", average_atoms)

# Group the per-residue atom counts by residue name.
atoms_by_resname = {}
for resid, n_atoms in residue_atom_counts.items():
    atoms_by_resname.setdefault(pdb_dict[resid]["resname"], []).append(n_atoms)

print("Atom counts by residue type:")
for resname, counts in atoms_by_resname.items():
    label = "Same" if len(set(counts)) == 1 else "Different"
    mean_count = sum(counts) / len(counts)
    print(f"{resname}: {label} atom counts, average: {mean_count:.2f}")

Residues have different numbers of atoms
Distribution of atom counts:
5 atoms: 33 residues
6 atoms: 26 residues
9 atoms: 71 residues
8 atoms: 101 residues
7 atoms: 43 residues
11 atoms: 36 residues
4 atoms: 17 residues
10 atoms: 7 residues
12 atoms: 13 residues
14 atoms: 3 residues
Average number of atoms per residue: 8.002857142857144
Atom counts by residue type:
LEU: Different atom counts, average: 7.89
SER: Same atom counts, average: 6.00
ALA: Same atom counts, average: 5.00
GLU: Same atom counts, average: 9.00
ASP: Same atom counts, average: 8.00
LYS: Same atom counts, average: 9.00
VAL: Same atom counts, average: 7.00
ARG: Same atom counts, average: 11.00
MET: Same atom counts, average: 8.00
ILE: Same atom counts, average: 8.00
ASN: Same atom counts, average: 8.00
GLY: Same atom counts, average: 4.00
THR: Same atom counts, average: 7.00
GLN: Same atom counts, average: 9.00
HIS: Same atom counts, average: 10.00
TYR: Same atom counts, average: 12.00
CYS: Same atom counts, average: 6

In [30]:
import math

# Residue numbers (ascending) of every residue that has a CA atom, and the
# matching CA coordinates in the same order.
ca_resids = [resid for resid in sorted(pdb_dict.keys()) if 'CA' in pdb_dict[resid]]
ca_coords = [pdb_dict[resid]['CA'] for resid in ca_resids]

total_distance = 0
count = 0

for i in range(len(ca_coords) - 1):
    # Only pair residues whose numbers differ by exactly 1. Neighbours in the
    # sorted list can be separated by a numbering gap (a missing residue, or a
    # residue without a CA atom); measuring across such a gap would mix
    # non-adjacent pairs into the "consecutive" average.
    if ca_resids[i + 1] - ca_resids[i] != 1:
        continue

    v1 = ca_coords[i]
    v2 = ca_coords[i + 1]
    vdiff = [v2[0] - v1[0], v2[1] - v1[1], v2[2] - v1[2]]
    # Euclidean distance between the two CA positions.
    distance = math.sqrt(vdiff[0]**2 + vdiff[1]**2 + vdiff[2]**2)
    total_distance += distance
    count += 1

# Report 0 when no adjacent CA pair exists rather than dividing by zero.
average_distance = total_distance / count if count > 0 else 0
print(f"Average distance between consecutive CA atoms: {average_distance:.3f}")

Average distance between consecutive CA atoms: 3.800
