<a href="https://colab.research.google.com/github/Praveengautam759/new-project/blob/main/Protparam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
from Bio.SeqUtils.ProtParam import ProteinAnalysis
import re

def analyze_protein(sequence):
    # Remove spaces, newlines, and non-alphabetic characters, and convert to uppercase
    sequence = re.sub(r'[^A-Z]', '', sequence.upper())

    # Handle empty sequence case
    if not sequence:
        print("Error: Empty protein sequence provided.")
        return

    # Create a ProteinAnalysis object using Biopython
    # Biopython's ProteinAnalysis expects a non-empty sequence,
    # so the check above prevents errors here.
    try:
        analysis = ProteinAnalysis(sequence)
    except ValueError as e:
        print(f"Error creating ProteinAnalysis object: {e}")
        print("Please ensure the sequence contains only valid amino acid codes (A-Z).")
        return


    # Get total length of the protein
    length = len(sequence)

    # Count each amino acid in the sequence
    aa_composition = analysis.count_amino_acids()

    # Calculate molecular weight
    mol_weight = analysis.molecular_weight()

    # Calculate theoretical isoelectric point (pI)
    pI = analysis.isoelectric_point()

    # Calculate extinction coefficients (returns a tuple: (reduced, oxidized/cystine))
    # Handle potential issues with amino acids not in extinction_coefficients if using custom values,
    # but Biopython's method handles standard W, Y, C.
    ext_coeff_reduced, ext_coeff_cystine = analysis.molar_extinction_coefficient()

    # Instability index (score < 40 = stable, > 40 = unstable)
    instability = analysis.instability_index()

    # Aliphatic index (related to thermostability)
    # This is the line that previously caused an AttributeError.
    # Using the correct method name based on Biopython documentation.
    # If this still fails, it indicates an issue with Biopython installation/version.
    try:
        aliphatic_index = analysis.aliphatic_index()
    except AttributeError:
        print("Could not calculate Aliphatic Index. The 'aliphatic_index' method might not be available in your Biopython version.")
        print("Please ensure you have a recent version of Biopython installed.")
        # Set a default or skip this calculation if the method is not found
        aliphatic_index = "N/A"


    # GRAVY = grand average of hydropathy (positive = hydrophobic, negative = hydrophilic)
    gravy = analysis.gravy()

    # Calculate total number of acidic residues (Asp D + Glu E)
    # Ensure length is not zero before dividing
    acidic = aa_composition.get('D', 0) + aa_composition.get('E', 0)
    acidic_percent = (acidic / length) * 100 if length > 0 else 0

    # Calculate total number of basic residues (Lys K + Arg R)
    # Ensure length is not zero before dividing
    basic = aa_composition.get('K', 0) + aa_composition.get('R', 0)
    basic_percent = (basic / length) * 100 if length > 0 else 0

    # Print results
    print("========== Protein Analysis ==========")
    print(f"Protein Length: {length} residues")
    print(f"Molecular Weight: {mol_weight:.2f} Da")
    print(f"Theoretical pI: {pI:.2f}")
    print(f"Extinction Coefficient (reduced Cys): {ext_coeff_reduced} M⁻¹cm⁻¹")
    print(f"Extinction Coefficient (disulfide/Cystine): {ext_coeff_cystine} M⁻¹cm⁻¹")
    print(f"Instability Index: {instability:.2f} ({'Stable' if instability < 40 else 'Unstable'})")

    # Print Aliphatic Index, handling the case where it couldn't be calculated
    if isinstance(aliphatic_index, float):
        print(f"Aliphatic Index: {aliphatic_index:.2f}")
    else:
        print(f"Aliphatic Index: {aliphatic_index}")

    print(f"GRAVY (Hydropathicity): {gravy:.3f}")
    print(f"Percentage of acidic amino acids (D + E): {acidic_percent:.2f}%")
    print(f"Percentage of basic amino acids (K + R): {basic_percent:.2f}%\n")


    # Print amino acid composition
    print("Amino Acid Composition:")
    # Ensure aa_composition is not empty before sorting and iterating
    if aa_composition:
        for aa, count in sorted(aa_composition.items()):
            # Ensure length is not zero before dividing
            percent = (count / length) * 100 if length > 0 else 0
            print(f"  {aa}: {count} ({percent:.2f}%)")
    else:
        print("  No amino acid composition available.")


# Ask user to input protein sequence
# Added a loop to re-prompt if the input is empty or contains invalid characters
while True:
    sequence_input = input("Enter the protein sequence (single-letter amino acid codes):\n")
    # Clean the sequence to check if it becomes empty after cleaning
    cleaned_sequence = re.sub(r'[^A-Z]', '', sequence_input.upper())
    if cleaned_sequence:
        analyze_protein(sequence_input)
        break # Exit loop if a valid non-empty sequence is processed
    else:
        print("Invalid or empty sequence entered. Please enter a sequence with valid amino acid codes (A-Z).")

Enter the protein sequence (single-letter amino acid codes):
MKALIILGFLFLSVAVQGKVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSEST DYGIFQINSKWWCNDGKTPNAVDGCHVSCSELMENDIAKAVACAKHIVSEQGITAWVAWKSHCRDHDVSS YVQGCTL
Could not calculate Aliphatic Index. The 'aliphatic_index' method might not be available in your Biopython version.
Please ensure you have a recent version of Biopython installed.
Protein Length: 147 residues
Molecular Weight: 16316.48 Da
Theoretical pI: 7.55
Extinction Coefficient (reduced Cys): 40450 M⁻¹cm⁻¹
Extinction Coefficient (disulfide/Cystine): 40950 M⁻¹cm⁻¹
Instability Index: 28.67 (Stable)
Aliphatic Index: N/A
GRAVY (Hydropathicity): -0.111
Percentage of acidic amino acids (D + E): 9.52%
Percentage of basic amino acids (K + R): 10.20%

Amino Acid Composition:
  A: 12 (8.16%)
  C: 8 (5.44%)
  D: 7 (4.76%)
  E: 7 (4.76%)
  F: 4 (2.72%)
  G: 10 (6.80%)
  H: 4 (2.72%)
  I: 7 (4.76%)
  K: 12 (8.16%)
  L: 13 (8.84%)
  M: 2 (1.36%)
  N: 8 (5.44%)
  P: 2 (1.36%)
  Q: 4 (2.72%