In [9]:
# Setting file path
import os  # Used for the environment lookup below and for file path handling

# Directory where the CSV files are located.
# To use your own copy of the data, set the ML_DATA_DIR environment variable
# before starting the kernel instead of editing this cell. When the variable
# is not set, the original hard-coded location below is used unchanged.
data_directory = os.environ.get(
    'ML_DATA_DIR',
    'C:/Users/HP/Downloads/Machine-Learning-for-Paper-main/Machine-Learning-for-Paper-main/ml_data/',
)

In [10]:
import pandas as pd

In [11]:
# Name of the processed feature-importance file inside `data_directory`
file_name = 'feature_importances-processed.csv'

# Combine the directory path and file name to create the full file path
csv_file_path = os.path.join(data_directory, file_name)

# Fail fast when the input is missing. Previously the existence check only
# skipped the load, and the code below then either crashed with a NameError
# (fresh kernel) or silently reused `data` left over from an earlier run.
if not os.path.exists(csv_file_path):
    raise FileNotFoundError(
        f"Feature-importance file not found: '{csv_file_path}'. "
        "Check that data_directory points at the folder containing it."
    )

# The file is read without a header row; the three columns are named here.
data = pd.read_csv(
    csv_file_path,
    header=None,
    names=['residue_1', 'residue_2', 'pairwise_distance_importance'],
)

# Every residue label that appears in either residue column
unique_residues = pd.unique(data[['residue_1', 'residue_2']].values.ravel('K'))

# Cumulative importance per residue, keyed by residue label
residue_importance_dict = {}

for residue_id in unique_residues:
    # Rows where the current residue appears in either column of the pair
    residue_data = data[(data['residue_1'] == residue_id) | (data['residue_2'] == residue_id)]

    # Sum the importances of all pairs involving this residue and halve the
    # total, i.e. each pair's importance is split between its two residues.
    residue_importance = sum(residue_data['pairwise_distance_importance']) / 2

    residue_importance_dict[residue_id] = residue_importance

# Print the residue-specific importances
for residue_id, importance in residue_importance_dict.items():
    print(f"Residue {residue_id} Importance: {importance}")

Residue ASP116 Importance: 0.0155
Residue ASP120 Importance: 0.0
Residue ASP127 Importance: 0.0
Residue ASP160 Importance: 0.0
Residue ASP199 Importance: 0.0002
Residue ASP205 Importance: 0.01005
Residue ASP211 Importance: 0.0
Residue ASP218 Importance: 0.02115
Residue ASP251 Importance: 0.00655
Residue ASP306 Importance: 0.0
Residue ASP322 Importance: 0.0
Residue ASP327 Importance: 0.0
Residue ASP33 Importance: 0.0
Residue ASP345 Importance: 0.0
Residue ASP367 Importance: 0.00505
Residue ASP392 Importance: 0.0
Residue ASP396 Importance: 0.0
Residue ASP39 Importance: 0.0116
Residue ASP424 Importance: 0.0
Residue ASP431 Importance: 0.0
Residue ASP463 Importance: 0.0037
Residue ASP46 Importance: 0.0148
Residue ASP476 Importance: 0.0067
Residue ASP478 Importance: 0.0062
Residue ASP47 Importance: 0.0132
Residue ASP511 Importance: 0.0
Residue ASP525 Importance: 0.0
Residue ASP551 Importance: 0.0
Residue ASP555 Importance: 0.0
Residue ASP565 Importance: 0.01
Residue ASP598 Importance: 0.0144

In [12]:
# Full path of the sorted CSV file, written next to the input data.
# NOTE: despite its name, `output_file_name` has always held the full path
# (directory + file name); it is kept that way for any later cell that uses it.
output_file_name = os.path.join(data_directory, 'sorted_residue_importances.csv')

# The original joined this with `output_directory`, a name that is never
# defined in the notebook, so the cell raised NameError on a fresh kernel.
# `output_file_name` is already the complete path, so use it directly.
output_file_path = output_file_name

# Sort (residue, importance) pairs by importance, highest first.
# sorted() is stable, so residues with equal importance keep their dict order.
sorted_residue_importances = sorted(residue_importance_dict.items(), key=lambda x: x[1], reverse=True)

# Create a DataFrame from the sorted importances
sorted_data = pd.DataFrame(sorted_residue_importances, columns=['Residue', 'Importance'])

# Write the sorted residue importances to the CSV file (no index column)
sorted_data.to_csv(output_file_path, index=False)

print(f"Sorted residue importances have been written to '{output_file_path}'.")


Sorted residue importances have been written to 'C:/Users/HP/Downloads/Machine-Learning-for-Paper-main/Machine-Learning-for-Paper-main/ml_data/sorted_residue_importances.csv'.
