In [1]:
import joblib
import pandas as pd
import os

In [None]:
# Locations shared by the cells below
models_folder = "Models"  # directory scanned for serialized *.joblib models
test_data_path = "TestDataset/test_data.csv"  # CSV whose header supplies the feature names
output_folder = "FeatureWeights"  # directory that receives the per-model weight CSVs
os.makedirs(output_folder, exist_ok=True)

# nrows=0 parses only the header row. The first two columns are dropped;
# presumably they are identifier/label columns rather than features (TODO confirm).
feature_names = list(pd.read_csv(test_data_path, nrows=0).columns[2:])

In [None]:
# Export the coefficients of every serialized model found in `models_folder`.
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# make the processing order (and the printed log) reproducible between runs.
for model_file in sorted(os.listdir(models_folder)):
    if not model_file.endswith(".joblib"):
        continue

    model_path = os.path.join(models_folder, model_file)
    print(f"Processing model: {model_file}")

    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code. Only load model files that come from a trusted source.
    model = joblib.load(model_path)

    # Extract coefficients: the first row of coef_. Assumes a binary
    # linear model whose coef_ has shape (1, n_features) -- TODO confirm.
    coefficients = model.coef_[0]

    # Prefer the feature names the estimator recorded at fit time (scikit-learn
    # sets feature_names_in_ when fitted on a DataFrame); they are guaranteed to
    # line up with coef_. Fall back to the names read from the test CSV header.
    fitted_names = getattr(model, "feature_names_in_", None)
    model_feature_names = feature_names if fitted_names is None else list(fitted_names)

    # Fail with an explicit message instead of the generic pandas
    # "arrays must be of the same length" error.
    if len(model_feature_names) != len(coefficients):
        raise ValueError(
            f"{model_file}: {len(coefficients)} coefficients but "
            f"{len(model_feature_names)} feature names"
        )

    # Pair feature names with coefficients, largest weight first
    feature_weights = pd.DataFrame({
        "Feature": model_feature_names,
        "Weight": coefficients
    }).sort_values(by="Weight", ascending=False)

    output_file = os.path.join(output_folder, f"{os.path.splitext(model_file)[0]}_weights.csv")
    feature_weights.to_csv(output_file, index=False)

    print(f"Saved feature weights to: {output_file}")

print("Processing complete. All feature weights saved.")

Processing model: Logistic Regression_pancreatic.joblib
Saved feature weights to: FeatureWeights/Logistic Regression_pancreatic_weights.csv
Processing model: Logistic Regression_brain.joblib
Saved feature weights to: FeatureWeights/Logistic Regression_brain_weights.csv
Processing model: Logistic Regression_leukemia.joblib
Saved feature weights to: FeatureWeights/Logistic Regression_leukemia_weights.csv
Processing model: Logistic Regression_gastric.joblib
Saved feature weights to: FeatureWeights/Logistic Regression_gastric_weights.csv
Processing model: Logistic Regression_colorectal.joblib
Saved feature weights to: FeatureWeights/Logistic Regression_colorectal_weights.csv
Processing model: Logistic Regression_lung.joblib
Saved feature weights to: FeatureWeights/Logistic Regression_lung_weights.csv
Processing model: Logistic Regression_breast.joblib
Saved feature weights to: FeatureWeights/Logistic Regression_breast_weights.csv
Processing model: Logistic Regression_renal.joblib
Saved fea

In [None]:
def combine_feature_weights(feature_weights_folder, output_file, top_n=5):
    """Summarise per-model weight CSVs into one wide table and save it as CSV.

    Every file in ``feature_weights_folder`` whose name ends with
    ``_weights.csv`` is read; it must contain ``Feature`` and ``Weight``
    columns. For each file one row is produced holding the ``top_n`` largest
    and the ``top_n`` smallest weights together with their feature names.
    Slots for which the file has too few rows are filled with ``""``.

    Args:
        feature_weights_folder: Directory containing the ``*_weights.csv`` files.
        output_file: Path of the combined CSV to write.
        top_n: Number of largest and of smallest weights kept per file
            (default 5, the previously hard-coded value).

    Returns:
        None. The table is written to ``output_file`` and a confirmation
        message is printed.
    """
    suffix = "_weights.csv"
    ranks = range(1, top_n + 1)

    # Fixed column layout: guarantees a header row even when no input file is
    # found, so the output can always be read back with pd.read_csv.
    columns = (
        ["Cancer Type"]
        + [f"Positive Feature {k}" for k in ranks]
        + [f"Negative Feature {k}" for k in ranks]
        + [f"Positive Weight {k}" for k in ranks]
        + [f"Negative Weight {k}" for k in ranks]
    )

    combined_data = []  # one dict per input file

    # os.listdir() order is arbitrary; sort so the row order is reproducible.
    for file_name in sorted(os.listdir(feature_weights_folder)):
        if not file_name.endswith(suffix):
            continue

        # The row label is the file name with only the trailing suffix removed.
        cancer_type = file_name[: -len(suffix)]

        df = pd.read_csv(os.path.join(feature_weights_folder, file_name))

        # "Positive"/"Negative" here mean largest/smallest weights; the sign
        # of the selected values is not checked.
        extremes = {
            "Positive": df.nlargest(top_n, "Weight").reset_index(drop=True),
            "Negative": df.nsmallest(top_n, "Weight").reset_index(drop=True),
        }

        row = {"Cancer Type": cancer_type}
        for label, top in extremes.items():
            for i in range(top_n):
                available = i < len(top)
                row[f"{label} Feature {i + 1}"] = top.at[i, "Feature"] if available else ""
                row[f"{label} Weight {i + 1}"] = top.at[i, "Weight"] if available else ""
        combined_data.append(row)

    # Save to a CSV file
    combined_df = pd.DataFrame(combined_data, columns=columns)
    combined_df.to_csv(output_file, index=False)
    print(f"Combined feature weights saved to: {output_file}")

In [None]:
# Folder holding the per-model "*_weights.csv" files, and the path of the
# combined summary table to write
feature_weights_folder = "FeatureWeights"
output_file = "CombinedFeatureWeights.csv"

combine_feature_weights(
    feature_weights_folder=feature_weights_folder,
    output_file=output_file,
)

Combined feature weights saved to: CombinedFeatureWeights.csv
