In [None]:
# Environment setup: install the openslide-python package with pip and the
# openslide-tools system package with apt-get (-y answers prompts automatically).
# NOTE(review): none of the cells visible here import openslide — confirm this
# setup cell is still needed.
!pip install openslide-python
!apt-get install -y openslide-tools

In [34]:
import pandas as pd
import numpy as np
import joblib

# Read the per-tile feature table for a single validation slide
df = pd.read_csv("/content/drive/MyDrive/AI_IHC_LANA_Positivity/02.17.24_MIL_analysis/ML/validation/validation_data/B3915_17_0_data.csv")

# 'tile_index' and 'ground_truth' are not passed to the model, so remove them
df = df.drop(columns=['tile_index', 'ground_truth'])

# Zero out missing values first, then +/- infinity, producing a fresh frame
df_filled = df.fillna(0).replace([np.inf, -np.inf], 0)

# Restore the fitted model from disk
ml_model = joblib.load("/content/drive/MyDrive/AI_IHC_LANA_Positivity/02.17.24_MIL_analysis/ML/models/02.18.24_random_forest_model.pkl")

# Score every tile; column 1 of predict_proba is taken as the positive-class
# probability (assumes a binary model whose second class is "positive" — TODO confirm)
predicted_probabilities = ml_model.predict_proba(df_filled)[:, 1]

# Slide-level summary: the highest tile probability decides the overall call
max_probability = np.max(predicted_probabilities)
overall_prediction = 1 if max_probability >= 0.5 else 0

# How many tiles individually reach the 0.5 cut-off
num_above_threshold = np.sum(predicted_probabilities >= 0.5)

# Keep the per-tile scores next to the features they were computed from
df_filled['predicted_probability'] = predicted_probabilities

print("Maximum Predicted Probability:", max_probability)
print("Overall ML Prediction:", overall_prediction)
print("Number of predicted probability values >= 0.5:", num_above_threshold)

Maximum Predicted Probability: 0.06903225806451613
Overall ML Prediction: 0
Number of predicted probability values >= 0.5: 0


In [None]:
import pandas as pd
import numpy as np
import joblib
import os

def process_csv(file_path, ml_model, threshold=0.5):
    """Score one CSV of per-tile features and summarise the predictions.

    The file is read with pandas, the 'tile_index' and 'ground_truth' columns
    are removed when present, NaN and +/- infinity are replaced with 0, and
    the remaining columns are passed to ``ml_model.predict_proba``.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to score.
    ml_model : object
        Any object exposing ``predict_proba(X)`` that returns a 2-D array.
        Column 1 is taken as the positive-class probability (assumes a binary
        classifier whose second class is the positive one — TODO confirm).
    threshold : float, optional
        Cut-off applied to the tile probabilities. Defaults to 0.5, the value
        that was previously hard-coded.

    Returns
    -------
    list or None
        ``[slide_name, max_probability, overall_prediction, num_above_threshold]``
        where ``slide_name`` is the file's base name (extension included),
        ``overall_prediction`` is 1 when the highest tile probability reaches
        ``threshold`` and 0 otherwise, and ``num_above_threshold`` counts the
        tiles at or above ``threshold``. ``None`` is returned when the file
        holds no rows.
    """
    print("Processing file:", file_path)

    try:
        df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header for pandas to parse; skip it the same
        # way as a header-only file instead of aborting the whole batch.
        print("DataFrame is empty. Skipping processing.")
        return None
    print("Loaded DataFrame shape:", df.shape)

    # Skip processing if DataFrame is empty
    if df.empty:
        print("DataFrame is empty. Skipping processing.")
        return None

    # Extract slide name from file path
    slide_name = os.path.basename(file_path)

    # Remove the bookkeeping/label columns if they exist (errors='ignore' keeps
    # files without a 'ground_truth' column from raising KeyError), then zero
    # out NaN and +/- infinity.
    features = (
        df.drop(columns=['tile_index', 'ground_truth'], errors='ignore')
        .fillna(0)
        .replace([np.inf, -np.inf], 0)
    )

    # Per-tile probability taken from column 1 of predict_proba
    predicted_probabilities = ml_model.predict_proba(features)[:, 1]

    # The highest tile probability decides the slide-level prediction
    max_probability = predicted_probabilities.max()
    overall_prediction = 1 if max_probability >= threshold else 0

    # Number of tiles whose probability reaches the threshold
    num_above_threshold = (predicted_probabilities >= threshold).sum()

    return [slide_name, max_probability, overall_prediction, num_above_threshold]

# Load the machine learning model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
ml_model = joblib.load("/content/drive/MyDrive/AI_IHC_LANA_Positivity/02.17.24_MIL_analysis/ML/models/02.18.24_random_forest_model.pkl")

# Folder containing CSV files
folder_path = "/content/drive/MyDrive/AI_IHC_LANA_Positivity/02.17.24_MIL_analysis/ML/testing/testing_data/"

# One [slide, max probability, prediction, count] entry per scored CSV
results = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the summary file stable from run to run.
for file_name in sorted(os.listdir(folder_path)):
    if not file_name.endswith('.csv'):
        continue
    file_path = os.path.join(folder_path, file_name)
    result = process_csv(file_path, ml_model)
    # process_csv returns None for files it skipped
    if result is not None:
        results.append(result)

# Create a DataFrame from the results
result_df = pd.DataFrame(results, columns=['Slide Name', 'Maximum Predicted Probability', 'Overall ML Prediction', 'Number of predicted probability values >= 0.5'])

# Save the DataFrame to a new CSV file
result_df.to_csv("/content/drive/MyDrive/AI_IHC_LANA_Positivity/02.17.24_MIL_analysis/ML/testing/predictions_summary.csv", index=False)