In [4]:
import joblib
import pandas as pd



In [5]:
# Parameters: file paths used by the cells below (relative to the working directory).
# Serialized model, read with joblib.load.
model_file = "antibiotic_model.pkl"
# Feature table to predict on, read with pd.read_csv (first column is used as the index).
input_file = "analysis_results/scaled_count_selected_samples.csv"
# Destination CSV for the decoded (human-readable) predictions.
output_file = "X_sample_predictions_readable.csv"


In [6]:
# Load the trained model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load model files that come from a trusted source.
model = joblib.load(model_file)
# Load the feature table; the first CSV column becomes the row index
# (it shows up as the sample identifier in the printed predictions).
df = pd.read_csv(input_file, index_col=0)


In [7]:
# Align the input table with the model: keep exactly the columns listed in the
# model's `feature_names_in_`, in that order.
model_features = model.feature_names_in_
print(f"{len(model_features)} features expected")
print(model_features[:10])

# Column selection raises KeyError if an expected feature is absent from the table.
df = df[model_features]

200 features expected
['STM2739' 'STM2714' 'PSLT053' 'STM2697' 'STM2701' 'STM2728' 'STM2738'
 'STM2236' 'STM2729' 'STM2718']


In [8]:
import numpy as np

# Sanity check: how do the columns of the input file line up with the features
# the model exposes through `feature_names_in_`?
model_features = model.feature_names_in_

# Compare against the header of the raw input file rather than `df.columns`:
# `df` has already been restricted to `model_features` earlier in the notebook,
# so diffing against it could only ever report a perfect match.
# nrows=0 parses just the header row, so re-reading the file is cheap.
current_features = pd.read_csv(input_file, index_col=0, nrows=0).columns.to_numpy()

# Set differences in both directions, plus the overlap.
missing = np.setdiff1d(model_features, current_features)
extra   = np.setdiff1d(current_features, model_features)
common  = np.intersect1d(model_features, current_features)

print(f"✅ Common features: {len(common)}")
print(f"❌ Missing features: {len(missing)}")
print(f"⚠️ Extra features: {len(extra)}")

# List the names only when the list is short (fewer than 10 entries). Each list
# is handled on its own so that a long "extra" list does not hide a short
# "missing" list (or vice versa).
if len(missing) < 10:
    print("Missing:", missing)
if len(extra) < 10:
    print("Extra:", extra)


✅ Common features: 200
❌ Missing features: 0
⚠️ Extra features: 0
Missing: []
Extra: []


In [9]:
# Run the loaded model on the feature table; `df` holds the columns named in
# `model.feature_names_in_`, in that order, at this point.
y_pred = model.predict(df)

In [10]:
# Names for the prediction columns, one per antibiotic.
# The order is assumed to match the order of the model's outputs.
antibiotic_cols = [
    'AMI Interpretation',
    'AMP Interpretation',
    'AZI Interpretation',
    'FOT Interpretation',
    'TAZ Interpretation',
    'CHL Interpretation',
    'CIP Interpretation',
    'COL Interpretation',
    'GEN Interpretation',
    'MERO Interpretation',
    'NAL Interpretation',
    'TET Interpretation',
    'TGC Interpretation',
    'TMP Interpretation',
]

# Attach sample identifiers (rows) and antibiotic names (columns) to the raw predictions.
predictions_df = pd.DataFrame(y_pred, index=df.index, columns=antibiotic_cols)

print(predictions_df)

                                      AMI Interpretation  AMP Interpretation  \
analysis_results/aligned/ERR12322786                   1                   0   

                                      AZI Interpretation  FOT Interpretation  \
analysis_results/aligned/ERR12322786                   1                   0   

                                      TAZ Interpretation  CHL Interpretation  \
analysis_results/aligned/ERR12322786                   1                   2   

                                      CIP Interpretation  COL Interpretation  \
analysis_results/aligned/ERR12322786                   1                   0   

                                      GEN Interpretation  MERO Interpretation  \
analysis_results/aligned/ERR12322786                   2                    1   

                                      NAL Interpretation  TET Interpretation  \
analysis_results/aligned/ERR12322786                   0                   0   

                                

In [11]:
from sklearn.preprocessing import LabelEncoder

# Possible class labels (must match what was used in training).
# NOTE: LabelEncoder sorts its classes, so the order written here does not
# define the integer codes. With these three labels the codes decode as
# 0 -> 'I', 1 -> 'R', 2 -> 'S'. Adding another label (e.g. 'NAM') shifts the
# code of every label that sorts after it.
# NOTE(review): this only reproduces the training encoding if training also
# used a LabelEncoder fitted on exactly this label set for every antibiotic
# column — confirm, or persist the training encoders and load them here.
labels_list = ['R', 'S', 'I']  # add 'NAM' if you used it

# The encoder is identical for every column, so fit it once instead of
# re-creating and re-fitting it on each loop iteration.
le = LabelEncoder().fit(labels_list)

# Create a new DataFrame to hold human-readable predictions
predictions_readable = pd.DataFrame(index=predictions_df.index)

# Decode column by column; inverse_transform raises ValueError if a column
# contains a code outside the fitted classes.
for col in predictions_df.columns:
    predictions_readable[col] = le.inverse_transform(predictions_df[col])

# Check the results
print(predictions_readable)

# Save to CSV
predictions_readable.to_csv(output_file)
print("✅ Predictions with labels saved.")



                                     AMI Interpretation AMP Interpretation  \
analysis_results/aligned/ERR12322786                  R                  I   

                                     AZI Interpretation FOT Interpretation  \
analysis_results/aligned/ERR12322786                  R                  I   

                                     TAZ Interpretation CHL Interpretation  \
analysis_results/aligned/ERR12322786                  R                  S   

                                     CIP Interpretation COL Interpretation  \
analysis_results/aligned/ERR12322786                  R                  I   

                                     GEN Interpretation MERO Interpretation  \
analysis_results/aligned/ERR12322786                  S                   R   

                                     NAL Interpretation TET Interpretation  \
analysis_results/aligned/ERR12322786                  I                  I   

                                     TGC Interpretation 