In [1]:
!pip install scanpy
!pip install flwr
!pip install autogluon


Collecting scanpy
  Downloading scanpy-1.11.4-py3-none-any.whl.metadata (9.2 kB)
Collecting anndata>=0.8 (from scanpy)
  Downloading anndata-0.12.2-py3-none-any.whl.metadata (9.6 kB)
Collecting legacy-api-wrap>=1.4.1 (from scanpy)
  Downloading legacy_api_wrap-1.4.1-py3-none-any.whl.metadata (2.1 kB)
Collecting session-info2 (from scanpy)
  Downloading session_info2-0.2.3-py3-none-any.whl.metadata (3.4 kB)
Collecting array-api-compat>=1.7.1 (from anndata>=0.8->scanpy)
  Downloading array_api_compat-1.12.0-py3-none-any.whl.metadata (2.5 kB)
Collecting zarr!=3.0.*,>=2.18.7 (from anndata>=0.8->scanpy)
  Downloading zarr-3.1.3-py3-none-any.whl.metadata (10 kB)
Collecting donfig>=0.8 (from zarr!=3.0.*,>=2.18.7->anndata>=0.8->scanpy)
  Downloading donfig-0.8.1.post1-py3-none-any.whl.metadata (5.0 kB)
Collecting numcodecs>=0.14 (from numcodecs[crc32c]>=0.14->zarr!=3.0.*,>=2.18.7->anndata>=0.8->scanpy)
  Downloading numcodecs-0.16.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.me

Collecting autogluon
  Downloading autogluon-1.4.0-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.core==1.4.0 (from autogluon.core[all]==1.4.0->autogluon)
  Downloading autogluon.core-1.4.0-py3-none-any.whl.metadata (12 kB)
Collecting autogluon.features==1.4.0 (from autogluon)
  Downloading autogluon.features-1.4.0-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.tabular==1.4.0 (from autogluon.tabular[all]==1.4.0->autogluon)
  Downloading autogluon.tabular-1.4.0-py3-none-any.whl.metadata (16 kB)
Collecting autogluon.multimodal==1.4.0 (from autogluon)
  Downloading autogluon.multimodal-1.4.0-py3-none-any.whl.metadata (13 kB)
Collecting autogluon.timeseries==1.4.0 (from autogluon.timeseries[all]==1.4.0->autogluon)
  Downloading autogluon.timeseries-1.4.0-py3-none-any.whl.metadata (12 kB)
Collecting boto3<2,>=1.10 (from autogluon.core==1.4.0->autogluon.core[all]==1.4.0->autogluon)
  Downloading boto3-1.40.50-py3-none-any.whl.metadata (6.7 kB)
Collecting autogluon.common==1.4

## Dataset

In [2]:
import os
os.environ["SCIPY_ARRAY_API"] = "1"

import gdown
import numpy as np
import pandas as pd
import anndata as ad
from sklearn.neural_network import MLPClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from scipy.sparse import issparse
import matplotlib.pyplot as plt
import seaborn as sns
import random
import torch
import torch.nn as nn
import lightgbm as lgb
import joblib
from sklearn.ensemble import RandomForestClassifier


# Config
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

os.environ['PYTHONHASHSEED'] = str(SEED)

file_id = "110eYMgseyD32YIS9xOMbOpJ76wnDXahR"
gdown.download(f"https://drive.google.com/uc?id={file_id}", output="TCGA_BRCA_RNA_with_TinX.h5ad", quiet=False)


Using device: cuda


Downloading...
From (original): https://drive.google.com/uc?id=110eYMgseyD32YIS9xOMbOpJ76wnDXahR
From (redirected): https://drive.google.com/uc?id=110eYMgseyD32YIS9xOMbOpJ76wnDXahR&confirm=t&uuid=493c4a86-198a-464c-abf1-83a3885d487e
To: /content/TCGA_BRCA_RNA_with_TinX.h5ad
100%|██████████| 574M/574M [00:10<00:00, 53.5MB/s]


'TCGA_BRCA_RNA_with_TinX.h5ad'

In [3]:
import scanpy as sc
import pandas as pd

# === Paths ===
adata_path = "/content/TCGA_BRCA_RNA_with_TinX.h5ad"
test_csv_path = "/content/test_metadata_THENEWEST - 28.csv"
train_h5ad_path = "/content/RNA_train.h5ad"
test_h5ad_path = "/content/RNA_test.h5ad"

# === Label mappings ===
label_map = {
    "Stage I": 0,
    "Stage II": 1,
    "Stage III": 2,
    "Stage IV": 3,
}
stage_map = {
    "Stage1": "Stage I",
    "Stage2": "Stage II",
    "Stage3": "Stage III",
    "Stage4": "Stage IV",
}

# === Load .h5ad data ===
adata = sc.read_h5ad(adata_path)
adata.obs["patient_id"] = adata.obs["patient_id"].astype(str)

# === Load test_metadata.csv and fix label format ===
test_df = pd.read_csv(test_csv_path)
test_df["patient_id"] = test_df["patient_id"].astype(str)
test_df["label"] = test_df["label"].str.strip()
test_df["stage"] = test_df["label"].map(stage_map)  # Convert e.g. "Stage4" → "Stage IV"

# === 🔍 Check patient ID consistency ===
csv_patient_ids = set(test_df["patient_id"])
adata_patient_ids = set(adata.obs["patient_id"])
missing_in_adata = csv_patient_ids - adata_patient_ids
if missing_in_adata:
    print("The following patient_id(s) exist in test_metadata.csv but were not found in .h5ad:")
    print(missing_in_adata)
else:
    print("All patient_id(s) in test_metadata.csv are present in the .h5ad dataset.")

# === 1. Extract test set by patient ID ===
test_patients = set(test_df["patient_id"])
is_test = adata.obs["patient_id"].isin(test_patients)
adata_test = adata[is_test].copy()

# De-duplicate: keep only one sample per patient_id
adata_test = adata_test[adata_test.obs.groupby("patient_id").head(1).index]

# Assign correct stage labels from test_metadata.csv
patient_to_stage = dict(zip(test_df["patient_id"], test_df["stage"]))
adata_test.obs["stage"] = adata_test.obs["patient_id"].map(patient_to_stage)

# === 🔍 Check for unmapped test samples ===
unmapped = adata_test.obs[adata_test.obs["stage"].isna()]
if not unmapped.empty:
    print("The following patient_id(s) were found in .h5ad but failed to map a stage label:")
    print(unmapped["patient_id"].tolist())
else:
    print("All test samples successfully mapped to stage labels.")

# === 2. The rest are used as training set ===
adata_train = adata[~is_test].copy()

# === Save output files ===
adata_train.write(train_h5ad_path)
adata_test.write(test_h5ad_path)

# === Final summary ===
print("Training and test sets saved:")
print("Test samples:", adata_test.shape[0], "→", test_h5ad_path)
print("Train samples:", adata_train.shape[0], "→", train_h5ad_path)
print("Test label distribution:")
print(adata_test.obs["stage"].value_counts())

All patient_id(s) in test_metadata.csv are present in the .h5ad dataset.
All test samples successfully mapped to stage labels.


  adata_test.obs["stage"] = adata_test.obs["patient_id"].map(patient_to_stage)


Training and test sets saved:
Test samples: 28 → /content/RNA_test.h5ad
Train samples: 1202 → /content/RNA_train.h5ad
Test label distribution:
stage
Stage II     14
Stage III     7
Stage I       6
Stage IV      1
Name: count, dtype: int64


## Train

In [5]:
import numpy as np
import pandas as pd
import anndata as ad
import joblib
from scipy.sparse import issparse
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from autogluon.tabular import TabularDataset, TabularPredictor

# === File paths ===
scaler_path = "/content/RNA_scaler.pkl"
selector_path = "/content/RNA_selector_kbest.pkl"
autogluon_model_path = "/content/autogluon_rna_model"

# === 1. Load data ===
adata = ad.read_h5ad(train_h5ad_path)
X = adata.X.toarray() if issparse(adata.X) else adata.X
y_raw = adata.obs["stage"].values
label_map = {"Stage I": 0, "Stage II": 1, "Stage III": 2, "Stage IV": 3}
label_names = list(label_map.keys())
y = np.array([label_map.get(s, 3) for s in y_raw])  # Default to Stage IV if unknown

# === 2. Sample-level cleaning ===
expr_sum = X.sum(axis=1)
z_scores = (expr_sum - np.mean(expr_sum)) / np.std(expr_sum)
mask = np.abs(z_scores) < 3
X = X[mask]
y = y[mask]

# === 3. Scaling ===
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
joblib.dump(scaler, scaler_path)

# === 4. Train-validation split ===
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, stratify=y, random_state=42
)

# === 5. SelectKBest Feature Selection ===
selector = SelectKBest(score_func=f_classif, k=500)
X_train_sel = selector.fit_transform(X_train, y_train)
X_val_sel = selector.transform(X_val)
joblib.dump(selector, selector_path)

# === 6. SMOTE Over-sampling ===
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_sel, y_train)

# === 7. Prepare for AutoGluon ===
train_df = pd.DataFrame(X_resampled)
train_df["stage"] = y_resampled
val_df = pd.DataFrame(X_val_sel)
val_df["stage"] = y_val

train_data = TabularDataset(train_df)
val_data = TabularDataset(val_df)

# === 8. AutoGluon Training with GPU models and more time ===
predictor = TabularPredictor(
    label="stage",
    path=autogluon_model_path,
    eval_metric="f1_weighted",
    problem_type="multiclass"
)
predictor.fit(
    train_data=train_data,
    tuning_data=val_data,
    use_bag_holdout=True,
    time_limit=1200,
    presets="best_quality",
    hyperparameters={
        "GBM": {"ag_args_fit": {"hyperparameter_tune_kwargs": "auto"}},
        "CAT": {"ag_args_fit": {"hyperparameter_tune_kwargs": "auto"}},
        "XGB": {"ag_args_fit": {"hyperparameter_tune_kwargs": "auto"}},
        "RF":  {"ag_args_fit": {"hyperparameter_tune_kwargs": "auto"}}
    }
)


  f = msb / msw
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Sat Sep  6 09:54:41 UTC 2025
CPU Count:          2
Memory Avail:       6.70 GB / 12.67 GB (52.8%)
Disk Space Avail:   65.84 GB / 112.64 GB (58.4%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to False. Reason: Skip dynamic_stacking when use_bag_holdout is enabled. (use_bag_holdout=True)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ... Time limit = 1200s
AutoGluon will save models to "/content/autogluon_rna_model"
Train Data Rows:    2204
Train Data Columns: 500
Tuning Data Rows:    241
Tuning Data Columns: 500
Label Column:       stage
Problem Type:       multiclass
Preprocessing data ...
Train Data Class Count: 4
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFea

<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7e53a931fbc0>

In [6]:
# === 9. Evaluation ===
val_preds = predictor.predict(val_data.drop(columns=["stage"]))
print("Validation Classification Report:")
print(classification_report(val_data["stage"], val_preds, target_names=label_names))
print("Confusion Matrix:")
print(pd.DataFrame(confusion_matrix(val_data["stage"], val_preds), index=label_names, columns=label_names))

Validation Classification Report:
              precision    recall  f1-score   support

     Stage I       0.61      0.97      0.75        39
    Stage II       0.84      0.77      0.80       139
   Stage III       0.62      0.56      0.59        54
    Stage IV       0.33      0.11      0.17         9

    accuracy                           0.73       241
   macro avg       0.60      0.60      0.58       241
weighted avg       0.73      0.73      0.72       241

Confusion Matrix:
           Stage I  Stage II  Stage III  Stage IV
Stage I         38         0          1         0
Stage II        17       107         13         2
Stage III        6        18         30         0
Stage IV         1         3          4         1


In [7]:
predictor.leaderboard(val_data, silent=True)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,XGBoost_BAG_L1,0.722073,0.722073,f1_weighted,0.076245,0.395691,54.515961,0.076245,0.395691,54.515961,1,True,4
1,WeightedEnsemble_L2,0.722073,0.722073,f1_weighted,0.077626,0.39917,54.91047,0.001381,0.003479,0.394509,2,True,5
2,WeightedEnsemble_L3,0.722073,0.722073,f1_weighted,0.077865,0.398871,54.807939,0.00162,0.00318,0.291978,3,True,9
3,CatBoost_BAG_L1,0.706863,0.706863,f1_weighted,0.147351,0.365555,275.729341,0.147351,0.365555,275.729341,1,True,3
4,LightGBM_BAG_L1,0.691928,0.691928,f1_weighted,0.324957,0.282369,432.75196,0.324957,0.282369,432.75196,1,True,1
5,RandomForest_BAG_L2,0.668496,0.668496,f1_weighted,0.758078,1.243974,781.833327,0.100327,0.100643,9.797249,2,True,7
6,RandomForest_BAG_L1,0.582469,0.582469,f1_weighted,0.109198,0.099715,9.038815,0.109198,0.099715,9.038815,1,True,2
7,LightGBM_BAG_L2,0.572998,0.572998,f1_weighted,0.836245,1.43107,1109.690691,0.178493,0.287739,337.654614,2,True,6
8,CatBoost_BAG_L2,0.549296,0.549296,f1_weighted,0.798616,1.388709,808.531157,0.140865,0.245378,36.49508,2,True,8


In [8]:
predictor.fit_summary()

*** Summary of fit() ***
Estimated performance of each model:
                 model  score_val  eval_metric  pred_time_val     fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0       XGBoost_BAG_L1   0.722073  f1_weighted       0.395691    54.515961                0.395691          54.515961            1       True          4
1  WeightedEnsemble_L3   0.722073  f1_weighted       0.398871    54.807939                0.003180           0.291978            3       True          9
2  WeightedEnsemble_L2   0.722073  f1_weighted       0.399170    54.910470                0.003479           0.394509            2       True          5
3      CatBoost_BAG_L1   0.706863  f1_weighted       0.365555   275.729341                0.365555         275.729341            1       True          3
4      LightGBM_BAG_L1   0.691928  f1_weighted       0.282369   432.751960                0.282369         432.751960            1       True          1
5  RandomForest_BAG_

{'model_types': {'LightGBM_BAG_L1': 'StackerEnsembleModel_LGB',
  'RandomForest_BAG_L1': 'StackerEnsembleModel_RF',
  'CatBoost_BAG_L1': 'StackerEnsembleModel_CatBoost',
  'XGBoost_BAG_L1': 'StackerEnsembleModel_XGBoost',
  'WeightedEnsemble_L2': 'WeightedEnsembleModel',
  'LightGBM_BAG_L2': 'StackerEnsembleModel_LGB',
  'RandomForest_BAG_L2': 'StackerEnsembleModel_RF',
  'CatBoost_BAG_L2': 'StackerEnsembleModel_CatBoost',
  'WeightedEnsemble_L3': 'WeightedEnsembleModel'},
 'model_performance': {'LightGBM_BAG_L1': 0.6919283042459129,
  'RandomForest_BAG_L1': 0.582469153410113,
  'CatBoost_BAG_L1': 0.7068626814650701,
  'XGBoost_BAG_L1': 0.7220725321003851,
  'WeightedEnsemble_L2': 0.7220725321003851,
  'LightGBM_BAG_L2': 0.572997736710296,
  'RandomForest_BAG_L2': 0.6684961103873636,
  'CatBoost_BAG_L2': 0.5492963985546657,
  'WeightedEnsemble_L3': 0.7220725321003851},
 'model_best': 'WeightedEnsemble_L2',
 'model_paths': {'LightGBM_BAG_L1': ['LightGBM_BAG_L1'],
  'RandomForest_BAG_L1'

## Client

In [13]:
import numpy as np
import pandas as pd
import anndata as ad
import joblib
import json
from scipy.sparse import issparse
from sklearn.metrics import classification_report, confusion_matrix
import flwr as fl
from autogluon.tabular import TabularPredictor

# ===== Parameter Settings =====
test_h5ad_path   = "/content/RNA_test.h5ad"
SERVER_ADDRESS   = "192.168.0.6:8080"
MODALITY         = "RNA"
WEIGHT           = 0.3

label_map = {"Stage I": 0, "Stage II": 1, "Stage III": 2, "Stage IV": 3}
label_names = list(label_map.keys())
int_to_stage = {v: k for k, v in label_map.items()}

class RNAClient(fl.client.NumPyClient):
    def __init__(self, test_h5ad_path, scaler_path, selector_path, model_path, modality, weight):
        self.modality = modality
        self.weight = weight
        self.rows = []
        self._load_and_predict(test_h5ad_path, scaler_path, selector_path, model_path)

    def _load_and_predict(self, h5ad_path, scaler_path, selector_path, model_path):
        # === 1. Load test data ===
        adata = ad.read_h5ad(h5ad_path)
        X = adata.X.toarray() if issparse(adata.X) else adata.X
        y_raw = adata.obs["stage"].values
        pids = adata.obs["patient_id"].astype(str).values
        y_true = np.array([label_map.get(s, 3) for s in y_raw])

         # === 2. Load scaler & selector ===
        scaler = joblib.load(scaler_path)
        selector = joblib.load(selector_path)

        X_scaled = scaler.transform(X)
        X_selected = selector.transform(X_scaled)

        # === 3. Wrap as DataFrame for AutoGluon ===
        df = pd.DataFrame(X_selected)

        # === 4. Load AutoGluon model and predict ===
        predictor = TabularPredictor.load(model_path)

        y_pred = predictor.predict(df)
        y_prob = predictor.predict_proba(df)

        # AutoGluon returns string labels ("0", "1", ...) → map to int
        y_pred_int = y_pred.astype(int).values

        print("Server_RNA_test Classification Report:")
        print(classification_report(y_true, y_pred_int, target_names=label_names))
        print("Server_RNA_test Confusion Matrix:")
        print(pd.DataFrame(confusion_matrix(y_true, y_pred_int), index=label_names, columns=label_names))

        # === 5. Format to JSON ===
        for i, probs in enumerate(y_prob.values):
            self.rows.append({
                "patient_id": pids[i],
                "probs": probs.tolist(),
                "modality": self.modality,
                "weight": self.weight
            })

        print(f"\n{len(self.rows)} predictions have been generated.")

    def get_parameters(self, config): return []
    def fit(self, parameters, config): return [], 0, {}
    def evaluate(self, parameters, config):
        task = config.get("task", "")
        metrics = {}
        if task == "predict":
            print(f"\n📤 RNA client uploads {len(self.rows)} predictions.")
            metrics = {
                "preds_json": json.dumps(self.rows).encode("utf-8")
            }
        return 0.0, len(self.rows), metrics

# ===== Start client =====
client = RNAClient(test_h5ad_path, scaler_path, selector_path, autogluon_model_path, MODALITY, WEIGHT)

# fl.client.start_numpy_client(server_address=SERVER_ADDRESS, client=client)

Server_RNA_test Classification Report:
              precision    recall  f1-score   support

     Stage I       0.57      0.67      0.62         6
    Stage II       0.71      0.71      0.71        14
   Stage III       0.71      0.71      0.71         7
    Stage IV       0.00      0.00      0.00         1

    accuracy                           0.68        28
   macro avg       0.50      0.52      0.51        28
weighted avg       0.66      0.68      0.67        28

Server_RNA_test Confusion Matrix:
           Stage I  Stage II  Stage III  Stage IV
Stage I          4         2          0         0
Stage II         3        10          1         0
Stage III        0         2          5         0
Stage IV         0         0          1         0

28 predictions have been generated.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return datetime.utcnow().replace(tzinfo=utc)


## Fusion

In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# === 1. Load prediction and metadata ===
df = pd.read_csv("/content/predictions_fused.csv")
meta = pd.read_csv("/content/test_metadata_THENEWEST - 28.csv")

# === 2. Process labels ===
stage_map = {"Stage1": 0, "Stage2": 1, "Stage3": 2, "Stage4": 3}
meta["label"] = meta["label"].str.strip()
meta["stage"] = meta["label"].map(stage_map)

# Merge true stage labels into the predictions dataframe
df = df.merge(meta[["patient_id", "stage"]], on="patient_id")
df = df.dropna(subset=["probs_fused", "stage"])  # Drop entries without stage or prediction

# === 3. Extract features and labels ===
X = df["probs_fused"].apply(eval).tolist()  # Convert stringified lists into actual lists
X = np.array(X)
y = df["stage"].values

# === 4. Separate Stage 4 samples to use as a dedicated test set ===
mask_stage4 = y == 3
X_stage4 = X[mask_stage4]
y_stage4 = y[mask_stage4]

# === 5. Split the remaining data into training and test sets ===
X_rest = X[~mask_stage4]
y_rest = y[~mask_stage4]

X_train, X_test_rest, y_train, y_test_rest = train_test_split(
    X_rest, y_rest, test_size=0.2, random_state=42, stratify=y_rest
)

# === 6. Combine with Stage 4 to form final test set ===
X_test = np.concatenate([X_test_rest, X_stage4])
y_test = np.concatenate([y_test_rest, y_stage4])

# === 7. Train a logistic regression fusion model ===
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# === 8. Evaluate on training set ===
y_train_pred = clf.predict(X_train)
print("Server_lg Train Classification Report:")
print(classification_report(y_train, y_train_pred))
print("Server_lgTrain Confusion Matrix:")
print(confusion_matrix(y_train, y_train_pred))


Server_lg Train Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.60      0.75         5
           1       0.67      0.91      0.77        11
           2       0.67      0.40      0.50         5

    accuracy                           0.71        21
   macro avg       0.78      0.64      0.67        21
weighted avg       0.75      0.71      0.70        21

Server_lgTrain Confusion Matrix:
[[ 3  2  0]
 [ 0 10  1]
 [ 0  3  2]]


  opt_res = optimize.minimize(
  return datetime.utcnow().replace(tzinfo=utc)


In [11]:

# === 9. Evaluate on test set ===
y_test_pred = clf.predict(X_test)

label_names = ["Stage I", "Stage II", "Stage III", "Stage IV"]
print("Server_lg Test Classification Report:")
print(classification_report(y_test, y_test_pred, target_names=label_names))
print("Server_lg Test Confusion Matrix:")
print(pd.DataFrame(confusion_matrix(y_test, y_test_pred), index=label_names, columns=label_names))

Server_lg Test Classification Report:
              precision    recall  f1-score   support

     Stage I       1.00      1.00      1.00         1
    Stage II       0.75      1.00      0.86         3
   Stage III       0.50      0.50      0.50         2
    Stage IV       0.00      0.00      0.00         1

    accuracy                           0.71         7
   macro avg       0.56      0.62      0.59         7
weighted avg       0.61      0.71      0.65         7

Server_lg Test Confusion Matrix:
           Stage I  Stage II  Stage III  Stage IV
Stage I          1         0          0         0
Stage II         0         3          0         0
Stage III        0         1          1         0
Stage IV         0         0          1         0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return datetime.utcnow().replace(tzinfo=utc)


## Download

In [14]:
from google.colab import files
files.download(train_h5ad_path)
files.download(test_h5ad_path)
files.download(scaler_path)
files.download(selector_path)


# 1. 将文件夹压缩
!zip -r autogluon_rna_model.zip autogluon_rna_model

# 2. 下载压缩后的文件
from google.colab import files
files.download('autogluon_rna_model.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

  adding: autogluon_rna_model/ (stored 0%)
  adding: autogluon_rna_model/version.txt (stored 0%)
  adding: autogluon_rna_model/predictor.pkl (deflated 36%)
  adding: autogluon_rna_model/SummaryOfModels.html (deflated 76%)
  adding: autogluon_rna_model/metadata.json (deflated 67%)
  adding: autogluon_rna_model/learner.pkl (deflated 93%)
  adding: autogluon_rna_model/utils/ (stored 0%)
  adding: autogluon_rna_model/utils/attr/ (stored 0%)
  adding: autogluon_rna_model/utils/attr/CatBoost_BAG_L1/ (stored 0%)
  adding: autogluon_rna_model/utils/attr/CatBoost_BAG_L1/y_pred_proba_val.pkl (deflated 9%)
  adding: autogluon_rna_model/utils/attr/CatBoost_BAG_L2/ (stored 0%)
  adding: autogluon_rna_model/utils/attr/CatBoost_BAG_L2/y_pred_proba_val.pkl (deflated 15%)
  adding: autogluon_rna_model/utils/attr/LightGBM_BAG_L1/ (stored 0%)
  adding: autogluon_rna_model/utils/attr/LightGBM_BAG_L1/y_pred_proba_val.pkl (deflated 7%)
  adding: autogluon_rna_model/utils/attr/RandomForest_BAG_L1/ (stored 0%

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>