In [1]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.client import GoogleCredentials
import os
import matplotlib.pyplot as plt
import torch



In [7]:
# Whole-slide image file names for each category, built from the numeric slide ids.
# The id order is kept exactly as listed, because later cells iterate these lists in order.
B = [f"M-{n}.ndpi" for n in (1, 104, 105, 108, 112, 121, 24, 30, 32)]
E = [f"M-{n}.ndpi" for n in (10, 100, 103, 109, 11, 110, 111, 113)]
S = [f"M-{n}.ndpi" for n in (101, 114, 65, 86, 87)]

In [3]:
# Authenticate the current Colab user. The download cell below repeats this call
# before it builds its Drive client.
from google.colab import auth
auth.authenticate_user()

In [4]:
# Autenticazione con scope più ampi
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import os
import time

# Authenticate the Colab user, then build a PyDrive client from the default credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Drive folder that is listed below, and the local root the slides are downloaded into.
folder_id = '1gtE_lNw4MYiarzIu4qVAo-uu6aSzxS1d'
base_dir = '/content/ndpi_files'
os.makedirs(base_dir, exist_ok=True)

# One local sub-folder per category.
for sub in ['B', 'E', 'S']:
    category_dir = os.path.join(base_dir, sub)
    os.makedirs(category_dir, exist_ok=True)

# List the (non-trashed) files of the Drive folder, ordered by title.
try:
    file_list = drive.ListFile({
        'q': f"'{folder_id}' in parents and trashed=false",
        'orderBy': 'title'
    }).GetList()
except Exception as e:
    print(f"Errore nel listare i file: {e}")
    # Raise instead of calling exit(): in a notebook, exit() shuts the kernel down
    # (losing all state), whereas an exception just stops this cell / "Run all".
    raise RuntimeError(f"Errore nel listare i file: {e}") from e

# Lookup tables used by the download loop:
#   file_dict: Drive title -> Drive file object (a later duplicate title overwrites an earlier one)
#   order_map: category letter -> list of expected file names
file_dict = {f['title']: f for f in file_list}
order_map = {'B': B, 'E': E, 'S': S}

# Download with retries and capped exponential backoff
def download_with_retry(file_obj, dest_path, max_retries=100, base_delay=1.0, max_delay=60.0):
    """Download a Drive file to ``dest_path``, retrying on any error.

    Args:
        file_obj: Object exposing ``file_obj['title']`` and
            ``file_obj.GetContentFile(path)``.
        dest_path: Local path the content is written to.
        max_retries: Maximum number of download attempts.
        base_delay: Delay in seconds after the first failed attempt; it doubles
            after every further failure.
        max_delay: Upper bound in seconds for a single wait. Without this cap the
            doubling delay exceeds an hour by the 13th attempt and grows without
            limit, so the default ``max_retries=100`` could never complete.

    Returns:
        True if an attempt succeeded, False otherwise (including when
        ``max_retries`` is 0 or negative, so the result is always a bool).
    """
    for attempt in range(max_retries):
        try:
            print(f"Tentativo {attempt + 1} di download per {file_obj['title']}")
            file_obj.GetContentFile(dest_path)
            return True
        except Exception as e:
            print(f"Errore tentativo {attempt + 1}: {e}")
            # Only wait when another attempt will follow.
            if attempt < max_retries - 1:
                time.sleep(min(max_delay, base_delay * 2 ** attempt))

    print(f"Fallito download di {file_obj['title']} dopo {max_retries} tentativi")
    return False

# Download every expected slide into its category folder, keeping track of the
# names that are missing on Drive and of the downloads that ultimately failed.
missing_files = []
failed_downloads = []

for category, file_names in order_map.items():
    for file_name in file_names:
        f = file_dict.get(file_name)
        if f is None:
            # Expected slide not present in the Drive listing: record it instead of skipping silently.
            missing_files.append(file_name)
            continue

        dest = os.path.join(base_dir, category, file_name)
        print(f"Scarico {file_name} in {category} …")

        # Permission check is informational only; a failure here must not block the download.
        try:
            permissions = f.GetPermissions()
            print(f"Permessi per {file_name}: {len(permissions)} permessi trovati")
        except Exception as e:
            print(f"Impossibile verificare permessi per {file_name}: {e}")

        if not download_with_retry(f, dest):
            failed_downloads.append(file_name)

# Summarise problems so that an incomplete dataset is not archived unnoticed.
if missing_files:
    print(f"File non trovati su Drive: {missing_files}")
if failed_downloads:
    print(f"Download falliti: {failed_downloads}")

Scarico M-87.ndpi in S …
Permessi per M-87.ndpi: 2 permessi trovati
Tentativo 1 di download per M-87.ndpi


In [8]:
# Build the manifest rows [case_id, slide_id, label]; the slide name (the file name
# up to the first dot) is used for both ids. Categories are emitted in B, S, E order.
dataset_csv = []
for label, slide_files in (('B', B), ('S', S), ('E', E)):
    for slide_file in slide_files:
        slide_name = slide_file.split('.')[0]
        dataset_csv.append([slide_name, slide_name, label])

In [9]:
# Show the manifest rows followed by their count, one per line.
print(dataset_csv, len(dataset_csv), sep="\n")

[['M-1', 'M-1', 'B'], ['M-104', 'M-104', 'B'], ['M-105', 'M-105', 'B'], ['M-108', 'M-108', 'B'], ['M-112', 'M-112', 'B'], ['M-121', 'M-121', 'B'], ['M-24', 'M-24', 'B'], ['M-30', 'M-30', 'B'], ['M-32', 'M-32', 'B'], ['M-101', 'M-101', 'S'], ['M-114', 'M-114', 'S'], ['M-65', 'M-65', 'S'], ['M-86', 'M-86', 'S'], ['M-87', 'M-87', 'S'], ['M-10', 'M-10', 'E'], ['M-100', 'M-100', 'E'], ['M-103', 'M-103', 'E'], ['M-109', 'M-109', 'E'], ['M-11', 'M-11', 'E'], ['M-110', 'M-110', 'E'], ['M-111', 'M-111', 'E'], ['M-113', 'M-113', 'E']]
22


In [10]:
import csv

# Write the manifest inside the download root (base_dir), i.e. the same folder that
# is archived later, rather than to a path relative to the current working directory.
file_csv = os.path.join(base_dir, 'dataset.csv')
os.makedirs(os.path.dirname(file_csv), exist_ok=True)

# newline='' is what the csv module expects; the encoding is explicit so the file
# does not depend on the platform default.
with open(file_csv, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Header row
    writer.writerow(['case_id', 'slide_id', 'label'])
    # Data rows
    writer.writerows(dataset_csv)

print(f"File CSV salvato: {file_csv}")

File CSV salvato: ndpi_files/dataset.csv


In [None]:
!pip install openslide-python openslide-bin
import openslide

import glob

ndpi_path_b = glob.glob('/content/ndpi_files/B/*.ndpi')[0]
print(len(ndpi_path_b))
slide_b = openslide.OpenSlide(ndpi_path_b)
ndpi_path_e = glob.glob('/content/ndpi_files/E/*.ndpi')[0]
print(len(ndpi_path_e))
slide_e = openslide.OpenSlide(ndpi_path_e)
ndpi_path_s = glob.glob('/content/ndpi_files/S/*.ndpi')[0]
print(len(ndpi_path_s))
slide_s = openslide.OpenSlide(ndpi_path_s)

print("Dimensione B livello 0:", slide_b.level_dimensions[0])
print("Dimensione E livello 0:", slide_e.level_dimensions[0])
print("Dimensione S livello 0:", slide_s.level_dimensions[0])



In [15]:
# Crea un archivio ZIP della cartella
import zipfile # Import the zipfile module

def create_zip_archive(folder_path, zip_name=None):
    """Create a ZIP archive of a folder, preserving its directory structure.

    Entries are stored relative to the parent of ``folder_path``, so every
    archive name starts with the folder's own name (e.g. ``ndpi_files/B/x.ndpi``).

    Args:
        folder_path: Folder to archive. A trailing path separator is tolerated.
        zip_name: Path of the archive to write. Defaults to
            ``<folder name>.zip`` in the current working directory.

    Returns:
        The path of the archive that was written (``zip_name``).
    """
    # Drop any trailing separator: otherwise basename() is '' (archive named ".zip")
    # and dirname() is the folder itself (top-level folder lost from entry names).
    folder_path = os.path.normpath(folder_path)

    if zip_name is None:
        zip_name = f"{os.path.basename(folder_path)}.zip"

    zip_abs = os.path.abspath(zip_name)
    parent_dir = os.path.dirname(folder_path)

    print(f"🗜️ Creando archivio ZIP: {zip_name}")

    with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                # The archive may live inside the folder being zipped; never add it to itself.
                if os.path.abspath(file_path) == zip_abs:
                    continue
                # Keep the folder structure inside the archive
                arc_name = os.path.relpath(file_path, parent_dir)
                zipf.write(file_path, arc_name)
                print(f"  📁 Aggiunto: {arc_name}")

    # Report the size of the finished archive
    zip_size = os.path.getsize(zip_name)
    print(f"✅ Archivio creato: {zip_name} ({zip_size/1024/1024:.2f} MB)")
    return zip_name

In [16]:
# Folder to archive (change this to point at your own folder) and the archive's file name.
folder_to_upload = '/content/ndpi_files'
zip_filename = 'datasetWSI.zip'

# Build the archive; create_zip_archive returns the name of the ZIP it wrote.
zip_path = create_zip_archive(folder_path=folder_to_upload, zip_name=zip_filename)


🗜️ Creando archivio ZIP: datasetWSI.zip
  📁 Aggiunto: ndpi_files/dataset.csv
  📁 Aggiunto: ndpi_files/E/M-100.ndpi
  📁 Aggiunto: ndpi_files/E/M-103.ndpi
  📁 Aggiunto: ndpi_files/E/M-10.ndpi
  📁 Aggiunto: ndpi_files/E/M-109.ndpi
  📁 Aggiunto: ndpi_files/E/M-113.ndpi
  📁 Aggiunto: ndpi_files/E/M-11.ndpi
  📁 Aggiunto: ndpi_files/E/M-111.ndpi
  📁 Aggiunto: ndpi_files/E/M-110.ndpi
  📁 Aggiunto: ndpi_files/S/M-86.ndpi
  📁 Aggiunto: ndpi_files/S/M-101.ndpi
  📁 Aggiunto: ndpi_files/S/M-65.ndpi
  📁 Aggiunto: ndpi_files/S/M-87.ndpi
  📁 Aggiunto: ndpi_files/S/M-114.ndpi
  📁 Aggiunto: ndpi_files/B/M-1.ndpi
  📁 Aggiunto: ndpi_files/B/M-24.ndpi
  📁 Aggiunto: ndpi_files/B/M-30.ndpi
  📁 Aggiunto: ndpi_files/B/M-32.ndpi
  📁 Aggiunto: ndpi_files/B/M-112.ndpi
  📁 Aggiunto: ndpi_files/B/M-104.ndpi
  📁 Aggiunto: ndpi_files/B/M-121.ndpi
  📁 Aggiunto: ndpi_files/B/M-105.ndpi
  📁 Aggiunto: ndpi_files/B/M-108.ndpi
✅ Archivio creato: datasetWSI.zip (11426.99 MB)


In [17]:
import torch
import requests
import json
import os

# Zenodo access token. Never hard-code it in the notebook: it is read from the
# ZENODO_ACCESS_TOKEN environment variable, with a hidden prompt as a fallback.
# SECURITY: the token that used to be committed on this line is compromised and
# must be revoked in the Zenodo account settings.
import getpass

ACCESS_TOKEN = os.environ.get('ZENODO_ACCESS_TOKEN') or getpass.getpass('Zenodo access token: ')

# Create a new deposition
def create_deposition():
    """Create a new Zenodo deposition carrying the dataset metadata.

    Returns:
        The decoded JSON body of the response (the caller reads its ``'id'``).

    Raises:
        requests.HTTPError: if Zenodo answers with an error status, instead of
            handing the error payload back as if it were a deposition.
    """
    url = 'https://zenodo.org/api/deposit/depositions'
    # Send the token in the Authorization header rather than the query string, so it
    # does not end up in URLs shown by logs or exception messages.
    headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}

    data = {
        'metadata': {
            'title': 'DatasetMLiA-Completo-WSI',
            'upload_type': 'dataset',
            'description': 'Dataset WSI',
            'creators': [{'name': 'Raf-Tony-Luca'}]
        }
    }

    # json= serialises the payload and sets the JSON Content-Type header.
    r = requests.post(url, json=data, headers=headers, timeout=60)
    r.raise_for_status()
    return r.json()

# Upload the file
def upload_file(deposition_id, file_path):
    """Upload ``file_path`` into the file bucket of a Zenodo deposition.

    Args:
        deposition_id: Id of an existing deposition.
        file_path: Local file to upload; it is stored under its base name.

    Returns:
        The decoded JSON body of the upload response.

    Raises:
        requests.HTTPError: if fetching the deposition or uploading the file
            returns an error status.
    """
    from urllib.parse import quote

    # Token goes in the Authorization header so it never appears in a URL.
    headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}

    # Get bucket URL
    url = f'https://zenodo.org/api/deposit/depositions/{deposition_id}'
    r = requests.get(url, headers=headers, timeout=60)
    r.raise_for_status()
    bucket_url = r.json()["links"]["bucket"]

    # Upload file: the open file object is passed as the request body. No timeout
    # is set here because the archive can be several gigabytes.
    filename = os.path.basename(file_path)
    with open(file_path, "rb") as fp:
        r = requests.put(f"{bucket_url}/{quote(filename)}",
                         data=fp,
                         headers=headers)
    r.raise_for_status()
    return r.json()

# Publish the dataset
def publish_deposition(deposition_id):
    """Publish a Zenodo deposition.

    Args:
        deposition_id: Id of the deposition to publish.

    Returns:
        The decoded JSON body of the response (the caller reads ``'doi'`` and
        ``'links'`` from it).

    Raises:
        requests.HTTPError: if Zenodo answers with an error status.
    """
    url = f'https://zenodo.org/api/deposit/depositions/{deposition_id}/actions/publish'
    # Token goes in the Authorization header so it never appears in a URL.
    headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
    r = requests.post(url, headers=headers)
    r.raise_for_status()
    return r.json()

# Run the upload: create the deposition, upload the archive, then publish.
# Each step is checked before moving on, because publishing cannot be undone.
if not os.path.isfile(zip_filename):
    # Fail before creating a deposition that would otherwise be left empty.
    raise FileNotFoundError(f"Archivio non trovato: {zip_filename}")

print("Creando deposizione...")
deposition = create_deposition()
if 'id' not in deposition:
    raise RuntimeError(f"Creazione della deposizione fallita: {deposition}")
deposition_id = deposition['id']

print(f"Caricando file... (ID: {deposition_id})")
upload_result = upload_file(deposition_id, zip_filename)
if 'key' not in upload_result:
    # A successful bucket upload describes the stored file (including its key).
    raise RuntimeError(f"Caricamento del file fallito: {upload_result}")

print("Pubblicando dataset...")
publication = publish_deposition(deposition_id)
if 'doi' not in publication:
    raise RuntimeError(f"Pubblicazione fallita: {publication}")

print(f"Dataset pubblicato! DOI: {publication['doi']}")
print(f"URL: {publication['links']['record_html']}")


Creando deposizione...
Caricando file... (ID: 15700269)
Pubblicando dataset...
Dataset pubblicato! DOI: 10.5281/zenodo.15700269
URL: https://zenodo.org/record/15700269
