In [11]:
# Explicit %pip magic installs into the running kernel's environment without
# depending on automagic; pinned to the version this notebook was run with.
%pip install tabulate==0.9.0

Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
Note: you may need to restart the kernel to use updated packages.


In [12]:
# Import necessary libraries
from sklearn.datasets import load_iris, load_wine, fetch_openml
import pandas as pd
from tabulate import tabulate  # Import tabulate for table formatting

# Accumulator for the per-dataset summary dicts; load_dataset() appends to it.
# It is re-created empty here, so re-running this cell starts from an empty list.
dataset_summaries = []

# Function to summarize each dataset
def summarize_dataset(name, data, target):
    """Build a one-row summary of a dataset.

    Args:
        name: Label stored under the "Dataset" key.
        data: Feature container exposing ``.shape`` with at least two
            dimensions; ``shape[0]`` is reported as the dataset size and
            ``shape[1]`` as the number of features.
        target: Labels; wrapped in a ``pandas.Series`` to count distinct values.

    Returns:
        A dict with the keys "Dataset", "Number of Classes",
        "Number of Features" and "Size of the Dataset", in that order.
    """
    # dropna=False makes a missing label count as its own distinct value,
    # which is what taking the length of .unique() does.
    class_count = pd.Series(target).nunique(dropna=False)
    feature_count = data.shape[1]
    sample_count = data.shape[0]

    summary = {}
    summary["Dataset"] = name
    summary["Number of Classes"] = class_count
    summary["Number of Features"] = feature_count
    summary["Size of the Dataset"] = sample_count
    return summary

# Function to safely load a dataset with error handling
def load_dataset(dataset_name, fetch_func, **kwargs):
    """Fetch one dataset, summarize it, and record the summary.

    Calls ``fetch_func(**kwargs, return_X_y=True)``, hands the two returned
    values to ``summarize_dataset`` and appends the resulting dict to the
    module-level ``dataset_summaries`` list. Nothing is returned.

    Any exception raised during these steps is caught and reported with
    ``print`` instead of being re-raised; in that case nothing is appended.

    Args:
        dataset_name: Label used in the summary row and in error messages.
        fetch_func: Callable that accepts ``return_X_y=True`` and returns a
            ``(data, target)`` pair.
        **kwargs: Extra keyword arguments forwarded to ``fetch_func``.
    """
    try:
        features, labels = fetch_func(**kwargs, return_X_y=True)
        dataset_summaries.append(summarize_dataset(dataset_name, features, labels))
    except ValueError as value_err:
        print(f"ValueError while loading {dataset_name}: {value_err}\n")
    except Exception as err:
        # Deliberately broad: every other failure is reported the same way.
        print(f"Failed to load {dataset_name}: {err}\n")

# Load and summarize all datasets
# Each entry is (display name, loader function, keyword arguments for the loader).
# Entries are processed top to bottom, so this is also the row order of the table.
dataset_specs = [
    ("Iris", load_iris, {}),
    ("WBDC (Breast Cancer Wisconsin Diagnostic)", fetch_openml, {"data_id": 1510}),
    ("Spambase", fetch_openml, {"name": "spambase", "version": 1}),
    ("Heart", fetch_openml, {"data_id": 533}),
    ("Glass", fetch_openml, {"name": "glass", "version": 2}),
    ("WBC (Breast Cancer Wisconsin Original)", fetch_openml, {"data_id": 15}),
    ("Ionosphere", fetch_openml, {"name": "ionosphere", "version": 1}),
    ("Arrhythmia", fetch_openml, {"name": "arrhythmia", "version": 1}),
    ("Multiple Features", fetch_openml, {"name": "mfeat-factors", "version": 1}),
    ("Australian", fetch_openml, {"name": "australian"}),  # looked up by name only, no version given
    ("German Number (Credit Dataset)", fetch_openml, {"name": "credit-g", "version": 1}),
    ("DNA", fetch_openml, {"name": "dna", "version": 1}),
    ("Wine", load_wine, {}),
    ("Vehicle", fetch_openml, {"name": "vehicle", "version": 2}),
    ("Waveform", fetch_openml, {"name": "waveform-5000", "version": 2}),
    ("Zoo", fetch_openml, {"name": "zoo", "version": 2}),
    ("Hillvalley", fetch_openml, {"name": "hill-valley", "version": 2}),
    ("Sonar", fetch_openml, {"name": "sonar", "version": 1}),
    ("Musk 1", fetch_openml, {"name": "musk", "version": 1}),
]

for display_name, loader, loader_kwargs in dataset_specs:
    load_dataset(display_name, loader, **loader_kwargs)

# Turn the collected summary dicts into a DataFrame, one row per recorded summary.
df_datasets = pd.DataFrame(dataset_summaries)

# Print the summary as a plain-text table, using the column names as headers
# and leaving out the DataFrame index.
print(tabulate(df_datasets, headers='keys', tablefmt='pretty', showindex=False))

+-------------------------------------------+-------------------+--------------------+---------------------+
|                  Dataset                  | Number of Classes | Number of Features | Size of the Dataset |
+-------------------------------------------+-------------------+--------------------+---------------------+
|                   Iris                    |         3         |         4          |         150         |
| WBDC (Breast Cancer Wisconsin Diagnostic) |         2         |         30         |         569         |
|                 Spambase                  |         2         |         57         |        4601         |
|                   Heart                   |        14         |         4          |         559         |
|                   Glass                   |         2         |         9          |         214         |
|  WBC (Breast Cancer Wisconsin Original)   |         2         |         9          |         699         |
|                Io