In [1]:
from sklearn.datasets import fetch_openml  # To load the Satimage dataset from OpenML
from sklearn.naive_bayes import GaussianNB  # Gaussian Naive Bayes classifier
from sklearn.model_selection import train_test_split  # To split data into training and testing sets
from sklearn.metrics import accuracy_score, classification_report  # For model evaluation
import warnings

# Blanket-ignore all warnings so the printed results are not interleaved
# with library warning messages.
warnings.simplefilter('ignore')

# Step 1: Data Preparation
# ------------------------------------
# Fetch version 1 of the 'satimage' dataset (Statlog Landsat Satellite) from
# OpenML and work with its combined pandas DataFrame.
sat_data = fetch_openml(name='satimage', version=1, as_frame=True)
df = sat_data.frame

# Target: the 'class' column (land cover type), re-encoded as the integer
# codes of its categories. Downstream labels are therefore category codes,
# not the raw class values stored in the dataset.
y = df['class'].astype('category').cat.codes

# Features: every remaining column (the spectral measurements).
X = df.drop('class', axis=1)

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible. NOTE: the split is not stratified by class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 2: Model Training
# ------------------------------------
# Build a Gaussian Naive Bayes classifier with default settings and fit it on
# the training split in one expression (fit() hands back the estimator).
# The model assumes features are normally distributed within each class.
gnb = GaussianNB().fit(X_train, y_train)

# Step 3: Evaluation
# ------------------------------------
# Label every held-out sample with the fitted classifier.
y_pred = gnb.predict(X_test)

# Score the predictions against the true test labels: a per-class
# precision/recall/F1 table plus the overall fraction of correct labels.
report = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Print the accuracy rounded to three decimals, then the per-class table.
accuracy_line = "Gaussian Naive Bayes Accuracy: {:.3f}".format(accuracy)
print(accuracy_line)
print("\nClassification Report:\n", report)


Gaussian Naive Bayes Accuracy: 0.807

Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.83      0.86       289
           1       0.99      0.92      0.95       160
           2       0.93      0.88      0.90       270
           3       0.52      0.66      0.58       139
           4       0.58      0.69      0.63       136
           5       0.83      0.78      0.80       292

    accuracy                           0.81      1286
   macro avg       0.79      0.79      0.79      1286
weighted avg       0.82      0.81      0.81      1286

