In [1]:
# IPython magic: render matplotlib figures inline, directly below the cell that draws them.
%matplotlib inline

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import f1_score, precision_recall_curve, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler

In [4]:
import pandas as pd

# Relative path to the external CSV dataset.
filename = 'Solar_radiation_classification.csv'
# header=0: the first row of the file supplies the column names.
data = pd.read_csv(filename, header=0)

# Peek at the first rows, then show how many samples each target class has.
print(data.head())
data.loc[:, 'Class'].value_counts()

In [9]:
# Target: the 'Class' column (presumably 3 distinct classes - confirm against the value counts).
y = data['Class']
# Features: every remaining column.
X = data.drop(columns='Class')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [11]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so nothing from the test set is used in fitting.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
# Gaussian Naive Bayes classifier. priors=None means no fixed class priors are
# supplied; var_smoothing is spelled out explicitly at 1e-09.
naive_bayes = GaussianNB(priors=None, var_smoothing=1e-09)

# Train on the scaled training split and predict labels for the scaled test split.
y_pred = naive_bayes.fit(X_train_scaled, y_train).predict(X_test_scaled)

In [38]:
# Evaluate the model with macro-averaged metrics.
# macro: the per-class scores are averaged without weights, so every class
# counts equally no matter how many test samples it has.
# The metric functions are imported in the notebook's top import cell, so this
# cell no longer carries its own mid-notebook import.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')
recall = recall_score(y_test, y_pred, average='macro')
print(f'Recall: {recall:.4f}')
precision = precision_score(y_test, y_pred, average='macro')
print(f'Precision: {precision:.4f}')
f1 = f1_score(y_test, y_pred, average='macro')
print(f'F1-Score: {f1:.4f}')



Accuracy: 0.7510
Recall: 0.6604
Precision: 0.7877
F1-Score: 0.6433


In [39]:
# Same four metrics, this time with support-weighted averaging.
# weighted: each class's score is weighted by its support (the number of true
# instances of that class), so class imbalance is reflected in the average.
# Support-weighted recall works out to the same number as accuracy
# (total correct predictions / total samples), hence the two matching lines.
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f'Accuracy: {accuracy:.4f}')
print(f'Recall: {recall:.4f}')
print(f'Precision: {precision:.4f}')
print(f'F1-Score: {f1:.4f}')

Accuracy: 0.7510
Recall: 0.7510
Precision: 0.7715
F1-Score: 0.7051


In [40]:
# Print the confusion matrix under a heading.
# Rows are the true classes and columns the predicted classes.
print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')

Confusion Matrix:
[[  8  46   0]
 [  2 104   9]
 [  0   6  78]]


In [41]:
# Print the per-class precision / recall / F1 / support table under a heading.
print('Classification Report:', classification_report(y_test, y_pred), sep='\n')

Classification Report:
              precision    recall  f1-score   support

  Inspecting       0.80      0.15      0.25        54
  Monitoring       0.67      0.90      0.77       115
     Running       0.90      0.93      0.91        84

    accuracy                           0.75       253
   macro avg       0.79      0.66      0.64       253
weighted avg       0.77      0.75      0.71       253



In [15]:
# NOTE(review): this whole cell is a disabled draft that plots one-vs-rest
# precision-recall curves per class. It cannot be re-enabled as written:
#   * it calls `log_reg.predict_proba(...)`, but no visible cell defines
#     `log_reg` - the fitted model in this notebook is `naive_bayes`;
#   * it binarizes with `classes=[0, 1, 2]`, while the classification report
#     shows string labels (Inspecting / Monitoring / Running) - confirm the
#     label values before use;
#   * it reuses the names `precision` and `recall`, which would overwrite the
#     scalar scores computed in the metric cells.
# # Import necessary libraries
# import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.metrics import precision_recall_curve, average_precision_score
# from sklearn.preprocessing import label_binarize
# from sklearn.metrics import PrecisionRecallDisplay

# # Assuming y_test are the true labels and y_pred are the predicted probabilities
# # Let's also assume there are 3 classes in the classification task (adjust if needed)

# n_classes = 3  # Number of classes
# # Binarize the output (one-vs-rest strategy)
# y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
# y_pred_prob = log_reg.predict_proba(X_test_scaled)

# # Initialize variables to store precision, recall, and average precision for each class
# precision = dict()
# recall = dict()
# average_precision = dict()

# # Calculate precision-recall curve and average precision for each class
# for i in range(n_classes):
#     precision[i], recall[i], _ = precision_recall_curve(y_test_bin[:, i], y_pred_prob[:, i])
#     average_precision[i] = average_precision_score(y_test_bin[:, i], y_pred_prob[:, i])

# # Plot the Precision-Recall curve for each class
# plt.figure(figsize=(8, 6))
# for i in range(n_classes):
#     plt.plot(recall[i], precision[i], lw=2, label=f'Class {i} (AP = {average_precision[i]:0.2f})')

# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('Precision-Recall Curve for Multiclass Classification')
# plt.legend(loc='best')
# plt.grid()
# plt.show()
