In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, root_mean_squared_error
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, r2_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

In [2]:
# Load iris as a (features, labels) tuple and unpack it into X and y.
iris = load_iris(return_X_y=True)
X, y = iris

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the shuffled split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

# fit() returns the estimator itself, so construction and training can be chained.
logistic_regression_model = LogisticRegression().fit(X=X_train, y=y_train)

# Predicted class labels for the held-out samples.
y_pred = logistic_regression_model.predict(X=X_test)

In [10]:
# Preview the first five rows of the feature matrix (one row per sample).
X[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [13]:
# Preview the first five target labels (integer class ids).
y[:5]

array([0, 0, 0, 0, 0])

In [4]:
# Regression-style metrics. NOTE: these treat the integer class ids as plain
# numbers, so they are shown for illustration only and are not standard
# measures of classification quality.
mse = mean_squared_error(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

# Classification metrics. 'macro' averaging computes the metric per class and
# takes the unweighted mean, so every class counts equally.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
# No trailing comma here: a trailing comma would turn the score into a
# 1-tuple and the results table would display "(1.0,)" instead of 1.0.
f1 = f1_score(y_test, y_pred, average='macro')

# Insertion order of this dict is the row order of the summary table.
results = {
    'Mean squared error': mse,
    'Root mean squared error': rmse,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 score': f1,
    'R squared': r_squared,
    'Samples in Training features': X_train.shape[0],
    'Training labels': y_train.shape[0],
    'Samples in Testing features': X_test.shape[0],
    'Testing labels': y_test.shape[0],
}

# One (metric, value) row per entry; the items view is materialized into a
# list so the constructor receives a plain sequence of pairs.
results_df = pd.DataFrame(data=list(results.items()), columns=['Metric', 'Value'])
results_df


Unnamed: 0,Metric,Value
0,Mean squared error,0.0
1,Root mean squared error,0.0
2,Accuracy,1.0
3,Precision,1.0
4,Recall,1.0
5,F1 score,"(1.0,)"
6,R squared,1.0
7,Samples in Training features,120
8,Training labels,120
9,Samples in Testing features,30


In [5]:
# Text summary of per-class precision, recall, F1-score and support for the
# held-out predictions, plus accuracy and macro/weighted averages.
report = classification_report(y_test, y_pred)
# print() is used (rather than a bare expression) so the report's embedded
# newlines render as an aligned table instead of an escaped string repr.
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [6]:
# "Support" is the number of test samples whose true label is a given class.
# It is a descriptive statistic: it shows how the classes are distributed and
# therefore whether the evaluation set is balanced or imbalanced.
# The same numbers appear in the "support" column of classification_report().

unique_classes, class_count = np.unique(y_test, return_counts=True)

# Pair every class id with its count, one row per class.
support_df = pd.DataFrame(
    data=list(zip(unique_classes, class_count)),
    columns=['Class', 'Support'],
)
support_df

Unnamed: 0,Class,Support
0,0,10
1,1,9
2,2,11


In [7]:
# Use the classes the fitted model knows about as the explicit label order.
# Passing `labels=` pins the row/column order of the matrix and keeps its
# shape stable even if some class happens to be absent from the test split,
# so the axis names below can never drift out of sync with the matrix.
class_labels = logistic_regression_model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Rows are the true classes, columns are the predicted classes.
confusion_matrix_df = pd.DataFrame(
    data=cm,
    index=[f'True_Class {label}' for label in class_labels],
    columns=[f'Prediction_Class {label}' for label in class_labels],
)
confusion_matrix_df

Unnamed: 0,Prediction_Class 0,Prediction_Class 1,Prediction_Class 2
True_Class 0,10,0,0
True_Class 1,0,9,0
True_Class 2,0,0,11


In [8]:
# Method 1 to calculate TP, FN, FP and TN (class-wise metrics)
# Vectorized: every quantity is an array with one entry per class.

row_totals = cm.sum(axis=1)  # number of samples whose true label is each class
col_totals = cm.sum(axis=0)  # number of samples predicted as each class

TP = cm.diagonal()  # True Positives: diagonal elements
FN = row_totals - TP  # False Negatives: row sums minus diagonal
FP = col_totals - TP  # False Positives: column sums minus diagonal
TN = cm.sum() - (TP + FN + FP)  # True Negatives: total minus TP, FN, and FP

print(f'True Positive: {TP}, False Negative: {FN}, False Positive: {FP}, True Negative {TN}')

True Positive: [10  9 11], False Negative: [0 0 0], False Positive: [0 0 0], True Negative [20 21 19]


In [9]:
# Method 2 to calculate TP, FN, FP and TN (class-wise metrics)
# Same quantities as the vectorized approach, computed with an explicit
# per-class loop to make the row/column arithmetic visible.

TP = np.diag(cm)

n_classes = cm.shape[0]
grand_total = cm.sum()  # total number of evaluated samples

# One integer slot per class, filled in by the loop below.
FN = np.zeros(n_classes, dtype='int')
FP = np.zeros(n_classes, dtype='int')
TN = np.zeros(n_classes, dtype='int')

for k in range(n_classes):
    hits = cm[k, k]
    FN[k] = cm[k, :].sum() - hits  # true class k, predicted as something else
    FP[k] = cm[:, k].sum() - hits  # predicted class k, actually something else
    TN[k] = grand_total - (TP[k] + FN[k] + FP[k])  # everything not involving class k

print(f'True Positive: {TP}, False Negative: {FN}, False Positive: {FP}, True Negative {TN}')

True Positive: [10  9 11], False Negative: [0 0 0], False Positive: [0 0 0], True Negative [20 21 19]
