In [14]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Build a DataFrame holding the four Iris measurements plus the species code
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Binary label: 1 for Virginica (species code 2), 0 for every other species
df['target_binary'] = df['target'].eq(2).astype(int)

# Features are every column except the two label columns
X = df.drop(columns=['target', 'target_binary'])
y = df['target_binary']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a logistic regression classifier using scikit-learn's default settings
model = LogisticRegression()
model.fit(X_train, y_train)


In [15]:

from sklearn.metrics import accuracy_score, classification_report

# Predict the binary label for every row of the held-out test split
y_pred = model.predict(X_test)

# Score the predictions: overall accuracy and the per-class
# precision / recall / F1 text report
accuracy, report = (
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)


Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



An accuracy score of 1.0 (equivalent to 100%) means the model classified every one of the 30 samples in the test set correctly. This is an excellent result on this particular split, but because the test set is small and comes from a single random split, it should not be read as a guarantee of flawless performance on new data.

In [16]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of the test-set predictions: rows are the true class,
# columns are the predicted class (0 = Non-Virginica, 1 = Virginica)
confusion = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Fraction of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", confusion)


Accuracy: 1.0
Confusion Matrix:
 [[19  0]
 [ 0 11]]


The confusion matrix shows that there are no false positives (0) and no false negatives (0). All 19 samples from the "Non-Virginica" class and all 11 samples from the "Virginica" class were classified correctly. This is consistent with the accuracy score of 1.0: both are computed from the same predictions, and neither shows a single misclassification.

In [17]:
# Pair each test-set prediction with its true label; the rows keep the
# index of y_test, so they can be traced back to rows of `df`
predicted_labels = model.predict(X_test)
results = pd.DataFrame({'Predicted': predicted_labels, 'Actual': y_test})

# Boolean flag: True where the prediction matches the true label
results['Is Correct'] = results['Predicted'].eq(results['Actual'])

# Index labels of the rows the model got wrong
misclassified_indices = results.loc[~results['Is Correct']].index

# Look those rows up in the full DataFrame (features and both label columns)
misclassified_instances = df.loc[misclassified_indices]

# Show the misclassified rows themselves ...
print("Feature values for misclassified instances:", misclassified_instances, sep="\n")

# ... followed by their summary statistics, to help spot common patterns
print(
    "Summary statistics for misclassified instances:",
    misclassified_instances.describe(),
    sep="\n",
)


Feature values for misclassified instances:
Empty DataFrame
Columns: [sepal length (cm), sepal width (cm), petal length (cm), petal width (cm), target, target_binary]
Index: []
Summary statistics for misclassified instances:
       sepal length (cm)  sepal width (cm)  petal length (cm)   
count                0.0               0.0                0.0  \
mean                 NaN               NaN                NaN   
std                  NaN               NaN                NaN   
min                  NaN               NaN                NaN   
25%                  NaN               NaN                NaN   
50%                  NaN               NaN                NaN   
75%                  NaN               NaN                NaN   
max                  NaN               NaN                NaN   

       petal width (cm)  target  target_binary  
count               0.0     0.0            0.0  
mean                NaN     NaN            NaN  
std                 NaN     NaN           