Title: Classification Model Performance Metrics

Accuracy, Precision, Recall, F1-Score:

Task 1: Evaluate a binary classifier for spam detection using accuracy, precision, recall, and F1-score.

In [2]:
# Task 1: Evaluate a binary classifier for spam detection using accuracy, precision, recall, and F1-score

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Example synthetic dataset for spam detection.
# NOTE: every non-spam row is all zeros and every spam row is strictly
# positive, so the two classes are trivially separable. Perfect scores on
# this toy data are expected and say nothing about real-world performance.
data = {
    'word_freq_offer': [0.1, 0.0, 0.3, 0.0, 0.2, 0.0, 0.4, 0.0, 0.5, 0.0],
    'word_freq_free':  [0.2, 0.0, 0.1, 0.0, 0.3, 0.0, 0.2, 0.0, 0.4, 0.0],
    'char_freq_$':     [0.05, 0.0, 0.1, 0.0, 0.07, 0.0, 0.12, 0.0, 0.15, 0.0],
    'spam':            [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]  # 1 = spam, 0 = not spam
}
df = pd.DataFrame(data)

# Features (every column except the label) and target (the label column)
X = df.drop('spam', axis=1)
y = df['spam']

# Split into train and test sets; random_state pins the shuffle so the
# results are reproducible.
# NOTE: the split is not stratified, so with only 10 rows the 3-row test set
# is not guaranteed to contain both classes if the seed or data changes.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Train a Random Forest classifier (seeded for reproducibility)
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = clf.predict(X_test)

# Evaluate metrics. The binary scorers report on the positive class
# (label 1 = spam); zero_division=0 returns 0 instead of warning when a
# metric's denominator is empty.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)

print(f"Accuracy:  {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall:    {recall:.2f}")
print(f"F1-score:  {f1:.2f}")

# Pin the label order explicitly: without `labels`, classification_report
# infers the classes from y_test/y_pred and raises ValueError when only one
# class is present, because target_names would no longer match in length.
print("\nClassification Report:\n", classification_report(
    y_test, y_pred, labels=[0, 1], target_names=['Not Spam', 'Spam'], zero_division=0
))

Accuracy:  1.00
Precision: 1.00
Recall:    1.00
F1-score:  1.00

Classification Report:
               precision    recall  f1-score   support

    Not Spam       1.00      1.00      1.00         2
        Spam       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3




Task 2: Compare the performance of a multi-class classifier at recognizing animals.

In [3]:
# Task 2: Compare performance of a multi-class classifier on recognizing animals

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Toy animal dataset: three integer-coded features plus the class label.
# NOTE: in this synthetic data the 'habitat' and 'diet' columns hold exactly
# the same values as 'animal_type', so the label is directly readable from
# the features.
data = {
    # 1=small, 2=medium, 3=large
    'size': [1, 2, 3, 2, 1, 3, 2, 1, 3, 2, 1, 2, 3, 1, 2],
    # 0=land, 1=water, 2=air
    'habitat': [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 2, 1, 0, 2, 1],
    # 0=herbivore, 1=carnivore, 2=omnivore
    'diet': [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 2, 1, 0, 2, 1],
    # 0=mammal, 1=fish, 2=bird
    'animal_type': [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 2, 1, 0, 2, 1],
}
df = pd.DataFrame(data)

# Separate the label column from the feature columns
y = df['animal_type']
X = df.drop(columns='animal_type')

# Hold out 20% of the rows; stratifying on y keeps the class proportions
# the same in the train and test parts, and the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

# Fit a seeded random forest on the training rows
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out rows: overall accuracy plus a per-class breakdown
y_pred = clf.predict(X_test)
class_names = ['Mammal', 'Fish', 'Bird']
report = classification_report(
    y_test, y_pred, target_names=class_names, zero_division=0
)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", report)

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

      Mammal       1.00      1.00      1.00         1
        Fish       1.00      1.00      1.00         1
        Bird       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



Task 3: Analyze classifier performance for predicting disease outbreaks.

In [4]:
# Task 3: Analyze classifier performance for predicting disease outbreaks

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Example synthetic dataset: features for disease outbreak prediction
# (15 rows, 6 of them labelled as outbreaks).
data = {
    'avg_temp': [30, 22, 25, 28, 35, 20, 18, 32, 27, 24, 29, 21, 23, 31, 26],
    'rainfall': [100, 80, 90, 110, 120, 70, 60, 130, 95, 85, 105, 75, 65, 125, 100],
    'population_density': [500, 300, 400, 600, 700, 250, 200, 800, 450, 350, 550, 275, 225, 750, 425],
    'outbreak': [1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0]  # 1 = outbreak, 0 = no outbreak
}
df = pd.DataFrame(data)

# Features (every column except the label) and target (the label column)
X = df.drop('outbreak', axis=1)
y = df['outbreak']

# Split into train and test sets; random_state pins the shuffle so the
# results are reproducible.
# NOTE: the split is not stratified and the test set is only 3 rows, so the
# outbreak class may be represented by a single row (or none at all with a
# different seed). One misclassified row therefore swings precision/recall
# heavily; treat the scores below as illustrative only.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train a Random Forest classifier (seeded for reproducibility)
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = clf.predict(X_test)

# Evaluate metrics. The binary scorers report on the positive class
# (label 1 = outbreak); zero_division=0 returns 0 instead of warning when the
# model predicts no positives or the test set contains none.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)

print(f"Accuracy:  {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall:    {recall:.2f}")
print(f"F1-score:  {f1:.2f}")

# Pin the label order explicitly: without `labels`, classification_report
# infers the classes from y_test/y_pred and raises ValueError when only one
# class is present, because target_names would no longer match in length.
print("\nClassification Report:\n", classification_report(
    y_test, y_pred, labels=[0, 1], target_names=['No Outbreak', 'Outbreak'], zero_division=0
))

Accuracy:  0.67
Precision: 0.00
Recall:    0.00
F1-score:  0.00

Classification Report:
               precision    recall  f1-score   support

 No Outbreak       0.67      1.00      0.80         2
    Outbreak       0.00      0.00      0.00         1

    accuracy                           0.67         3
   macro avg       0.33      0.50      0.40         3
weighted avg       0.44      0.67      0.53         3

