Title: Popular Classification Algorithms


Decision Trees


Task 1: Predict the loan default risk based on borrower characteristics.

In [5]:
# Predict the loan default risk based on borrower characteristics using a Decision Tree

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Example synthetic dataset: borrower characteristics and default risk
data = {
    'age': [25, 40, 35, 28, 50, 45, 23, 38, 60, 30],
    'income': [40000, 80000, 60000, 35000, 120000, 90000, 32000, 70000, 150000, 50000],
    'loan_amount': [5000, 20000, 15000, 4000, 30000, 25000, 3000, 18000, 35000, 10000],
    'credit_score': [650, 720, 690, 600, 800, 750, 580, 710, 820, 670],
    'default': [0, 0, 0, 1, 0, 0, 1, 0, 0, 1]  # 1 = default, 0 = no default
}
df = pd.DataFrame(data)

# Features and target
X = df.drop('default', axis=1)
y = df['default']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Fix: Specify labels to ensure both classes are reported even if one is missing in y_test
print("Classification Report:\n", classification_report(
    y_test, y_pred, labels=[0, 1], target_names=['No Default', 'Default']
))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

  No Default       1.00      1.00      1.00         2
     Default       0.00      0.00      0.00         0

    accuracy                           1.00         2
   macro avg       0.50      0.50      0.50         2
weighted avg       1.00      1.00      1.00         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Task 2: Determine if a patient should be tested for a disease based on symptoms.

In [2]:
# Determine if a patient should be tested for a disease based on symptoms using a Decision Tree

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Example synthetic dataset: patient symptoms and test recommendation
data = {
    'fever': [1, 0, 1, 1, 0, 0, 1, 0, 1, 0],
    'cough': [1, 1, 0, 1, 0, 1, 1, 0, 0, 0],
    'fatigue': [0, 1, 1, 1, 0, 0, 1, 0, 1, 0],
    'age': [25, 60, 35, 45, 30, 55, 40, 28, 50, 33],
    'should_test': [1, 1, 1, 1, 0, 0, 1, 0, 1, 0]  # 1 = test recommended, 0 = not recommended
}
df = pd.DataFrame(data)

# Features and target
X = df.drop('should_test', axis=1)
y = df['should_test']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=['No Test', 'Test']))

Accuracy: 0.5
Classification Report:
               precision    recall  f1-score   support

     No Test       0.00      0.00      0.00         0
        Test       1.00      0.50      0.67         2

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Task 3: Classify types of animals based on features like size, habitat, and diet.

In [4]:
# Classify types of animals based on features like size, habitat, and diet using a Decision Tree

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Example synthetic dataset: animal features and type
data = {
    'size': [1, 2, 3, 2, 1, 3, 2, 1, 3, 2, 1, 2, 3, 1, 2],         # 1=small, 2=medium, 3=large
    'habitat': [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 2, 1, 0, 2, 1],      # 0=land, 1=water, 2=air
    'diet': [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 2, 1, 0, 2, 1],         # 0=herbivore, 1=carnivore, 2=omnivore
    'animal_type': [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 2, 1, 0, 2, 1]   # 0=mammal, 1=fish, 2=bird
}
df = pd.DataFrame(data)

# Features and target
X = df.drop('animal_type', axis=1)
y = df['animal_type']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train a Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(
    y_test, y_pred, target_names=['Mammal', 'Fish', 'Bird']
))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

      Mammal       1.00      1.00      1.00         1
        Fish       1.00      1.00      1.00         1
        Bird       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

