Title: Popular Classification Algorithms


Decision Trees (Random Forest Ensembles)


Task 1: Predict the loan default risk based on borrower characteristics.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# ---- Task 1 pipeline: synthetic loan data -> random forest -> metrics ----

# Build the synthetic borrower table. The legacy global NumPy RNG is seeded
# so every run draws the same values. The columns are drawn in a fixed order
# because each draw advances that shared RNG stream.
np.random.seed(0)
n_borrowers = 1000

data = {}
data['income'] = np.random.randint(20000, 100000, n_borrowers)
data['age'] = np.random.randint(21, 65, n_borrowers)
data['loan_amount'] = np.random.randint(1000, 50000, n_borrowers)
data['credit_score'] = np.random.randint(300, 850, n_borrowers)
data['years_employed'] = np.random.randint(0, 40, n_borrowers)
# Target label: 0 = no default, 1 = default, drawn with probabilities 0.85 / 0.15
# independently of the feature columns above.
data['default'] = np.random.choice([0, 1], size=n_borrowers, p=[0.85, 0.15])

df = pd.DataFrame(data)

# Separate the label column from the predictor columns.
y = df['default']
X = df.drop(columns='default')

# Hold out 20% of the rows for evaluation, stratifying on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training rows only
# and then applied unchanged to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the random forest on the scaled training rows, then predict the
# held-out rows.
model = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Compute the three evaluation artefacts first, then print them.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)


Accuracy: 0.85

Confusion Matrix:
 [[170   3]
 [ 27   0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.98      0.92       173
           1       0.00      0.00      0.00        27

    accuracy                           0.85       200
   macro avg       0.43      0.49      0.46       200
weighted avg       0.75      0.85      0.79       200



Task 2: Determine if a patient should be tested for a disease based on symptoms.

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ---- Task 2 pipeline: synthetic symptom data -> random forest -> metrics ----

# Seed the legacy global NumPy RNG so the simulated table is reproducible.
np.random.seed(42)
n_samples = 1000

# Five binary symptom flags, drawn one column at a time in the listed order
# (each draw advances the shared RNG stream, so the order matters).
symptom_columns = ['fever', 'cough', 'shortness_of_breath', 'fatigue', 'loss_of_taste']
data = {
    symptom: np.random.choice([0, 1], size=n_samples)
    for symptom in symptom_columns
}
# Integer age column, followed by the binary target drawn with
# probabilities 0.7 / 0.3 independently of the columns above.
data['age'] = np.random.randint(10, 80, size=n_samples)
data['test_recommended'] = np.random.choice([0, 1], size=n_samples, p=[0.7, 0.3])

df = pd.DataFrame(data)

# Separate the label column from the predictor columns.
y = df['test_recommended']
X = df.drop(columns='test_recommended')

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the random forest on the raw (unscaled) features and predict the
# held-out rows.
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Compute the three evaluation artefacts first, then print them.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)


Accuracy: 0.64

Confusion Matrix:
 [[114  37]
 [ 35  14]]

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.75      0.76       151
           1       0.27      0.29      0.28        49

    accuracy                           0.64       200
   macro avg       0.52      0.52      0.52       200
weighted avg       0.64      0.64      0.64       200



Task 3: Classify types of animals based on features like size, habitat, and diet.

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Simulate dataset
# Ten hand-written animals, one row per animal. Every value in 'animal_type'
# is distinct, i.e. each class has exactly one example.
data = {
    'size': ['small', 'medium', 'large', 'medium', 'small', 'large', 'medium', 'small', 'large', 'medium'],
    'habitat': ['land', 'water', 'land', 'air', 'water', 'land', 'air', 'water', 'air', 'land'],
    'diet': ['herbivore', 'carnivore', 'omnivore', 'herbivore', 'carnivore', 'herbivore', 'omnivore', 'carnivore', 'herbivore', 'omnivore'],
    'animal_type': ['rabbit', 'shark', 'bear', 'parrot', 'dolphin', 'elephant', 'eagle', 'octopus', 'owl', 'dog']
}

df = pd.DataFrame(data)

# Step 2: Encode categorical features
# One encoder per column, so each column keeps its own string <-> integer
# mapping; le_type is reused at the end to decode the predictions.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for the
# target y; OrdinalEncoder / OneHotEncoder are the intended tools for feature
# columns. Left unchanged here to preserve the existing encoding.
le_size = LabelEncoder()
le_habitat = LabelEncoder()
le_diet = LabelEncoder()
le_type = LabelEncoder()

# The three feature columns are overwritten with their integer codes; the
# original 'animal_type' strings are kept and the codes go into a new column.
df['size'] = le_size.fit_transform(df['size'])
df['habitat'] = le_habitat.fit_transform(df['habitat'])
df['diet'] = le_diet.fit_transform(df['diet'])
df['animal_type_encoded'] = le_type.fit_transform(df['animal_type'])

# Step 3: Define features and target
X = df[['size', 'habitat', 'diet']]
y = df['animal_type_encoded']

# Step 4: Train-test split
# NOTE: because every animal_type occurs exactly once, any label that lands in
# the test split is absent from the training split. The model can never
# predict it, so the held-out accuracy is 0 regardless of the classifier.
# A meaningful evaluation needs several examples per class.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 5: Train model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 6: Predictions and evaluation
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
# zero_division=0 reports undefined precision/recall (labels with no predicted
# or no true samples) as 0 without emitting an UndefinedMetricWarning for each
# one; the printed numbers are the same as with the default setting.
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))

# Optional: Decode predicted animal types
# Map the predicted integer codes back to the original animal names.
decoded_preds = le_type.inverse_transform(y_pred)
print("\nPredicted Animal Types:", decoded_preds)


Accuracy: 0.0

Confusion Matrix:
 [[0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [1 0 0 0 0 0]
 [0 0 0 0 1 0]
 [0 0 0 0 0 0]
 [0 1 0 0 0 0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           2       0.00      0.00      0.00       0.0
           4       0.00      0.00      0.00       1.0
           6       0.00      0.00      0.00       1.0
           7       0.00      0.00      0.00       0.0
           9       0.00      0.00      0.00       1.0

    accuracy                           0.00       3.0
   macro avg       0.00      0.00      0.00       3.0
weighted avg       0.00      0.00      0.00       3.0


Predicted Animal Types: ['parrot' 'dolphin' 'bear']


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
