Title: Popular Classification Algorithms

Random Forest

Task 1: Predict employee attrition based on job satisfaction and salary.

In [2]:
# Task 1: Predict employee attrition based on job satisfaction and salary

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Example synthetic dataset: job satisfaction, salary, and attrition
data = {
    'job_satisfaction': [3, 4, 2, 5, 1, 4, 2, 5, 3, 1],
    'salary': [50000, 70000, 40000, 90000, 35000, 80000, 42000, 95000, 60000, 37000],
    'attrition': [0, 0, 1, 0, 1, 0, 1, 0, 0, 1]  # 1 = left company, 0 = stayed
}
df = pd.DataFrame(data)

# Features and target
X = df[['job_satisfaction', 'salary']]
y = df['attrition']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(
    y_test, y_pred, labels=[0, 1], target_names=['Stayed', 'Left'], zero_division=0
))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

      Stayed       1.00      1.00      1.00         2
        Left       0.00      0.00      0.00         0

    accuracy                           1.00         2
   macro avg       0.50      0.50      0.50         2
weighted avg       1.00      1.00      1.00         2



Task 2: Classify types of wine based on chemical analysis.

In [3]:
# Task 2: Classify types of wine based on chemical analysis

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# Load the Wine dataset
wine = load_wine()
X = wine.data
y = wine.target

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train a Random Forest classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(
    y_test, y_pred, target_names=wine.target_names, zero_division=0
))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        12
     class_1       1.00      1.00      1.00        14
     class_2       1.00      1.00      1.00        10

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



Task 3: Predict housing loan approval based on financial and personal data.

In [4]:
# Task 3: Predict housing loan approval based on financial and personal data

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Example synthetic dataset: financial and personal data for loan approval
data = {
    'income': [50000, 60000, 35000, 80000, 45000, 90000, 30000, 75000, 40000, 85000],
    'age': [25, 35, 28, 45, 32, 50, 27, 40, 30, 48],
    'loan_amount': [20000, 25000, 12000, 30000, 15000, 35000, 10000, 28000, 13000, 32000],
    'credit_score': [700, 720, 680, 750, 690, 770, 650, 740, 670, 760],
    'approved': [1, 1, 0, 1, 0, 1, 0, 1, 0, 1]  # 1 = approved, 0 = not approved
}
df = pd.DataFrame(data)

# Features and target
X = df.drop('approved', axis=1)
y = df['approved']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train a Random Forest classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(
    y_test, y_pred, labels=[0, 1], target_names=['Not Approved', 'Approved'], zero_division=0
))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

Not Approved       1.00      1.00      1.00         1
    Approved       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

