In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# Synthetic binary-classification problem: 1000 samples with 20 features.
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Experiment settings
num_iterations = 5  # how many independent random train/test splits to draw
test_size = 0.3  # fraction of the samples held out for testing in each split
accuracies = []  # one test-set accuracy per split

# Repeated random subsampling: every pass draws a new split, trains a fresh
# forest on it, and records how well that forest does on the held-out part.
for i in range(num_iterations):
    # The loop index doubles as the split seed, so each pass gets a different
    # but reproducible partition.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=i,
    )

    # fit() returns the estimator itself, so construction and training chain.
    model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

    # Score the freshly trained forest on the held-out samples only.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

    print(f"Iteration {i+1} - Accuracy: {accuracy:.4f}")

# Summarise the runs with the mean of the per-split accuracies.
average_accuracy = np.mean(accuracies)
print(f"\nAverage Accuracy over {num_iterations} iterations: {average_accuracy:.4f}")

Iteration 1 - Accuracy: 0.8967
Iteration 2 - Accuracy: 0.9000
Iteration 3 - Accuracy: 0.8933
Iteration 4 - Accuracy: 0.8900
Iteration 5 - Accuracy: 0.8733

Average Accuracy over 5 iterations: 0.8907


In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# Fetch the iris dataset and pull out the feature matrix and the labels.
data = load_iris()
X = data.data
y = data.target

# The estimator that cross_val_score will fit once per fold; the fixed seed
# keeps the forest reproducible between runs.
model = RandomForestClassifier(random_state=42)

# 5-fold cross-validation: cv=5 asks for five folds, and the call returns
# one score per fold as an array.
cv_scores = cross_val_score(model, X, y, cv=5)

# Report the folds one by one, numbered from 1.
for i, score in enumerate(cv_scores):
    print(f"Fold {i+1} - Accuracy: {score:.4f}")

# Overall estimate: the mean of the per-fold scores.
average_score = cv_scores.mean()
print(f"\nAverage Accuracy: {average_score:.4f}")

Fold 1 - Accuracy: 0.9667
Fold 2 - Accuracy: 0.9667
Fold 3 - Accuracy: 0.9333
Fold 4 - Accuracy: 0.9667
Fold 5 - Accuracy: 1.0000

Average Accuracy: 0.9667


In [7]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from collections import Counter

def _fit_and_score(estimator, features_train, features_test, labels_train, labels_test):
    """Fit `estimator` on the training part and score it on the test part.

    Returns a `(predictions, accuracy)` pair, where `predictions` are the
    estimator's outputs for `features_test` and `accuracy` is their
    accuracy_score against `labels_test`.
    """
    estimator.fit(features_train, labels_train)
    predictions = estimator.predict(features_test)
    return predictions, accuracy_score(labels_test, predictions)


# Two-class dataset deliberately skewed towards class 0 (weights 0.9 / 0.1).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)

# Show how many samples each class has before any splitting.
print(f"Original Class Distribution: {Counter(y)}")

# Plain holdout: a purely random 70/30 split with no stratification requested.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(f"Holdout Class Distribution in Train: {Counter(y_train)}")
print(f"Holdout Class Distribution in Test: {Counter(y_test)}")

# Stratified holdout: same split size and seed, but stratify=y asks the
# splitter to keep the class proportions of y in both parts.
X_train_strat, X_test_strat, y_train_strat, y_test_strat = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)
print(f"\nStratified Holdout Class Distribution in Train: {Counter(y_train_strat)}")
print(f"Stratified Holdout Class Distribution in Test: {Counter(y_test_strat)}")

# One estimator object serves both experiments: it is fitted on the plain
# split first and then fitted again on the stratified split.
model = RandomForestClassifier(random_state=42)

# Plain holdout: train on its training part, score on its test part.
y_pred_holdout, accuracy_holdout = _fit_and_score(
    model, X_train, X_test, y_train, y_test
)

# Stratified holdout: same procedure on the stratified split.
y_pred_stratified, accuracy_stratified = _fit_and_score(
    model, X_train_strat, X_test_strat, y_train_strat, y_test_strat
)

# NOTE(review): with class weights of 0.9 / 0.1, always predicting class 0
# would already score roughly 0.90, so read these accuracies against that
# baseline rather than against 0.5.
print(f"\nAccuracy using Holdout Method: {accuracy_holdout:.4f}")
print(f"Accuracy using Stratified Holdout Method: {accuracy_stratified:.4f}")

Original Class Distribution: Counter({0: 897, 1: 103})
Holdout Class Distribution in Train: Counter({0: 627, 1: 73})
Holdout Class Distribution in Test: Counter({0: 270, 1: 30})

Stratified Holdout Class Distribution in Train: Counter({0: 628, 1: 72})
Stratified Holdout Class Distribution in Test: Counter({0: 269, 1: 31})

Accuracy using Holdout Method: 0.9400
Accuracy using Stratified Holdout Method: 0.9533
