In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel

# Load the Optical Recognition of Handwritten Digits dataset from UCI
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tra"
data = pd.read_csv(url, header=None)

# Separate features (pixels) and target (digit)
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the dataset (scale features)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feature Selection
rf_model = RandomForestClassifier()
selector = SelectFromModel(estimator=rf_model).fit(X_train_scaled, y_train)
X_train_selected = selector.transform(X_train_scaled)
X_test_selected = selector.transform(X_test_scaled)

# Define the model
rf_model = RandomForestClassifier()

# Define hyperparameters to tune
param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

# Perform Randomized Search with Cross-Validation
random_search = RandomizedSearchCV(estimator=rf_model, param_distributions=param_dist, n_iter=10, cv=5, scoring='accuracy', n_jobs=-1)
random_search.fit(X_train_selected, y_train)

# Get the best model
best_rf_model = random_search.best_estimator_

# Evaluate the best model using cross-validation
cv_accuracy = cross_val_score(best_rf_model, X_train_selected, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(best_rf_model, X_train_selected, y_train, cv=5, scoring='precision_weighted')
cv_recall = cross_val_score(best_rf_model, X_train_selected, y_train, cv=5, scoring='recall_weighted')

# Fit the best model on the full training set
best_rf_model.fit(X_train_selected, y_train)

# Evaluate the best model on the test set
y_pred = best_rf_model.predict(X_test_selected)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

# Print evaluation metrics
print("Cross-Validation Accuracy:", np.mean(cv_accuracy))
print("Cross-Validation Precision:", np.mean(cv_precision))
print("Cross-Validation Recall:", np.mean(cv_recall))
print("Test Accuracy:", accuracy)
print("Test Precision:", precision)
print("Test Recall:", recall)


  pid = os.fork()
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Cross-Validation Accuracy: 0.9738412331653883
Cross-Validation Precision: 0.9741617044703921
Cross-Validation Recall: 0.972204037097654
Test Accuracy: 0.9803921568627451
Test Precision: 0.9805752193657814
Test Recall: 0.9803921568627451


In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Fetch the COIL-100 dataset
coil100 = fetch_openml(name='coil-100')

# Split the dataset into features (X) and target labels (y)
X, y = coil100.data, coil100.target

# Convert target labels to integers
y = y.astype(np.uint8)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model
rf_model = RandomForestClassifier()

# Define hyperparameters to tune
param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Perform Randomized Search with Cross-Validation
random_search = RandomizedSearchCV(estimator=rf_model, param_distributions=param_dist, n_iter=10, cv=5, scoring='accuracy', n_jobs=-1)
random_search.fit(X_train, y_train)

# Get the best model
best_rf_model = random_search.best_estimator_

# Evaluate the best model using cross-validation
cv_accuracy = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='accuracy')
cv_precision = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='precision_weighted')
cv_recall = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='recall_weighted')

# Fit the best model on the full training set
best_rf_model.fit(X_train, y_train)

# Evaluate the best model on the test set
y_pred = best_rf_model.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

print("Cross-Validation Accuracy:", np.mean(cv_accuracy))
print("Cross-Validation Precision:", np.mean(cv_precision))
print("Cross-Validation Recall:", np.mean(cv_recall))
print("Test Accuracy:", accuracy)
print("Test Precision:", precision)
print("Test Recall:", recall)


OpenMLError: No active dataset coil-100 found.