In [2]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import mutual_info_classif

# Load scikit-learn's bundled digits dataset and pull out the feature
# matrix and the class labels.
digits = load_digits()
X = digits.data
y = digits.target

# Hold out 20% of the samples for testing; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Function to evaluate the model with selected features
def evaluate_features(selected_features, c_values=(0.001, 0.01, 0.1, 1, 10, 100)):
    """Score a feature subset with one-vs-rest logistic regression.

    For every candidate regularisation strength in ``c_values`` an
    L2-penalised ``LogisticRegression`` is fitted on the selected columns of
    the module-level ``X_train`` and scored on the same columns of
    ``X_test``.

    Args:
        selected_features: Column indices (list or integer array) to keep.
        c_values: Candidate values for the ``C`` parameter, tried in order.

    Returns:
        A ``(best_accuracy, best_c)`` tuple: the highest test accuracy seen
        and the first ``C`` that reached it.

    Raises:
        ValueError: If ``c_values`` is empty.
    """
    # NOTE(review): C is picked using accuracy on the test split, so the
    # returned accuracy is an optimistic estimate. Consider tuning on a
    # validation split or with cross-validation instead.
    c_values = list(c_values)
    if not c_values:
        raise ValueError("c_values must contain at least one candidate")

    X_train_selected = X_train[:, selected_features]
    X_test_selected = X_test[:, selected_features]

    # Start below any attainable accuracy so the first candidate is always
    # recorded; a starting value of 0 would hand back the invalid C=0 when
    # every model scores exactly 0.
    best_c = None
    best_accuracy = -1.0

    for c in c_values:
        model = LogisticRegression(multi_class='ovr', penalty='l2', C=c, random_state=42)
        model.fit(X_train_selected, y_train)
        y_pred = model.predict(X_test_selected)
        accuracy = accuracy_score(y_test, y_pred)

        # Strict comparison: on ties the earliest (smallest) C is kept.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_c = c

    return best_accuracy, best_c

# Greedy Algorithm
def greedy_algorithm(n_features=5):
    """Pick features one at a time by greedy forward selection.

    In each round every not-yet-selected column of the module-level ``X`` is
    tentatively added to the current subset and scored with
    ``evaluate_features``; the column giving the highest accuracy is kept.

    Args:
        n_features: Number of features to select. Values larger than the
            number of available columns are clamped; values <= 0 select
            nothing.

    Returns:
        List of selected column indices, in the order they were chosen.
    """
    # NOTE(review): candidates are ranked by the accuracy that
    # evaluate_features reports, which is measured on the test split, so
    # the test data influences the selection.
    selected_features = []
    remaining_features = list(range(X.shape[1]))

    # Never run more rounds than there are features left to choose from.
    n_rounds = min(n_features, len(remaining_features))

    for _round in range(n_rounds):
        # max() returns the first maximal candidate, the same tie-break as
        # a strict ">" scan, and always yields a real feature index even
        # when every candidate scores 0.
        best_feature = max(
            remaining_features,
            key=lambda feature: evaluate_features(selected_features + [feature])[0],
        )

        selected_features.append(best_feature)
        remaining_features.remove(best_feature)

    return selected_features

# Mutual Information Algorithm
def mutual_information_algorithm(n_features=5):
    """Rank features by mutual information with the class label.

    Every column of the module-level ``X`` is binned into three levels and
    scored against ``y`` with ``mutual_info_classif``; the indices of the
    highest-scoring columns are returned.

    Args:
        n_features: Number of top-scoring features to return.

    Returns:
        NumPy integer array of column indices, highest score first.
    """
    # NOTE(review): scores are computed on the full X/y, which includes the
    # rows later used for testing; consider X_train/y_train instead.

    # np.digitize maps values < 5 to 0, 5..10 to 1 and >= 11 to 2; the
    # shift by one relabels the bins as -1/0/1.
    X_discretized = np.digitize(X, bins=[5, 11]) - 1

    # The features are bin labels, so say so explicitly: the default
    # 'auto' treats a dense matrix as continuous and falls back to a
    # randomised nearest-neighbour estimator.
    mi_scores = mutual_info_classif(X_discretized, y, discrete_features=True)

    # Sort descending, then take the first n; unlike a negative slice this
    # also behaves for n_features == 0.
    selected_features = mi_scores.argsort()[::-1][:n_features]

    return selected_features


# Run both feature-selection strategies.
greedy_features = greedy_algorithm()
mi_features = mutual_information_algorithm()

# Evaluate both algorithms
greedy_accuracy, greedy_c = evaluate_features(greedy_features)
mi_accuracy, mi_c = evaluate_features(mi_features)

# Print one report per strategy; the second heading carries the blank
# line that separates the two reports.
reports = (
    ("Greedy Algorithm:", greedy_features, greedy_accuracy, greedy_c),
    ("\nMutual Information Algorithm:", mi_features, mi_accuracy, mi_c),
)
for heading, features, accuracy, c_value in reports:
    print(heading)
    print("Selected features:", features)
    print("Best accuracy:", accuracy)
    print("Best C value:", c_value)



Mutual Information Algorithm:
Selected features: [42 26 21 34 36]
Best accuracy: 0.7055555555555556
Best C value: 10


