In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Read the pre-split training and test tables from the working directory.
train_df = pd.read_csv("train_data.csv")
test_df = pd.read_csv("test_data.csv")

# The "class" column is the target; every other column is a predictor.
y_train, y_test = train_df["class"], test_df["class"]
X_train = train_df.drop(columns=["class"])
X_test = test_df.drop(columns=["class"])

# Learn the scaling statistics from the training split only and apply the
# same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Untuned base estimators, keyed by the display name used in the reports.
models = {
    "MLP": MLPClassifier(max_iter=100000, random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=100000, random_state=42),
    "KNN": KNeighborsClassifier()
}

# Candidate hyperparameter values per estimator (same keys as `models`).
mlp_layer_options = [
    (2,), (5,), (10,), (15,), (20,), (50,),
    (50, 50), (100, 100), (200, 200),
    (100, 100, 100), (200, 200, 200), (500, 500, 500), (1000, 1000, 1000),
]
param_grids = {
    "MLP": {"hidden_layer_sizes": mlp_layer_options},
    "Logistic Regression": {"solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"]},
    "KNN": {"n_neighbors": range(1, 21)}
}


def _tune(estimator, grid):
    """Grid-search `estimator` over `grid` with 5-fold CV on the scaled training data.

    Returns the best estimator found by the search.
    """
    search = GridSearchCV(estimator, grid, cv=5)
    search.fit(X_train_scaled, y_train)
    return search.best_estimator_


# One tuned estimator per entry in `models`, in the same order.
tuned_models = {name: _tune(estimator, param_grids[name]) for name, estimator in models.items()}

# Evaluate every feature on its own with each tuned model.
#
# The single-feature experiments use the standardized values, i.e. the same
# inputs the grid search and the all-features run use, so the tuned
# hyperparameters are applied to data on the scale they were tuned for and the
# single-feature and all-features accuracies are comparable.  StandardScaler
# standardizes each column independently, so taking one column of the scaled
# matrix is equivalent to scaling that feature alone.
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

# NOTE(review): the best feature is selected by *test-set* accuracy below, so
# the reported single-feature accuracy is optimistically biased.  Selecting on
# cross-validated training accuracy would avoid this.
feature_scores = {}

for feature in X_train.columns:
    X_feature = X_train_scaled_df[[feature]]  # Use only one feature
    X_feature_test = X_test_scaled_df[[feature]]

    scores = {}
    for model_name, model in tuned_models.items():
        # fit() refits the shared estimator object in place.
        model.fit(X_feature, y_train)
        scores[model_name] = model.score(X_feature_test, y_test)

    feature_scores[feature] = scores

# Find the best feature for each model (first feature wins on ties).
best_features = {model: max(feature_scores, key=lambda f: feature_scores[f][model]) for model in tuned_models}

# Train and evaluate models using the best feature.
results_single_feature = {}
iterations_single_feature = {}

for model_name, feature in best_features.items():
    model = tuned_models[model_name]
    model.fit(X_train_scaled_df[[feature]], y_train)
    accuracy = model.score(X_test_scaled_df[[feature]], y_test)
    results_single_feature[model_name] = (feature, accuracy)
    # Snapshot n_iter_ now: the same estimator object is refit on all features
    # below, which overwrites this attribute.  Estimators without n_iter_
    # (e.g. KNN) are recorded as None.
    iterations_single_feature[model_name] = getattr(model, "n_iter_", None)

# Train and evaluate models using all features.
results_all_features = {}
iterations_all_features = {}

for model_name, model in tuned_models.items():
    model.fit(X_train_scaled, y_train)
    accuracy = model.score(X_test_scaled, y_test)
    results_all_features[model_name] = accuracy
    iterations_all_features[model_name] = getattr(model, "n_iter_", None)

# Print results along with the number of iterations for applicable models.
# Each iteration count comes from the fit that produced the accuracy printed
# on the line above it.
print("Results using the best single feature:")
for model_name, (feature, accuracy) in results_single_feature.items():
    print(f"{model_name}: Best Feature = {feature}, Accuracy = {accuracy:.4f}")
    if iterations_single_feature[model_name] is not None:
        print(f"  Iterations: {iterations_single_feature[model_name]}")

print("\nResults using all features:")
for model_name, accuracy in results_all_features.items():
    print(f"{model_name}: Accuracy = {accuracy:.4f}")
    if iterations_all_features[model_name] is not None:
        print(f"  Iterations: {iterations_all_features[model_name]}")

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the pre-split training and test sets.
train_df = pd.read_csv("train_data.csv")
test_df = pd.read_csv("test_data.csv")

# Extract features and labels ("class" is the target column).
X_train = train_df.drop(columns=["class"])
y_train = train_df["class"]
X_test = test_df.drop(columns=["class"])
y_test = test_df["class"]

# Scale features: statistics are learned from the training split only and
# then applied to both splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Base estimators.  random_state is pinned so that the MLP's weight
# initialization and early-stopping validation split, and the stochastic
# logistic-regression solvers, give the same result on every run.
models = {
    "MLP": MLPClassifier(max_iter=1000, early_stopping=True, random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "KNN": KNeighborsClassifier()
}

# Hyperparameter grids searched for each estimator (same keys as `models`).
param_grids = {
    "MLP": {"hidden_layer_sizes": [(2,), (5,), (10,), (15,), (20,), (50,), (50, 50), (100, 100), (200, 200), (100, 100, 100), (200, 200, 200), (500, 500, 500), (1000, 1000, 1000)]},
    "Logistic Regression": {'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']},
    "KNN": {"n_neighbors": range(1, 21)}
}

# Tune each model with a 5-fold cross-validated grid search on the scaled
# training data and keep the best estimator found.
tuned_models = {}
for model_name, model in models.items():
    grid_search = GridSearchCV(model, param_grids[model_name], cv=5)
    grid_search.fit(X_train_scaled, y_train)
    tuned_models[model_name] = grid_search.best_estimator_

# Single-feature experiments use the standardized values, i.e. the same inputs
# the grid search and the all-features run use, so the tuned hyperparameters
# are applied to data on the scale they were tuned for and the single-feature
# and all-features accuracies are comparable.  StandardScaler standardizes
# each column independently, so one column of the scaled matrix equals that
# feature scaled alone.
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

# Cross-validation for each feature: mean 5-fold accuracy of every tuned model
# trained on that feature alone.  Only training data is used here, so the
# test set plays no part in choosing the feature.
feat_score = {}
for feature in X_train.columns:
    X_feature = X_train_scaled_df[[feature]]  # Use only one feature

    scores = {}
    for model_name, model in tuned_models.items():
        scores[model_name] = cross_val_score(model, X_feature, y_train, cv=5).mean()

    feat_score[feature] = scores

# Best feature for each model (first feature wins on ties).
best_features = {model: max(feat_score, key=lambda f: feat_score[f][model]) for model in tuned_models}

# Train each model on its best feature and score it on the held-out test set.
results_single_feature = {}
for model_name, feature in best_features.items():
    model = tuned_models[model_name]
    model.fit(X_train_scaled_df[[feature]], y_train)
    accuracy = model.score(X_test_scaled_df[[feature]], y_test)
    results_single_feature[model_name] = (feature, accuracy)

# Train models using all features and score them on the test set.
# fit() refits the same estimator objects in place.
results_all_features = {}
for model_name, model in tuned_models.items():
    model.fit(X_train_scaled, y_train)
    accuracy = model.score(X_test_scaled, y_test)
    results_all_features[model_name] = accuracy

# Print results
print("Results using the best single feature:")
for model_name, (feature, accuracy) in results_single_feature.items():
    print(f"{model_name}: Best Feature = {feature}, Accuracy = {accuracy:.4f}")

print("\nResults using all features:")
for model_name, accuracy in results_all_features.items():
    print(f"{model_name}: Accuracy = {accuracy:.4f}")