In [3]:
# data_preparation.py
import pandas as pd
from sklearn.datasets import load_breast_cancer

In [6]:
def load_and_preprocess_data():
    """Return the breast-cancer dataset as a single labelled DataFrame.

    The feature matrix supplied by ``load_breast_cancer()`` becomes the
    columns (named after ``feature_names``) and the class labels are appended
    as a trailing ``'target'`` column.

    Returns:
        pandas.DataFrame: one row per sample, feature columns followed by
        ``'target'``.
    """
    bunch = load_breast_cancer()

    # No cleaning is applied at the moment; missing-value handling,
    # normalisation, etc. would be chained onto this expression.
    return pd.DataFrame(
        bunch.data,
        columns=bunch.feature_names,
    ).assign(target=bunch.target)

if __name__ == "__main__":
    # Stage 1 of the pipeline: build the labelled frame and persist it so the
    # feature-selection stage can read it back from disk.
    df = load_and_preprocess_data()
    df.to_csv(
        'breast_cancer_data.csv',
        index=False,  # row labels are not written to the file
    )

In [7]:
# feature_selection.py
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_classif

def select_features(df, k=10):
    """Keep the ``k`` highest-scoring feature columns of ``df``.

    Every column except ``'target'`` is treated as a feature and scored
    against the ``'target'`` column with ``SelectKBest(score_func=f_classif)``.

    Args:
        df: DataFrame holding the feature columns plus a ``'target'`` column.
        k: Number of features to keep; forwarded unchanged to ``SelectKBest``.

    Returns:
        tuple: ``(X_selected, y)`` where ``X_selected`` is a DataFrame with
        the selected columns under their original names and ``y`` is the
        ``'target'`` Series. Both carry the row index of ``df``.
    """
    X = df.drop(columns=['target'])
    y = df['target']

    # Select K best features
    selector = SelectKBest(score_func=f_classif, k=k)
    X_new = selector.fit_transform(X, y)

    # Get selected feature names
    mask = selector.get_support()
    selected_features = X.columns[mask]

    # The transformed values come back without the input's row labels (by
    # default a plain array), so re-attach X's index explicitly. Without it
    # the frame would get a fresh 0..n-1 index and stop lining up with ``y``
    # whenever ``df`` has a non-default index (e.g. after filtering rows).
    X_selected = pd.DataFrame(X_new, columns=selected_features, index=X.index)
    return X_selected, y

if __name__ == "__main__":
    # Stage 2 of the pipeline: read the frame written by the data-preparation
    # stage, reduce it to the selected features, and persist features and
    # labels as two separate CSV files for the tuning stage.
    df = pd.read_csv('breast_cancer_data.csv')
    X_selected, y = select_features(df)

    for frame, path in ((X_selected, 'selected_features.csv'), (y, 'target.csv')):
        frame.to_csv(path, index=False)


In [8]:
# model_tuning.py
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

def grid_search_cv(X, y, test_size=0.3, random_state=42, cv=5):
    """Grid-search MLPClassifier hyper-parameters on a training split of X, y.

    The data is split with ``train_test_split`` and only the training portion
    is passed to the search; the held-out portion is not evaluated by this
    function. The best parameter combination and its cross-validated score
    are printed.

    Args:
        X: Feature matrix.
        y: Target labels, one per row of ``X``.
        test_size: Fraction of the data held out by ``train_test_split``.
        random_state: Seed used for both the split and the MLP, so that
            repeated runs report the same best parameters and score.
        cv: Number of cross-validation folds used by ``GridSearchCV``.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    # Only the training half is needed here; the held-out half is discarded.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Seed the network: weight initialisation and batch shuffling are random,
    # so an unseeded model makes the search results differ from run to run.
    model = MLPClassifier(max_iter=100, random_state=random_state)

    # Define parameter grid
    parameter_space = {
        'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50), (100,)],
        'activation': ['tanh', 'relu'],
        'solver': ['sgd', 'adam'],
        'alpha': [0.0001, 0.05],
        'learning_rate': ['constant', 'adaptive'],
    }

    # Grid Search
    clf = GridSearchCV(model, parameter_space, n_jobs=-1, cv=cv)
    clf.fit(X_train, y_train)

    print('Best parameters found:\n', clf.best_params_)
    print('Best score:\n', clf.best_score_)

    return clf

if __name__ == "__main__":
    # Stage 3 of the pipeline: reload what the feature-selection stage wrote.
    # The label file is read as a one-column frame and flattened to 1-D.
    y = pd.read_csv('target.csv').values.ravel()
    X = pd.read_csv('selected_features.csv')
    grid_search_cv(X=X, y=y)


Best parameters found:
 {'activation': 'tanh', 'alpha': 0.05, 'hidden_layer_sizes': (50, 100, 50), 'learning_rate': 'constant', 'solver': 'adam'}
Best score:
 0.9220253164556962


