In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split, RandomizedSearchCV

def model_logistic_regression(prediction_data):
    """Tune and evaluate a logistic-regression classifier.

    The target is the ``'math score discretized'`` column of the
    notebook-level ``df``; ``prediction_data`` is split together with it, so
    both must have the same number of rows.

    A randomized search (10 candidates, 5-fold CV, accuracy scoring) picks
    ``C``, ``penalty`` and ``solver``; the best estimator is then scored on a
    held-out 20% split. Best parameters, accuracy and ROC AUC are printed.

    Args:
        prediction_data: Feature matrix to train on.

    Returns:
        tuple: ``(accuracy, roc_auc, fpr, tpr)`` computed on the test split.
    """
    class_data = df['math score discretized']

    # Seeded like model_random_forest so reruns yield the same split and metrics.
    X_train, X_test, y_train, y_test = train_test_split(
        prediction_data, class_data, test_size=0.2, random_state=42
    )

    # 'liblinear' and 'saga' shuffle the data internally; pin the seed so the
    # fitted coefficients do not drift between runs.
    model = LogisticRegression(max_iter=2000, random_state=42)

    param_grid = {
        'C': [0.001, 0.01, 0.1, 1, 10],
        'penalty': ['l1', 'l2'],
        'solver': ['liblinear', 'saga']
    }

    optimized_model = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_grid,
        n_iter=10,
        scoring='accuracy',
        cv=5,
        random_state=42
    )

    optimized_model.fit(X_train, y_train)
    best_model = optimized_model.best_estimator_

    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Column 1 of predict_proba is used as the positive-class score.
    # NOTE(review): this assumes a binary target - confirm how the
    # 'math score discretized' column is built.
    y_pred_proba = best_model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    print('Best Hyperparameters:', optimized_model.best_params_)
    print('Accuracy:', accuracy)
    print('ROC AUC:', roc_auc)

    return accuracy, roc_auc, fpr, tpr

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split, RandomizedSearchCV
import numpy as np

def model_decision_tree(prediction_data):
    """Tune and evaluate a decision-tree classifier.

    The target is the ``'math score discretized'`` column of the
    notebook-level ``df``; ``prediction_data`` is split together with it, so
    both must have the same number of rows.

    A randomized search (10 candidates, 5-fold CV, accuracy scoring) picks
    the pruning strength and tree-size limits; the best estimator is then
    scored on a held-out 20% split. Best parameters, accuracy and ROC AUC are
    printed.

    Args:
        prediction_data: Feature matrix to train on.

    Returns:
        tuple: ``(accuracy, roc_auc, fpr, tpr)`` computed on the test split.
    """
    class_data = df['math score discretized']

    # Seeded like model_random_forest so reruns yield the same split and metrics.
    X_train, X_test, y_train, y_test = train_test_split(
        prediction_data, class_data, test_size=0.2, random_state=42
    )

    model = DecisionTreeClassifier(random_state=42)

    param_grid = {
        'ccp_alpha': [0.0, 0.01, 0.1, 1.0],
        'max_depth': [None, 5, 10, 15],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    optimized_model = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_grid,
        n_iter=10,
        scoring='accuracy',
        cv=5,
        random_state=42
    )

    optimized_model.fit(X_train, y_train)
    best_model = optimized_model.best_estimator_

    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Column 1 of predict_proba is used as the positive-class score.
    # NOTE(review): this assumes a binary target - confirm how the
    # 'math score discretized' column is built.
    y_pred_proba = best_model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    print('Best Hyperparameters:', optimized_model.best_params_)
    print('Accuracy:', accuracy)
    print('ROC AUC:', roc_auc)

    return accuracy, roc_auc, fpr, tpr

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split, RandomizedSearchCV

def model_linear_regression(prediction_data):
    """Fit and evaluate a linear regression on the raw math score.

    The target is the ``'math score'`` column of the notebook-level ``df``;
    ``prediction_data`` is split together with it, so both must have the same
    number of rows.

    The search tries both ``fit_intercept`` settings with 5-fold CV (negative
    MSE scoring); the best estimator is scored on a held-out 20% split and
    its R^2, MAE, MSE and RMSE are printed.

    Args:
        prediction_data: Feature matrix to train on.

    Returns:
        tuple: ``(rmse, None)``. The second element is always ``None``.
    """
    class_data = df['math score']

    # Seeded like model_random_forest so reruns yield the same split and metrics.
    X_train, X_test, y_train, y_test = train_test_split(
        prediction_data, class_data, test_size=0.2, random_state=42
    )

    model = LinearRegression()

    param_grid = {
        'fit_intercept': [True, False]
    }

    # n_iter equals the grid size, so both settings are always evaluated.
    optimized_model = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_grid,
        n_iter=2,
        scoring='neg_mean_squared_error',
        cv=5,
        random_state=42
    )

    optimized_model.fit(X_train, y_train)
    best_model = optimized_model.best_estimator_

    y_pred = best_model.predict(X_test)

    # A regressor's .score() is the coefficient of determination, not an
    # accuracy, so it is reported under its real name.
    r2 = best_model.score(X_test, y_test)
    print('Best Hyperparameters:', optimized_model.best_params_)
    print('R^2:', r2)

    mae = mean_absolute_error(y_test, y_pred)
    print('MAE:', mae)

    mse = mean_squared_error(y_test, y_pred)
    print('MSE:', mse)

    rmse = np.sqrt(mse)
    print('RMSE:', rmse)
    return rmse, None

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split, RandomizedSearchCV

def model_neural_network(prediction_data):
    """Tune and evaluate an MLP regressor on the raw math score.

    The target is the ``'math score'`` column of the notebook-level ``df``;
    ``prediction_data`` is split together with it, so both must have the same
    number of rows.

    A randomized search samples 2 of the 12 combinations of layer sizes,
    activation and solver (5-fold CV, negative MSE scoring); the best
    estimator is scored on a held-out 20% split and its R^2, MAE, MSE and
    RMSE are printed.

    Args:
        prediction_data: Feature matrix to train on.

    Returns:
        tuple: ``(rmse, None)``. The second element is always ``None``.
    """
    class_data = df['math score']

    # Seeded like model_random_forest so reruns yield the same split and metrics.
    X_train, X_test, y_train, y_test = train_test_split(
        prediction_data, class_data, test_size=0.2, random_state=42
    )

    # These constructor values are only starting points; the search below
    # overrides hidden_layer_sizes, activation and solver per candidate.
    model = MLPRegressor(
        hidden_layer_sizes=(100, 100),
        activation='relu',
        solver='adam',
        max_iter=2000,
        random_state=42
    )

    param_grid = {
        'hidden_layer_sizes': [(50, 50), (100, 100), (200, 200)],
        'activation': ['relu', 'tanh'],
        'solver': ['adam', 'lbfgs']
    }

    optimized_model = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_grid,
        n_iter=2,
        scoring='neg_mean_squared_error',
        cv=5,
        random_state=42
    )

    optimized_model.fit(X_train, y_train)
    best_model = optimized_model.best_estimator_

    y_pred = best_model.predict(X_test)

    # A regressor's .score() is the coefficient of determination, not an
    # accuracy, so it is reported under its real name.
    r2 = best_model.score(X_test, y_test)
    print('Best Hyperparameters:', optimized_model.best_params_)
    print('R^2:', r2)

    mae = mean_absolute_error(y_test, y_pred)
    print('MAE:', mae)

    mse = mean_squared_error(y_test, y_pred)
    print('MSE:', mse)

    rmse = np.sqrt(mse)
    print('RMSE:', rmse)
    return rmse, None


In [None]:
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor

def model_voting_regressor(prediction_data):
    """Evaluate a voting ensemble of linear regression and an MLP regressor.

    The target is the ``'math score'`` column of the notebook-level ``df``;
    ``prediction_data`` is split together with it, so both must have the same
    number of rows.

    The search tries both ``fit_intercept`` settings of the linear member
    with 5-fold CV (negative MSE scoring); the best ensemble is scored on a
    held-out 20% split and its R^2, MAE, MSE and RMSE are printed.

    Args:
        prediction_data: Feature matrix to train on.

    Returns:
        tuple: ``(ensemble_rmse, None)``. The second element is always
        ``None``.
    """
    class_data = df['math score']

    # Seeded like model_random_forest so reruns yield the same split and metrics.
    X_train, X_test, y_train, y_test = train_test_split(
        prediction_data, class_data, test_size=0.2, random_state=42
    )

    # The members are handed over unfitted: the search and VotingRegressor
    # fit their own clones, so fitting them here first would only cost time.
    linear_regression = LinearRegression()
    neural_network = MLPRegressor(
        hidden_layer_sizes=(100, 100),
        activation='relu',
        solver='adam',
        max_iter=2000,
        random_state=42
    )

    ensemble_model = VotingRegressor([('linear_regression', linear_regression),
                                      ('neural_network', neural_network)])

    param_grid = {
        'linear_regression__fit_intercept': [True, False]
    }

    # n_iter matches the grid size so both settings are actually compared;
    # with a single draw the "best" parameters would just be the one sampled.
    optimized_model = RandomizedSearchCV(
        estimator=ensemble_model,
        param_distributions=param_grid,
        n_iter=2,
        scoring='neg_mean_squared_error',
        cv=5,
        random_state=42
    )

    optimized_model.fit(X_train, y_train)
    best_model = optimized_model.best_estimator_
    ensemble_preds = best_model.predict(X_test)

    # A regressor's .score() is the coefficient of determination, not an
    # accuracy, so it is reported under its real name.
    ensemble_r2 = best_model.score(X_test, y_test)
    ensemble_mae = mean_absolute_error(y_test, ensemble_preds)
    ensemble_mse = mean_squared_error(y_test, ensemble_preds)
    ensemble_rmse = np.sqrt(ensemble_mse)

    print('Best Hyperparameters:', optimized_model.best_params_)
    print('Ensemble R^2:', ensemble_r2)
    print('Ensemble MAE:', ensemble_mae)
    print('Ensemble MSE:', ensemble_mse)
    print('Ensemble RMSE:', ensemble_rmse)

    return ensemble_rmse, None

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score

def model_random_forest(prediction_data):
    """Tune and evaluate a random-forest classifier.

    The target is the ``'math score discretized'`` column of the
    notebook-level ``df``; ``prediction_data`` is split together with it, so
    both must have the same number of rows.

    A randomized search (10 candidates, shuffled stratified 5-fold CV,
    accuracy scoring) picks the forest size and tree limits; the best
    estimator is then scored on a held-out 20% split. Best parameters,
    accuracy and ROC AUC are printed once each.

    Args:
        prediction_data: Feature matrix to train on.

    Returns:
        tuple: ``(accuracy, roc_auc, fpr, tpr)`` computed on the test split.
    """
    class_data = df['math score discretized']
    X_train, X_test, y_train, y_test = train_test_split(
        prediction_data, class_data, test_size=0.2, random_state=42
    )

    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 5, 10],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': ['sqrt', 'log2']
    }

    model = RandomForestClassifier(random_state=42)

    cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    optimized_model = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_grid,
        n_iter=10,
        scoring='accuracy',
        cv=cv_strategy,
        random_state=42
    )

    optimized_model.fit(X_train, y_train)

    best_model = optimized_model.best_estimator_
    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Column 1 of predict_proba is used as the positive-class score.
    # NOTE(review): this assumes a binary target - confirm how the
    # 'math score discretized' column is built.
    y_pred_proba = best_model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    # Reported a single time, together with the metrics they produced.
    print("Best Hyperparameters:", optimized_model.best_params_)
    print("Accuracy:", accuracy)
    print('ROC AUC:', roc_auc)

    return accuracy, roc_auc, fpr, tpr