# Problem 3: Feedforward Neural Networks for Regression

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

# ===== Optional : import other libraries here ===== #

# ===== End of Optional : import other libraries here ===== #

## P3(a) Download and load dataset

- Download the concrete compressive strength dataset from the UCI Machine Learning Repository ([link](http://archive.ics.uci.edu/ml/datasets/Concrete+Compressive+Strength)).
- Extract and put `Concrete_Data.xls` under directory `data/`.
- Run the code block below to verify the download; it must finish without raising an error.

In [None]:
# Load the concrete dataset into `df` and show a short preview.
# Raises Warning (with the underlying error attached as its cause) when the
# spreadsheet cannot be read.
df = None
try:
    df = pd.read_excel('./data/Concrete_Data.xls')
except Exception as err:
    # Catch Exception instead of using a bare `except:` so that Ctrl-C
    # (KeyboardInterrupt) and SystemExit are not reported as a dataset problem.
    # `from err` marks the read failure as the direct cause in the traceback.
    raise Warning(">>> Your dataset is NOT ready for the next step. Fix this first.") from err
else:
    # Only reached when the read succeeded, so `df` is a loaded DataFrame here.
    print(">>> Available features:", list(df.columns))
    print(">>> Example 3 data points: \n", df.head(3))

## P3(b) Split the dataset
- Pick the first 730 data points as the training set and the last 300 points as the test set.
- Use `Concrete compressive strength(MPa, megapascals) ` column as label (i.e., y).
- Your code should pass the assertions at the end of the code block. Do not proceed until they pass.

In [None]:
# Placeholders for the four splits; the section below must assign all of them.
x_train, y_train, x_test, y_test = None, None, None, None

# ===== Split training and test dataset ===== #

# ===== End of Split training and test dataset ===== #

# Convert to numpy array
# Each split must expose `.to_numpy()` (e.g. a pandas DataFrame/Series);
# a name that is still None fails here with AttributeError.
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

# Self-checks: 730 training rows, 300 test rows, 8 feature columns.
# Each message reports the shape that was actually produced.
assert x_train.shape == (730, 8), f"x_train.shape={x_train.shape}, expected (730, 8)"
assert x_test.shape == (300, 8), f"x_test.shape={x_test.shape}, expected (300, 8)"
# Labels may be a flat vector or a single column; anything wider is not a
# single label column and is rejected.
assert y_train.shape in ((730,), (730, 1)), f"y_train.shape={y_train.shape}, expected (730,) or (730, 1)"
assert y_test.shape in ((300,), (300, 1)), f"y_test.shape={y_test.shape}, expected (300,) or (300, 1)"
print(">>> P3(b) passed.")

## P3(c) Implement a neural network 
- Use a single hidden layer with `early_stopping=False`.
- Use a trial-and-error strategy to find the network structure that yields the lowest test error.
- Your code should reflect your multiple trials and then report the optimal configurations.
- If you encounter a warning such as "ConvergenceWarning", consider increasing the `max_iter` parameter of `MLPRegressor`.

In [None]:
# ===== Define parameters for trial and error ===== #
# Each list below holds the candidate values for one MLPRegressor argument;
# the nested loops further down visit every combination of them.
# Other MLPRegressor parameters may be tuned as well, but only these four
# lists are looped over in this cell.
# Refer to https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html
hidden_layer_sizes = [] # example: [(1)], note that it must be single layer; (1) is the plain int 1, the one-element tuple is written (1,)
activation = [] # example: ['relu'], list every available option for this attribute
solver = [] # example: ['adam'], list every available option for this attribute
alpha = [] # example: [1e-3]
# ===== End of Define parameters for trial and error ===== #

# This is to prevent unaffordable time complexity:
# the number of combinations (product of the list lengths) must stay below 500.
# If the assertion fails, remove some choices before submission. You can comment this out during implementation.
assert(len(hidden_layer_sizes) * len(activation) * len(solver) * len(alpha) < 500)

# Running record of the best trial. The error starts at +inf, and every entry
# of best_settings starts as None.
best_test_error = np.inf
best_settings = {
    "hidden_layer_sizes": None,
    "activation": None,
    "solver": None,
    "alpha": None,
    # The loops below have no variable for the next two keys; they stay None
    # unless you set them yourself.
    "batch_size": None,
    "learning_rate_init": None
}
# Loop through parameters: h, a, s, al are the current hidden_layer_sizes,
# activation, solver and alpha candidates.
# NOTE: the innermost body below contains only comments, so Python will not
# parse this cell until at least one statement is added there.
for h in hidden_layer_sizes:
    for a in activation:
        for s in solver:
            for al in alpha:
                # ===== Implement a network with iterated settings ===== #
                # Note: set validation_fraction to 0.1 or leave as default
                
                # ===== End of Implement a network with iterated settings ===== #
                
                # ===== Train network ===== #
                
                # ===== End of Train network ===== #
                
                # ===== Test network ===== #
                
                # ===== End of Test network ===== #
                
                # ===== Compute mean squared error ===== #
                
                # ===== End of Compute mean squared error ===== #
                
                # ===== Is it the best setting ===== #
                # (best_test_error and best_settings above are the variables
                # the report cell prints)
                
                # ===== End of Is it the best setting ===== #

In [None]:
# Print the recorded best test error and the recorded best settings,
# one per line.
print(
    f">>> best_test_error={best_test_error}",
    f">>> best_settings={best_settings}",
    sep="\n",
)

# Extra Credit
- `early_stopping=True`
- Tune validation rate
- Your test error must be lower than your best result from P3(c) to receive credit.

In [None]:
# ===== Define parameters for trial and error ===== #
# Each list below holds the candidate values for one setting; the nested
# loops further down visit every combination of them.
hidden_layer_sizes = [] # example: [(1)], note that it must be single layer; (1) is the plain int 1, the one-element tuple is written (1,)
activation = [] # example: ['relu'], list every available option for this attribute
solver = [] # example: ['adam'], list every available option for this attribute
alpha = [] # example: [1e-3]
validation_rate = [] # example: [.1]
# ===== End of Define parameters for trial and error ===== #

# This is to prevent unaffordable time complexity:
# the number of combinations (product of the list lengths) must stay below 1000.
# If the assertion fails, remove some choices before submission. You can comment this out during implementation.
assert(len(hidden_layer_sizes) * len(activation) * len(solver) * len(alpha) * len(validation_rate) < 1000)

# Running record of the best trial. The error starts at +inf, and every entry
# of best_settings starts as None.
best_test_error = np.inf
best_settings = {
    "hidden_layer_sizes": None,
    "activation": None,
    "solver": None,
    "alpha": None,
    # The loops below have no variable for the next two keys; they stay None
    # unless you set them yourself.
    "batch_size": None,
    "learning_rate_init": None,
    "validation_rate": None
}
# Loop through parameters: h, a, s, al, v are the current hidden_layer_sizes,
# activation, solver, alpha and validation_rate candidates.
# NOTE: the innermost body below contains only comments, so Python will not
# parse this cell until at least one statement is added there.
for h in hidden_layer_sizes:
    for a in activation:
        for s in solver:
            for al in alpha:
                for v in validation_rate:
                    # ===== Implement a network with iterated settings ===== #
                    # Note: this part uses early_stopping=True; v is the
                    # validation rate being tried (MLPRegressor's
                    # validation_fraction argument)
                    
                    # ===== End of Implement a network with iterated settings ===== #
                    
                    # ===== Train network ===== #
                    
                    # ===== End of Train network ===== #
                    
                    # ===== Test network ===== #
                    
                    # ===== End of Test network ===== #
                    
                    # ===== Compute mean squared error ===== #
                    
                    # ===== End of Compute mean squared error ===== #
                    
                    # ===== Is it the best setting ===== #
                    # (best_test_error and best_settings above are the
                    # variables the report cell prints)
                    
                    # ===== End of Is it the best setting ===== #

In [None]:
# Print the recorded best test error and the recorded best settings,
# one per line.
print(
    f">>> best_test_error={best_test_error}",
    f">>> best_settings={best_settings}",
    sep="\n",
)