# Problem 3: Feedforward Neural Networks for Regression

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

# ===== Optional : import other libraries here ===== #
from sklearn.metrics import mean_squared_error

# ===== End of Optional : import other libraries here ===== #

## P3(a) Download and load dataset

- Download the Concrete Compressive Strength dataset from the UCI Machine Learning Repository ([link](http://archive.ics.uci.edu/ml/datasets/Concrete+Compressive+Strength)).
- Extract and put `Concrete_Data.xls` under directory `data/`.
- Run the code block below to verify the download.

In [3]:
# Load the concrete dataset into `df` and show its columns plus the first three rows.
# If anything goes wrong while reading or printing, a Warning is raised.
df = None
try:
    df = pd.read_excel('./data/Concrete_Data.xls')
    print(">>> Available features:", list(df.columns))
    print(">>> Example 3 data points: \n", df.head(3))
except Exception as err:
    # Catch Exception instead of using a bare `except:` so that kernel interrupts
    # (KeyboardInterrupt / SystemExit) are not turned into this message, and chain
    # the original error so the underlying cause (e.g. a missing file) stays visible.
    raise Warning(">>> Your dataset is NOT ready for the next step. Fix this first.") from err

>>> Available features: ['Cement (component 1)(kg in a m^3 mixture)', 'Blast Furnace Slag (component 2)(kg in a m^3 mixture)', 'Fly Ash (component 3)(kg in a m^3 mixture)', 'Water  (component 4)(kg in a m^3 mixture)', 'Superplasticizer (component 5)(kg in a m^3 mixture)', 'Coarse Aggregate  (component 6)(kg in a m^3 mixture)', 'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)', 'Concrete compressive strength(MPa, megapascals) ']
>>> Example 3 data points: 
    Cement (component 1)(kg in a m^3 mixture)  \
0                                      540.0   
1                                      540.0   
2                                      332.5   

   Blast Furnace Slag (component 2)(kg in a m^3 mixture)  \
0                                                0.0       
1                                                0.0       
2                                              142.5       

   Fly Ash (component 3)(kg in a m^3 mixture)  \
0                                        

## P3(b) Split the dataset
- Pick the first 730 data points as the training set and the last 300 points as the test set.
- Use `Concrete compressive strength(MPa, megapascals) ` column as label (i.e., y).
- Your code should pass the assertions at the end of the code block. Do not proceed until they pass.

In [4]:
x_train, y_train, x_test, y_test = None, None, None, None

# ===== Split training and test dataset ===== #
# Rows: the first 730 form the training set, the last 300 form the test set.
# Columns: the last column is the regression label (y); everything before it is a feature.
train_rows = df.iloc[:730]
test_rows = df.iloc[-300:]

# Slice features/label out of each row subset and convert straight to numpy arrays.
x_train = train_rows.iloc[:, :-1].to_numpy()
y_train = train_rows.iloc[:, -1].to_numpy()
x_test = test_rows.iloc[:, :-1].to_numpy()
y_test = test_rows.iloc[:, -1].to_numpy()
# ===== End of Split training and test dataset ===== #

# Sanity checks on the split sizes; the labels may be 1-D or column-shaped.
assert x_train.shape == (730, 8)
assert x_test.shape == (300, 8)
assert y_train.shape == (730,) or y_train.shape[0] == 730
assert y_test.shape == (300,) or y_test.shape[0] == 300
print(">>> P3(b) passed.")

>>> P3(b) passed.


## P3(c) Implement a neural network 
- Use a single hidden layer with `early_stopping=False`.
- Use a trial-and-error strategy to find the network structure that yields the lowest test error.
- Your code should reflect your multiple trials and then report the optimal configuration.
- If you encounter a warning such as "ConvergenceWarning", consider increasing the `max_iter` parameter of `MLPRegressor`.

In [5]:
# ===== Define parameters for trial and error ===== #
# Candidate values for each tuned MLPRegressor argument; every combination is tried below.
# Other arguments may be tuned as well, but only these four are looped over here.
# Refer to https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html
hidden_layer_sizes = [(1,), (5,), (10,), (100,), (1000,)]  # one hidden layer per candidate
activation = ['relu']
solver = ['sgd', 'adam']
alpha = [0.0001, 0.001, 0.01, 0.05, 0.005, 0.0005, 0.5]
max_iters = 5000
# ===== End of Define parameters for trial and error ===== #

# Keep the search affordable: refuse to run 500 or more combinations.
# Trim the candidate lists if this fails (it may be commented out while experimenting).
n_trials = len(hidden_layer_sizes) * len(activation) * len(solver) * len(alpha)
assert n_trials < 500

# Running record of the lowest test MSE and the settings that produced it.
# "batch_size" and "learning_rate_init" are not tuned in this cell and stay None.
best_test_error = np.inf
best_settings = dict.fromkeys(
    ["hidden_layer_sizes", "activation", "solver", "alpha", "batch_size", "learning_rate_init"]
)

# Flattened grid, in the same visiting order as four nested loops
# (hidden layer size outermost, alpha innermost).
trial_grid = [
    (layers, act, opt, reg)
    for layers in hidden_layer_sizes
    for act in activation
    for opt in solver
    for reg in alpha
]

for layers, act, opt, reg in trial_grid:
    # ===== Implement a network with iterated settings ===== #
    # validation_fraction is left at its default.
    mlp = MLPRegressor(
        hidden_layer_sizes=layers,
        activation=act,
        solver=opt,
        alpha=reg,
        max_iter=max_iters,
        random_state=42,
        early_stopping=False,
    )
    # ===== End of Implement a network with iterated settings ===== #

    # ===== Train network ===== #
    mlp.fit(x_train, y_train)
    # ===== End of Train network ===== #

    # ===== Test network ===== #
    y_pred = mlp.predict(x_test)
    # ===== End of Test network ===== #

    # ===== Compute mean squared error ===== #
    test_error = mean_squared_error(y_test, y_pred)
    # ===== End of Compute mean squared error ===== #
    print(test_error)

    # ===== Is it the best setting ===== #
    # Strict "<" keeps the earliest configuration when two trials tie.
    if test_error < best_test_error:
        best_test_error = test_error
        best_settings.update(
            hidden_layer_sizes=layers,
            activation=act,
            solver=opt,
            alpha=reg,
        )
    # ===== End of Is it the best setting ===== #

201.6100415992949
201.6100415992949
201.6100415992949
201.6100415992949
201.6100415992949
201.6100415992949
201.6100415992949
74.24333078666892
74.24166475569821
74.21181224474101
74.21114259787437
74.22393645766125
74.22232708476902
74.20761703701166
201.34488532686072
201.34488532686072
201.81372005378105




202.0051388937885
201.34488532686072
201.34488532686072
554.4943216378971
72.4963916301999
72.31677373339421
72.30189572775498
72.23694842278411
72.26941014743933
72.30186736169155
72.25350427195224
2.517945541662432e+24
2.5179454790517583e+24
2.5179448529451226e+24
2.517942070250395e+24
2.517945200782136e+24
2.517945513835455e+24
2.5179107650947533e+24
70.99906572283331
65.07216982950285
64.87779673872947
64.84078601666225
64.87730187502792
71.11845787194889
64.81564826167578
3.853264312967568e+25
3.85326426946181e+25
3.8532638344041974e+25
3.853261900815177e+25
3.8532640761028743e+25
3.8532642936316768e+25
3.8532401479825643e+25
94.51610968100833
56.77587569940931
92.0153760206476
61.978611419075214
84.11408367479434
73.50629829816567
97.01677385851553
1.1100000152086364e+41
1.1099994532859422e+41
1.1099938340739843e+41
1.1099688601273917e+41
1.1099969558550661e+41
1.1099997654652394e+41
1.1096879402598103e+41
52.18670889895816
66.09880609197772
61.53704656216663
65.93061774809243
67

In [6]:
# Report the lowest test MSE found by the search above and the settings that produced it.
print(f">>> best_test_error={best_test_error}")
print(f">>> best_settings={best_settings}")
# Result recorded from my run:
# best_test_error=52.18670889895816
# best_settings={'hidden_layer_sizes': (1000,), 'activation': 'relu', 'solver': 'adam', 'alpha': 0.0001, 'batch_size': None, 'learning_rate_init': None}

>>> best_test_error=52.18670889895816
>>> best_settings={'hidden_layer_sizes': (1000,), 'activation': 'relu', 'solver': 'adam', 'alpha': 0.0001, 'batch_size': None, 'learning_rate_init': None}


# Extra Credit
- Use `early_stopping=True`.
- Tune the validation rate (`validation_fraction` in `MLPRegressor`).
- Your test error must beat the best result from P3(c) to receive credit.

In [7]:
# ===== Define parameters for trial and error ===== #
# We will loop through your defined available settings.
hidden_layer_sizes = [(1,), (5,), (20,), (30,), (40,), (50,), (10,), (100,), (1000,)] # example: [(1)], note that it must be single layer
activation = ['relu'] # example: ['relu'], you need to exhaust this attribute
solver = ['sgd', 'adam'] # example: ['adam'], you need to exhaust this attribute
alpha = [0.0001, 0.001, 0.01, 0.05, 0.005, 0.0005] # example: [1e-3]
validation_rate = [.1, .2]
# Defined here (same value as in P3(c)) so this cell does not rely on the
# P3(c) cell having been executed first in the current kernel.
max_iters = 5000
# ===== End of Define parameters for trial and error ===== #

# This is to prevent unaffordable time complexity
# If not passed, try eliminate some choices upon submission. You can comment this out during implementation.
assert(len(hidden_layer_sizes) * len(activation) * len(solver) * len(alpha) * len(validation_rate) < 1000)

# Running record of the lowest test MSE and the settings that produced it.
# "batch_size", "learning_rate_init" and "learning_rate" are not tuned in this
# cell and therefore stay None.
best_test_error = np.inf
best_settings = {
    "hidden_layer_sizes": None,
    "activation": None,
    "solver": None,
    "alpha": None,
    "batch_size": None,
    "learning_rate_init": None,
    "validation_rate": None,
    "learning_rate": None
}
# Loop through parameters
for h in hidden_layer_sizes:
    for a in activation:
        for s in solver:
            for al in alpha:
                for v in validation_rate:
                    # ===== Implement a network with iterated settings ===== #
                    # With early stopping enabled, validation_fraction is one of the tuned settings.
                    mlp = MLPRegressor(
                        hidden_layer_sizes=h,
                        activation=a,
                        solver=s,
                        alpha=al,
                        validation_fraction=v,
                        max_iter=max_iters,
                        random_state=42,
                        early_stopping=True,
                    )
                    # ===== End of Implement a network with iterated settings ===== #

                    # ===== Train network ===== #
                    mlp.fit(x_train, y_train)
                    # ===== End of Train network ===== #

                    # ===== Test network ===== #
                    y_pred = mlp.predict(x_test)
                    # ===== End of Test network ===== #

                    # ===== Compute mean squared error ===== #
                    test_error = mean_squared_error(y_test, y_pred)
                    # ===== End of Compute mean squared error ===== #
                    print(test_error)
                    # ===== Is it the best setting ===== #
                    # Strict "<" keeps the earliest configuration when two trials tie.
                    if test_error < best_test_error:
                        best_test_error = test_error
                        best_settings['hidden_layer_sizes'] = h
                        best_settings['activation'] = a
                        best_settings['solver'] = s
                        best_settings['alpha'] = al
                        best_settings['validation_rate'] = v
                    # ===== End of Is it the best setting ===== #

187.79834792064298
194.91961565373632
187.79834792064298
194.91961565373632
187.79834792064298
194.91961565373632
187.79834792064298
194.91961565373632
187.79834792064298
194.91961565373632
187.79834792064298
194.91961565373632
81.79678130119459
79.72079497639356
81.91279683195384
80.16678319842298
82.0672445688663
79.8515922365861
82.06879350389568
79.82479056700136
82.09397163131415
79.88241859993569
81.77954844330014
80.48843406485841
198.74960104442212
197.61325655437338
198.74960104442212
197.61325655437338
198.74960104442212
197.61325655437338
198.74960104442212
197.61325655437338
198.74960104442212
197.61325655437338
198.74960104442212
197.61325655437338
88.01510077919568
85.65206322763497
88.13855737150398
82.91875913575163
88.70984191009634
83.11807109288178
88.70134351941297
83.09772286103107
88.74351624674476
83.00603490654235
88.08149505530365
82.70574196450934
5.160768478252592e+25
3.245153676871857e+25
5.160768432963157e+25
3.245153649943984e+25
5.160767980068811e+25
3.24

In [8]:
# Report the lowest test MSE found by the early-stopping search above and its settings.
print(f">>> best_test_error={best_test_error}")
print(f">>> best_settings={best_settings}")
# Result recorded from my run; it beats the P3(c) best of about 52.19:
# best_test_error=49.339340114870986
# best_settings={'hidden_layer_sizes': (20,), 'activation': 'relu', 'solver': 'adam', 'alpha': 0.001, 'batch_size': None, 'learning_rate_init': None, 'validation_rate': 0.1, 'learning_rate': None}

>>> best_test_error=49.339340114870986
>>> best_settings={'hidden_layer_sizes': (20,), 'activation': 'relu', 'solver': 'adam', 'alpha': 0.001, 'batch_size': None, 'learning_rate_init': None, 'validation_rate': 0.1, 'learning_rate': None}
