In [None]:
# Part 1: Importing Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

# Part 2: Loading the Data
# The concrete dataset is read directly from its hosted CSV file.
url = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
data = pd.read_csv(url)

# Every column except 'Strength' is a predictor (X); 'Strength' is the target (y)
X = data.drop('Strength', axis=1)
y = data.loc[:, 'Strength']

# Part A: Without Normalization

# Function to create and train the model
def create_and_train_model(X_train, y_train, X_test, y_test, epochs=50):
    """Train a one-hidden-layer regression network and score it on held-out data.

    A new model is built on every call: a 10-unit ReLU hidden layer feeding a
    single linear output, compiled with the Adam optimizer and a mean squared
    error loss.

    Args:
        X_train: Training predictors; ``X_train.shape[1]`` sets the input width.
        y_train: Training targets.
        X_test: Held-out predictors passed to ``model.predict``.
        y_test: Held-out targets compared against the predictions.
        epochs: Number of training epochs. Defaults to 50, the value that was
            previously hard-coded, so existing four-argument calls are
            unaffected.

    Returns:
        The mean squared error between ``y_test`` and the model's predictions
        for ``X_test``.
    """
    # Building the baseline model
    model = Sequential()
    model.add(Dense(10, input_shape=(X_train.shape[1],), activation='relu'))  # One hidden layer with 10 nodes
    model.add(Dense(1))  # Output layer with a single neuron for regression

    # Compiling the model
    model.compile(optimizer=Adam(), loss='mean_squared_error')

    # Training silently (verbose=0) for the requested number of epochs
    model.fit(X_train, y_train, epochs=epochs, verbose=0)

    # Evaluating the model on the test data
    y_pred = model.predict(X_test)
    return mean_squared_error(y_test, y_pred)

# Running the Experiment 50 Times
# Each repetition draws its own 70/30 split, seeded by the loop index, so the
# spread of the test error across splits can be reported afterwards.
mse_list = []

for i in range(50):
    # Hold out 30% of the rows for testing; the remaining 70% train the model
    split = train_test_split(X, y, test_size=0.3, random_state=i)
    X_train, X_test, y_train, y_test = split

    # Fit a fresh model on this split and record its test error
    mse = create_and_train_model(X_train, y_train, X_test, y_test)
    mse_list.append(mse)
    print(f"Iteration {i+1}: MSE = {mse}")

# Reporting the Results for Part A
mse_array = np.asarray(mse_list)
mean_mse = mse_array.mean()
std_mse = mse_array.std()

print(f"\nMean of MSE over 50 iterations (Without Normalization): {mean_mse}")
print(f"Standard Deviation of MSE over 50 iterations (Without Normalization): {std_mse}")

# Part B: Using Normalized Data

# Function to normalize the data
def normalize_data(X, mean=None, std=None):
    """Standardize each column of ``X`` by subtracting a mean and dividing by a std.

    Args:
        X: Column-oriented numeric data exposing ``mean(axis=0)`` and
            ``std(axis=0)`` (for example a pandas DataFrame).
        mean: Optional per-column means to subtract. When omitted, the means
            of ``X`` itself are used.
        std: Optional per-column standard deviations to divide by. When
            omitted, the standard deviations of ``X`` itself are used.

    Returns:
        ``(X - mean) / std`` with the same layout as ``X``. Columns whose
        standard deviation is exactly zero are divided by 1 instead, so a
        constant column comes back as zeros rather than NaN/inf.
    """
    if mean is None:
        mean = X.mean(axis=0)
    if std is None:
        std = X.std(axis=0)

    # ``std == 0`` is True (i.e. 1) exactly where the spread is zero, so adding
    # it turns those divisors into 1 and leaves every other divisor unchanged.
    safe_std = std + (std == 0)
    return (X - mean) / safe_std

# Normalize the predictors
# NOTE(review): the mean and std used here are computed over every row of X,
# i.e. before any train/test split is made.
X_normalized = normalize_data(X)

# Function to create and train the model on normalized data
def create_and_train_model_normalized(X_train, y_train, X_test, y_test):
    """Train and evaluate the baseline model on already-normalized data.

    The training and evaluation procedure does not depend on whether the
    inputs were normalized, so this delegates to ``create_and_train_model``
    instead of repeating the same model definition.

    Args:
        X_train: Normalized training predictors.
        y_train: Training targets.
        X_test: Normalized held-out predictors.
        y_test: Held-out targets.

    Returns:
        The mean squared error on the held-out data, as returned by
        ``create_and_train_model``.
    """
    return create_and_train_model(X_train, y_train, X_test, y_test)

# Running the Experiment 50 Times on Normalized Data
mse_list_normalized = []

for i in range(50):
    # Split the raw predictors first (70% train, 30% test) so the held-out
    # rows play no part in computing the normalization statistics.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    # Standardize both partitions with the training partition's mean and std.
    # A zero std (constant column) is replaced by 1 to avoid dividing by zero.
    train_mean = X_train.mean(axis=0)
    train_std = X_train.std(axis=0).replace(0, 1)
    X_train = (X_train - train_mean) / train_std
    X_test = (X_test - train_mean) / train_std

    # Train and evaluate the model on normalized data, then store the mean squared error
    mse = create_and_train_model_normalized(X_train, y_train, X_test, y_test)
    mse_list_normalized.append(mse)
    print(f"Iteration {i+1}: MSE (Normalized) = {mse}")

# Reporting the Results for Normalized Data
mean_mse_normalized = np.mean(mse_list_normalized)
std_mse_normalized = np.std(mse_list_normalized)

print(f"\nMean of MSE over 50 iterations (Normalized Data): {mean_mse_normalized}")
print(f"Standard Deviation of MSE over 50 iterations (Normalized Data): {std_mse_normalized}")

# Compare with Step A (negative differences mean the normalized runs scored lower)
print(f"\nComparison with Step A:")
print(f"Difference in Mean MSE: {mean_mse_normalized - mean_mse}")
print(f"Difference in Standard Deviation of MSE: {std_mse_normalized - std_mse}")


  LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'
Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])







  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])







2024-08-23 10:25:12.102618: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
2024-08-23 10:25:12.107384: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2394310000 Hz
2024-08-23 10:25:12.114320: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x56183fbe1d80 executing computations on platform Host. Devices:
2024-08-23 10:25:12.114379: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>


Iteration 1: MSE = 109.46359081114805
Iteration 2: MSE = 159.4700617907567
Iteration 3: MSE = 133.37523556057147
Iteration 4: MSE = 150.57748300555002
Iteration 5: MSE = 1705.6166517497343
Iteration 6: MSE = 427.51130406475687
Iteration 7: MSE = 154.70912301629835
Iteration 8: MSE = 131.4503069484145
Iteration 9: MSE = 193.16635499309203
Iteration 10: MSE = 1547.624177388336
Iteration 11: MSE = 385.14856666081175
Iteration 12: MSE = 108.33367460636323
Iteration 13: MSE = 199.17889327336238
Iteration 14: MSE = 313.58810108117024
Iteration 15: MSE = 250.55564921449695
Iteration 16: MSE = 296.72174894345983
Iteration 17: MSE = 122.23717455543178
Iteration 18: MSE = 100.49636985739919
Iteration 19: MSE = 98.61710307322039
Iteration 20: MSE = 1890.2077530906313
Iteration 21: MSE = 589.0002572735727
Iteration 22: MSE = 147.36663294509228
Iteration 23: MSE = 113.58027487929311
Iteration 24: MSE = 149.12373918938243
Iteration 25: MSE = 203.62871664783438
Iteration 26: MSE = 119.29642932705394
