<a href="https://colab.research.google.com/github/Amal211/DS_level2/blob/main/DS630_AI_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import the required libraries

import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt


# Load the dataset from the course repository into the module-level frame
# that main() reads.
df = pd.read_csv('https://raw.githubusercontent.com/Amal211/DS_level2/main/california_housing_sale_regression.csv')

# Count the missing values in every column before cleaning.
missing_values = df.isnull().sum()

print('\nThe miising values by columns:\n\n{}\n\n' .format(missing_values))

# Replace missing 'total_bedrooms' values with the column mean.
# The result is assigned back instead of calling fillna(inplace=True) on the
# column selection: that chained in-place form is deprecated in pandas 2.x and
# does not modify `df` once copy-on-write is enabled.
df['total_bedrooms'] = df['total_bedrooms'].fillna(df['total_bedrooms'].mean())

# Count again to confirm the imputation removed the missing values.
missing_values = df.isnull().sum()

print('\nThe miising values by columns:\n\n{}\n\n' .format(missing_values))

# Drop the 'ocean_proximity' column so it is not part of the frame used
# for training.
df = df.drop(columns='ocean_proximity')

def main():
    """Train and evaluate a small MLP regressor on the housing data.

    Reads the module-level ``df``, standardizes the first seven columns as
    features, fits a three-layer dense network to ``median_house_value``,
    prints the evaluation metrics and the predictions, and plots the training
    mean squared error per epoch.
    """
    # identify features X and the target Y
    # NOTE(review): the features are picked by position (first seven columns);
    # confirm this is the intended feature set for this CSV.
    X = df.iloc[:, 0:7]
    Y = df['median_house_value']

    # Standardize the features to improve the model performance [2].
    # Only the features are scaled (not the target), so the predictions stay
    # in the original units of 'median_house_value'. Previously the scaler was
    # fitted on the whole frame and its output was never passed to the model.
    # NOTE(review): the scaler is fitted on all rows, including the rows Keras
    # later holds out through validation_split.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # We create a sequential model, i.e. a linear stack of layers [3].
    # The model has 3 dense (fully connected) layers:
    # first layer with 16 outputs
    # second layer with 8 outputs
    # last layer with 1 output

    # We use the ReLU activation function in the first and second layers.
    # ReLU (Rectified Linear Unit) has a low computational cost since it
    # passes positive values through unchanged; negative values become zero
    # and the corresponding neurons are not activated [4].

    # We use a linear activation in the last layer to get an unbounded
    # numerical prediction (negative or positive).
    # It is not used in the earlier layers because a stack of purely linear
    # layers collapses into a single linear map, so the network could not
    # model non-linear relationships [5].

    model = Sequential()
    model.add(Dense(16, input_shape=(7,), activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='linear'))

    # Build the model and start training.
    # We use Adam (Adaptive Moment Estimation), which uses adaptive learning
    # rates to update the network weights iteratively from the training
    # data [6].

    # The loss is the value we seek to minimize during training; the lower
    # the loss, the closer the predictions are to the labels.
    # mean_absolute_error (the loss) is the mean of the absolute differences
    # between the labels and the predictions, while mean_squared_error (the
    # reported metric) is the mean of the squared differences [7].

    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_squared_error'])

    # Keep the History object returned by fit(); it holds the per-epoch
    # metrics plotted below.
    history = model.fit(X_scaled, Y, epochs=250, batch_size=10, verbose=2, validation_split=0.2)

    # evaluate() returns the loss followed by the compiled metrics, i.e.
    # [mean_absolute_error, mean_squared_error]; this is not an accuracy.
    loss_and_mse = model.evaluate(X_scaled, Y, verbose=0)

    print(loss_and_mse)

    # make a prediction
    y_prediction = model.predict(X_scaled)
    print('\nThe predicted median house values:\n\n{}\n\n' .format(y_prediction))

    # show the training metric per epoch
    plt.plot(history.history['mean_squared_error'])
    plt.xlabel('epoches')
    plt.ylabel('mean_squared_error')
    plt.show()

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

# The output
    