# Imports

In [None]:
import keras 
import pandas as pd 
import numpy as np 
import io

# Data Download (google colab)

In [None]:
# Read the raw listings table from the working directory.
AllBostonData = pd.read_csv(filepath_or_buffer='listings.csv')


# Preprocessing

## Features

In [None]:
# Preview the first ten rows of the raw data.
print(AllBostonData.head(10))
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only appended a stray "None" to the output.
AllBostonData.info()

## Removed Features

In [None]:
# Positional indices of the columns to discard: the first 26 columns plus a
# hand-picked set. Location-based columns are removed as well because
# longitude and latitude are included.
columnIndexToRemove = list(range(26))
columnIndexToRemove.extend([
    29, 30, 31, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    59, 61, 62, 69, 70, 75, 77, 78, 86, 87, 88,
])

droppedColumns = AllBostonData.iloc[:, columnIndexToRemove]
print("Dropped data:")
# info() prints its own report and returns None; it is not wrapped in print()
# so no stray "None" is emitted.
droppedColumns.info()
BostonData = AllBostonData.drop(columns=droppedColumns.columns)


### New DataFrame Display

In [None]:
# info() prints its own report and returns None, so it is called directly
# instead of being wrapped in print() (which also printed "None").
BostonData.info()

## Columns that we have to modify

### Find NaNs in the object (string) columns

In [None]:
# Column groups handled by the preprocessing steps of this notebook.
columnsToOneHotEncode = ['property_type', 'room_type', 'bed_type', 'cancellation_policy']
columnToScaleAndParse = ['host_response_rate', 'host_acceptance_rate']
TrueFalseColumns = ['host_is_superhost', 'host_has_profile_pic', 'host_identity_verified',
                    'is_location_exact', 'instant_bookable', 'require_guest_profile_picture',
                    'require_guest_phone_verification']
dollarSignColumns = ["extra_people", "price", "security_deposit", "cleaning_fee"]

# For every categorical column, remember the row labels whose value is missing.
nanIndexDict = {}
for column in columnsToOneHotEncode:
    missingMask = BostonData[column].isnull()
    if missingMask.any():
        print("{} has null values".format(column))
        nanIndexDict[column] = BostonData[column][missingMask].index

# Missing categories receive the literal label "PlaceHolder". The value itself
# never needs to change: it is encoded later as a category of its own.
for column, missingIndex in nanIndexDict.items():
    BostonData.loc[missingIndex, column] = "PlaceHolder"

### Encoding 't'/'f' flag columns as 1/0

In [None]:
TrueFalseColumns = ['host_is_superhost', 'host_has_profile_pic', 'host_identity_verified', 'is_location_exact', 'instant_bookable', 'require_guest_profile_picture', 'require_guest_phone_verification']
def to10(xVals):
    """Encode a 't'/'f' flag as 1/0.

    Parameters
    ----------
    xVals : object
        A single cell value: the string 't' or 'f', a missing value, or a
        flag that has already been encoded as 1/0.

    Returns
    -------
    int
        1 for 't' (or an already-encoded 1), otherwise 0. Missing values
        therefore map to 0.
    """
    if isinstance(xVals, str):
        return 1 if xVals == 't' else 0
    # Already-encoded flags pass through unchanged, so re-running the encoding
    # cell no longer turns every 1 into 0. NaN compares unequal to 1 and
    # still yields 0.
    return 1 if xVals == 1 else 0

# Replace every flag column with its 1/0 encoding, one column at a time.
for flagColumn in TrueFalseColumns:
    BostonData[flagColumn] = BostonData[flagColumn].apply(to10)


### % to decimal

In [None]:
columnToScaleAndParse = ['host_response_rate', 'host_acceptance_rate']
def removeAndScale(xVals):
    """Convert a percentage string such as "95%" into a fraction (0.95).

    Parameters
    ----------
    xVals : str or float or None
        A single cell value. Strings have their "%" removed and are divided
        by 100. Numeric values are passed through unchanged.

    Returns
    -------
    float
        The fraction, or NaN when the value is missing.

    Raises
    ------
    ValueError
        If a string does not parse as a number once "%" is removed.
    """
    if isinstance(xVals, str):
        return float(xVals.replace("%", "")) / 100
    if xVals is None:
        return np.nan
    # A non-string is either NaN (missing) or a fraction produced by an earlier
    # run of this cell. Returning it as-is keeps the cell idempotent instead of
    # replacing every parsed value with NaN on a re-run.
    return float(xVals)
# Parse each percentage column into fractions.
for rateColumn in columnToScaleAndParse:
    BostonData[rateColumn] = BostonData[rateColumn].apply(removeAndScale)


In [None]:
# Inspect the parsed response-rate column.
print(BostonData.loc[:, "host_response_rate"])

### Remove $$$ signs

In [None]:
# Columns whose values are passed through removeDollar to strip "$" and ",".
dollarSignColumns = "extra_people price security_deposit cleaning_fee".split()

In [None]:
def removeDollar(xVals):
    """Convert a currency string such as "$1,250.00" into a float (1250.0).

    Parameters
    ----------
    xVals : str or float or None
        A single cell value. Strings have "$" and "," removed before parsing.
        Numeric values are passed through unchanged.

    Returns
    -------
    float
        The parsed amount, or NaN when the value is missing.

    Raises
    ------
    ValueError
        If a string does not parse as a number once "$" and "," are removed.
    """
    if isinstance(xVals, str):
        return float(xVals.replace("$", "").replace(",", ""))
    if xVals is None:
        return np.nan
    # A non-string is either NaN (missing) or an amount parsed by an earlier
    # run of this cell. Returning it as-is keeps the cell idempotent instead of
    # replacing every parsed value with NaN on a re-run.
    return float(xVals)
# Parse each currency column into plain floats.
for moneyColumn in dollarSignColumns:
    BostonData[moneyColumn] = BostonData[moneyColumn].apply(removeDollar)

  

In [None]:
# NaN never compares equal to anything (including itself), so `!= np.nan` is
# True for every row and filtered nothing. notna() selects the rows that
# actually hold a cleaning fee.
print(BostonData.loc[BostonData["cleaning_fee"].notna(), "cleaning_fee"])


### Move Price to end of DataFrame to complete Preprocessing

In [None]:
# Move 'price' to the END of the column order: remove it from the list of
# column names and append it. Appending replaces the earlier insert at the
# arbitrary position 2000, which only meant "last" for narrower frames.
cols = list(BostonData)
cols.append(cols.pop(cols.index('price')))
BostonData = BostonData.loc[:, cols]

### Before One Hotencoding: EDA

#### HeatMap Correlation

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Correlate the numeric columns only; text columns (categories, amenities, ...)
# are still present in BostonData at this point and cannot be correlated.
corr = BostonData.select_dtypes(include=["number", "bool"]).corr()

# Mask for the upper triangle (the matrix is symmetric). The builtin `bool` is
# used because the `np.bool` alias has been removed from NumPy.
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(20, 15))

# Custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)

# Draw the heatmap with the mask (previously computed but never passed) and
# square cells.
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=1, ax=ax)

#### Bar graphs of "Objects" dtypes with respect to price

In [None]:
import matplotlib
%matplotlib inline
#pd.DataFrame(AllBostonData).dtypes.value_counts().plot(kind='bar', rot=0)
categoricalDataFrame = AllBostonData.loc[:,AllBostonData.dtypes == 'object'].columns
#These were all objects that we need to check within the BostonData variable/ 
objectsToCheck = []
for value in categoricalDataFrame:
    for otherValue in BostonData:
        if otherValue in value:
            objectsToCheck.append(otherValue)
print(len(objectsToCheck))
mySet = set(BostonData.columns) - set(objectsToCheck)
myInts = list(mySet)
print(len(myInts))
objectsToCheck.remove('amenities')

In [None]:
import seaborn as sns
# One count plot per column of objectsToCheck on an 8x2 grid; zip() stops at
# the shorter of the two sequences, so at most 16 columns are drawn.
fig, ax = plt.subplots(8,2, figsize=(25,30))
plt.subplots_adjust(top = 0.99, bottom=0.01, hspace=1.5, wspace=0.4)
for variable, subplot in zip(objectsToCheck, ax.flatten()):
    subplot.set_title("Count of {}".format(variable))
    # Pass the values as `x=`: seaborn no longer accepts the plotted vector
    # as a bare positional argument.
    sns.countplot(x=BostonData[variable], ax=subplot)
    for label in subplot.get_xticklabels():
        label.set_rotation(45)

In [None]:
# Density curve of the listing price with a rug of the individual observations.
priceValues = BostonData['price']
sns.distplot(priceValues, hist=False, rug=True)

#### Scatter plots of Numerical Values with respect to Price 


In [None]:
import seaborn as sns
# Scatter each column of myInts against price on a 7x4 grid.
fig, ax = plt.subplots(7, 4, figsize=(20, 30))
rotatedTickColumns = ('longitude', 'latitude')
for numericColumn, panel in zip(myInts, ax.flatten()):
    sns.scatterplot(x=BostonData[numericColumn], y=BostonData['price'], ax=panel)
    # Tilt the tick labels of the coordinate columns by 45 degrees.
    if numericColumn in rotatedTickColumns:
        for tickLabel in panel.get_xticklabels():
            tickLabel.set_rotation(45)


In [None]:
# Same relationships shown with marginal histograms; harder to fit into a
# presentation because every column gets a figure of its own.
import seaborn as sns
priceColumn = BostonData['price']
for numericColumn in myInts:
    sns.jointplot(x=BostonData[numericColumn], y=priceColumn)
  


### One Hot Encoding

In [None]:
# List-valued columns that are expanded into one indicator column per item.
# The name is spelled 'amenities' (lower case) to match the DataFrame column.
specialOneHotEncoding = ['host_verifications', 'amenities']

# Single-valued categorical columns that are one-hot encoded.
columnsToOneHotEncode = ['property_type', 'room_type','bed_type', 'cancellation_policy']



In [None]:
# REMINDER TO USE KERAS FOR ONE HOT ENCODING
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# Work on a copy: a plain assignment would alias BostonData, so the encoding
# and the column drops below would silently modify BostonData as well.
EncodedBostonData = BostonData.copy()
le = LabelEncoder()

pd.options.display.max_rows = 10

for column in columnsToOneHotEncode:
    # Category labels -> integer codes -> one indicator column per code.
    labelCodes = le.fit_transform(EncodedBostonData[column])
    encoded = to_categorical(labelCodes)
    newLabels = ["{}{}".format(column, x) for x in range(encoded.shape[1])]  # e.g. room_type0, room_type1, ...
    # Reuse the frame's own index so concat pairs rows one-to-one even when
    # the index is not the default 0..n-1 range.
    dfToAdd = pd.DataFrame(encoded, columns=newLabels, index=EncodedBostonData.index)

    # Replace the original column by its indicator columns.
    EncodedBostonData = pd.concat([EncodedBostonData.drop(columns=column), dfToAdd], axis=1)
print(EncodedBostonData)

In [None]:
# Amenities that get a 0/1 indicator column, in the order the columns are
# created. Each name appears once.
list1 = ['TV', 'Internet', 'Wireless Internet', 'Air Conditioning', 'Kitchen',
         'Free Parking on Premises', 'Heating', 'Family/Kid Friendly', 'Smoke Detector',
         'Carbon Monoxide Detector', 'Fire Extinguisher', 'Essentials', 'Shampoo',
         'Lock on Bedroom Door', '24-Hour Check-in', 'Hangers', 'Hair Dryer', 'Iron',
         'Washer', 'Laptop Friendly Workspace', 'Dryer', 'Safety Card', 'First Aid Kit',
         'Pets Allowed', 'Pets live on this property', 'Dog(s)', 'Gym', 'Smoking Allowed',
         'Elevator in Building', 'Cable TV']


def _parseAmenities(rawAmenities):
    """Return the set of amenity names listed in one raw 'amenities' cell.

    The cell is split on "," and the characters '"', '{' and '}' are removed
    from every piece. Non-string cells (e.g. NaN) yield an empty set.
    """
    if not isinstance(rawAmenities, str):
        return set()
    return {piece.replace("\"", "").replace("{", "").replace("}", "")
            for piece in rawAmenities.split(',')}


# Parse every row once, then derive each indicator from the parsed sets. Every
# name in list1 is checked, including 'Internet' and 'Laptop Friendly
# Workspace', which the former if/elif chain never set. Values are assigned
# through the frame's own index rather than a running row counter.
parsedAmenities = EncodedBostonData['amenities'].map(_parseAmenities)
for amenityName in list1:
    EncodedBostonData[amenityName] = parsedAmenities.map(
        lambda listed, wanted=amenityName: int(wanted in listed))

In [None]:
# Verification methods that get a 0/1 indicator column.
list1 = ['email','phone', 'manual_online', 'reviews', 'manual_offline', 'kba', 'facebook',
         'jumio', 'amex', 'linkedin', 'google', 'weibo']


def _parseVerifications(rawVerifications):
    """Return the set of method names listed in one raw 'host_verifications' cell.

    The cell is split on "," and quotes, square brackets and spaces are
    removed from every piece. Non-string cells (e.g. NaN) yield an empty set.
    """
    if not isinstance(rawVerifications, str):
        return set()
    return {piece.replace("'", "").replace("[", "").replace("]", "").replace(" ", "")
            for piece in rawVerifications.split(',')}


# Parse every row once, then derive each indicator from the parsed sets.
# Values are assigned through the frame's own index rather than a running row
# counter.
parsedVerifications = EncodedBostonData['host_verifications'].map(_parseVerifications)
for methodName in list1:
    EncodedBostonData[methodName] = parsedVerifications.map(
        lambda listed, wanted=methodName: int(wanted in listed))

print(EncodedBostonData.loc[:,[*list1]])

Remove the amenities and host_verifications columns now that they have been replaced by indicator columns.

In [None]:
# The raw list-valued columns are no longer needed; remove both in one call.
EncodedBostonData.drop(columns=['amenities', 'host_verifications'], inplace=True)

In [None]:
# Call info(): without the parentheses the cell only displayed the bound
# method object instead of the column summary.
EncodedBostonData.info()

### Replace NA with means of each column

In [None]:
# Missing deposits and cleaning fees are filled with 0; every other column is
# filled with its own mean.
zeroFillColumns = ('security_deposit', 'cleaning_fee')
for allColumns in EncodedBostonData.columns:
    if allColumns in zeroFillColumns:
        fillValue = 0
    else:
        fillValue = EncodedBostonData[allColumns].mean()
    # Assign the filled column back instead of using inplace=True on a column
    # selection, which is not guaranteed to write through to the frame.
    EncodedBostonData[allColumns] = EncodedBostonData[allColumns].fillna(fillValue)

# One-hot encoding appended new columns after 'price', so move 'price' (the
# target) to the END again. Appending replaces the earlier insert at the
# arbitrary position 2000.
cols = list(EncodedBostonData)
cols.append(cols.pop(cols.index('price')))
EncodedBostonData = EncodedBostonData.loc[:, cols]

# Model

In [None]:
from keras import Sequential, optimizers, callbacks
from keras import losses
from keras.layers import Dense
from sklearn.metrics import r2_score
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import math


In [None]:
def buildModel(modelName,nfeat,act_func='tanh',optimzer=None):
    """Build and compile a dense Keras regression network.

    Parameters
    ----------
    modelName : str
        One of 'perceptron' (one hidden layer of 8 units), 'neuralnet_3L'
        (hidden layers of 5 and 3 units) or 'neuralnet_4L' (hidden layers of
        15, 10 and 5 units).
    nfeat : int
        Number of input features.
    act_func : str
        Activation used by every hidden layer.
    optimzer : keras optimizer, optional
        Optimizer passed to compile(). When omitted, a new Adam optimizer with
        learning rate 0.1 is created for this model.

    Returns
    -------
    keras.Sequential
        The model, compiled with mean squared error loss and a single linear
        output unit.

    Raises
    ------
    ValueError
        If modelName is not one of the supported names.
    """
    hiddenLayerSizes = {
        'perceptron': (8,),
        'neuralnet_3L': (5, 3),
        'neuralnet_4L': (15, 10, 5),
    }
    # Reject unknown names up front; previously they produced an empty,
    # layer-less model.
    if modelName not in hiddenLayerSizes:
        raise ValueError(
            "Unknown modelName {!r}; expected one of {}".format(modelName, sorted(hiddenLayerSizes)))

    # Create the default optimizer per call. As a default argument it was
    # built once at definition time and shared by every model.
    if optimzer is None:
        optimzer = optimizers.Adam(learning_rate=0.1)

    firstWidth, *laterWidths = hiddenLayerSizes[modelName]
    model = Sequential()
    model.add(Dense(firstWidth, input_dim=nfeat, activation=act_func, use_bias=True))  # 1st hidden layer
    for width in laterWidths:
        model.add(Dense(width, activation=act_func))  # further hidden layers
    model.add(Dense(1))  # output layer (Dense defaults to a linear activation)
    model.compile(loss=losses.mean_squared_error, optimizer=optimzer)

    return model
def adj_r2(rSquare, numIndependentVar, numSamples):
    """Return the adjusted R^2: 1 - (1 - R^2) * (n - 1) / (n - p - 1).

    rSquare is R^2, numIndependentVar is p (number of predictors) and
    numSamples is n (number of observations).
    """
    scaledUnexplained = (1 - rSquare) * (numSamples - 1)
    residualDegrees = numSamples - numIndependentVar - 1
    return 1 - scaledUnexplained / residualDegrees

def graphAttsVRVals(rsq, rCv, rbar, numParams, columnName):
    """Plot three R-value curves against the number of parameters.

    rsq, rCv and rbar are drawn as lines labelled "R^2", "rCv" and "rAdj" over
    the x values in numParams; columnName appears in the figure title. The
    figure is shown and then cleared.
    """
    import matplotlib.pyplot as plt

    curves = (
        (rsq, "R^2"),
        (rCv, "rCv"),
        (rbar, "rAdj"),
    )
    for values, curveLabel in curves:
        plt.plot(numParams, values, label=curveLabel)

    plt.xlabel("Number params")
    plt.ylabel("RVals")
    plt.title("RVals for Y={}".format(columnName))
    plt.legend()
    plt.show()
    plt.clf()

In [None]:
def forwardSelect( XIndices, colsToUse, xVals, yVals, modelParams, previousRBase):
    """Run one forward-selection step: find the single best column to add.

    For every column position in XIndices that is not already in colsToUse, a
    new model is built on colsToUse plus that column, fitted on the full data,
    and scored with R^2 on that same data.

    Parameters
    ----------
    XIndices : list of int
        Positions of all candidate columns of xVals.
    colsToUse : list of int
        Positions of the columns already selected.
    xVals : pandas.DataFrame
        Feature matrix; columns are addressed by position.
    yVals : pandas.DataFrame or pandas.Series
        Target values.
    modelParams : dict
        Needs the keys 'model', 'activation', 'optimizer' and 'epochs'; the
        first three are passed to buildModel, 'epochs' to fit().
    previousRBase : float
        R^2 of the current model; a candidate must exceed it to be returned.

    Returns
    -------
    tuple
        (column position, R^2) of the best candidate when it improves on
        previousRBase, otherwise (-1, None).
    """
    featuresToUse = list(colsToUse)
    alreadyUsed = set(featuresToUse)
    # Candidates in XIndices order, so ties are resolved deterministically.
    featuresToTest = [idx for idx in XIndices if idx not in alreadyUsed]

    # analyzer maps each tested column position to the R^2 of the model that
    # includes it. An explicit dtype keeps the empty Series numeric.
    analyzer = pd.Series(index=featuresToTest, dtype=float)

    for newColumn in featuresToTest:
        # Columns already in use plus the candidate under test.
        combinedX = featuresToUse + [newColumn]
        # Select by position for both fit and predict (the two calls previously
        # mixed .iloc and .loc, which agree only for 0..n-1 column labels).
        candidateX = xVals.iloc[:, combinedX]

        myModel = buildModel(modelParams['model'], len(combinedX), modelParams['activation'], modelParams['optimizer'])
        myModel.fit(candidateX, yVals, epochs=modelParams['epochs'], verbose=1)

        analyzer.loc[newColumn] = r2_score(yVals, myModel.predict(candidateX))
        print(analyzer)

    # Accept the best candidate only if a score exists and beats the base
    # model. The former truthiness test on analyzer.max() wrongly rejected a
    # best score of exactly 0.
    if analyzer.notna().any() and analyzer.max() > previousRBase:
        return analyzer.idxmax(), analyzer.max()
    # No candidate improves on the base model.
    return -1, None

def forwardSelectAll(xAtts, yAtts, modelParams):
    """Repeat forward selection until no remaining column improves R^2.

    Selection starts from column position 0 and calls forwardSelect once per
    round. After every accepted column, a 5-fold shuffled cross-validation R^2
    and the adjusted R^2 are recorded.

    Returns
    -------
    tuple of lists
        (cols, numFeatures, rSqVals, rCvVals, rBarVals): selected column
        positions, feature count per round, and the R^2, cross-validated R^2
        and adjusted R^2 per round. The first entries are the seeds
        0, 1, -999, 0 and 0 respectively.
    """
    totalColumns = len(xAtts.columns)
    xIndices = list(range(totalColumns))

    cols = [0]          # selection is seeded with column position 0
    numFeatures = [1]
    rSqVals = [-999]    # sentinel so the first candidate always improves on it
    rCvVals = [0]
    rBarVals = [0]

    for _ in range(totalColumns):
        bestIdx, bestRSq = forwardSelect(xIndices, cols, xAtts, yAtts, modelParams, rSqVals[-1])
        if bestIdx == -1:
            # No remaining column improves the model.
            break

        cols.append(bestIdx)
        featureCount = numFeatures[-1] + 1
        numFeatures.append(featureCount)
        rSqVals.append(bestRSq)

        # Cross-validated R^2 on the columns selected so far.
        splitter = KFold(n_splits=5, shuffle=True)
        foldScores = []
        for trainRows, testRows in splitter.split(xAtts.iloc[:, list(cols)], yAtts):
            foldModel = buildModel(modelParams['model'], featureCount, modelParams['activation'], modelParams['optimizer'])
            foldModel.fit(xAtts.iloc[trainRows, list(cols)], yAtts.iloc[trainRows], verbose=0, epochs=modelParams['epochs'])
            foldPredictions = foldModel.predict(xAtts.iloc[testRows, list(cols)])
            foldScores.append(r2_score(yAtts.iloc[testRows], foldPredictions))

        rCvVals.append(sum(foldScores) / len(foldScores))
        rBarVals.append(adj_r2(bestRSq, featureCount, len(yAtts)))

    return cols, numFeatures, rSqVals, rCvVals, rBarVals

# NNXL Implementation and Results

In [None]:
# preprocessing.scale is used below, but sklearn.preprocessing is otherwise
# imported only in a later cell; import it here so the notebook runs top to
# bottom on a fresh kernel.
from sklearn import preprocessing

myOptimizer = optimizers.SGD(learning_rate=.05)

myNNXLModelParams = {'model':'neuralnet_4L', 'optimizer':myOptimizer, 'activation': 'sigmoid', 'epochs':200, 'lr':.05}


myNNXLData = EncodedBostonData.copy()
# Relabel the columns 0..n-1 so they can be addressed by position.
myNNXLData.columns = list(range(0, len(myNNXLData.columns)))
# The last column is the target; everything before it is a feature.
X= myNNXLData.iloc[:,:-1]
y= myNNXLData.iloc[:,-1]
# Standardize features and target.
X = pd.DataFrame(preprocessing.scale(X))
y = pd.DataFrame(preprocessing.scale(y.to_numpy().reshape(-1,1)))


myCOls, myNumFeat, rVals, rCV, Radj = forwardSelectAll(X, y, myNNXLModelParams)

In [None]:
# rVals starts with the -999 sentinel seeded by forwardSelectAll; show that
# baseline as 0 instead.
rVals[0] = 0
for scoreSeries in (rVals, Radj, rCV):
    print(scoreSeries)
graphAttsVRVals(rVals, rCV, Radj, myNumFeat, 'price')

In [None]:
# Refit one model on the selected columns and compare its predictions with the
# actual (scaled) target, row by row.
myX = X.iloc[:, list(myCOls)]

myModel = buildModel(
    myNNXLModelParams['model'],
    len(myX.columns),
    myNNXLModelParams['activation'],
    optimzer=myNNXLModelParams['optimizer'],
)
myModel.fit(x=myX, y=y, epochs=200, verbose=0)

rowPositions = range(0, len(myX))
_, ax = plt.subplots()
ax.scatter(x=rowPositions, y=y, c='blue', label='actual')
ax.scatter(x=rowPositions, y=myModel.predict(myX), c='red', label='predicted')
plt.legend()

In [None]:
# Predictions on the training data, followed by their R^2.
fittedValues = myModel.predict(myX)
print(fittedValues)
print(r2_score(y, fittedValues))

In [None]:
print("Features that were selected:")
# myCOls holds column positions of X, which was built from EncodedBostonData.
# The previously referenced name EncodedSeattleData is never defined in this
# notebook and raised a NameError.
print(EncodedBostonData.columns[[*myCOls]])

# Skipped Python Forward Select and Using Scalation ForwardSelected Attributes for NNXL

In [None]:
from sklearn import preprocessing

myOptimizer = optimizers.SGD(learning_rate=.05)
myNNXLModelParams = dict(model='neuralnet_4L', optimizer=myOptimizer, activation='sigmoid', epochs=200, lr=.05)

myNNXLData = EncodedBostonData.copy()

# Fixed feature subset used in place of the Python forward selection.
selectedFeatureNames = ['host_response_rate', 'host_acceptance_rate', 'accommodates', 'beds',
                        'bathrooms', 'review_scores_rating', 'guests_included', 'minimum_nights']

# Features: every column but the last, restricted to the subset and relabelled
# 0..n-1. Target: the last column.
X = myNNXLData.iloc[:, :-1].loc[:, selectedFeatureNames]
X.columns = list(range(0, len(X.columns)))
y = myNNXLData.iloc[:, -1]

# Standardize features and target.
myX = pd.DataFrame(preprocessing.scale(X))
y = pd.DataFrame(preprocessing.scale(y.to_numpy().reshape(-1, 1)))

myModel = buildModel(
    myNNXLModelParams['model'],
    len(myX.columns),
    myNNXLModelParams['activation'],
    optimzer=myNNXLModelParams['optimizer'],
)
myModel.fit(x=myX, y=y, epochs=200, verbose=0)

In [None]:
# Actual versus predicted (scaled) target for every row.
rowPositions = range(0, len(myX))
_, ax = plt.subplots()
ax.scatter(x=rowPositions, y=y, c='blue', label='actual')
ax.scatter(x=rowPositions, y=myModel.predict(myX), c='red', label='predicted')
plt.legend()

In [None]:
kfold = KFold(n_splits=5, shuffle=True)

# In-sample predictions with their R^2 and adjusted R^2.
fittedValues = myModel.predict(myX)
print(fittedValues)
print('r2')
inSampleR2 = r2_score(y, fittedValues)
print(inSampleR2)
print('r2adj')
print(adj_r2(inSampleR2, len(myX.columns), len(myX)))

# 5-fold cross-validated R^2: a new model is fitted for every fold.
cvScoreArray = []
for trainRows, testRows in kfold.split(myX, y):
    model = buildModel(myNNXLModelParams['model'], len(myX.columns), myNNXLModelParams['activation'], myNNXLModelParams['optimizer'])
    model.fit(myX.iloc[trainRows, :], y.iloc[trainRows], verbose=0, epochs=myNNXLModelParams['epochs'])
    foldPredictions = model.predict(myX.iloc[testRows, :])
    cvScoreArray.append(r2_score(y.iloc[testRows], foldPredictions))
print("RCV:")
print(sum(cvScoreArray)/len(cvScoreArray))

In [None]:
# Per-fold R^2 values and their mean.
meanCvScore = sum(cvScoreArray) / len(cvScoreArray)
print(cvScoreArray)
print(meanCvScore)