In [None]:
#Analyzing the dataset
from scipy.stats import shapiro
import pandas as pd

# Keys into CONFIG.
CONFIG_MAX_DUMMY = "max_dummy"
CONFIG_MAX_DUMMY_PCT = "pct_dummy"

# Limits analyze() checks before marking a string column for dummy encoding.
CONFIG = {
  # The column's unique-value count must be below this number.
  CONFIG_MAX_DUMMY: 1000,
  # The ratio of unique values to rows must be below this fraction.
  CONFIG_MAX_DUMMY_PCT: 0.75
}

def isnumeric(datatype):
  """Return True when `datatype` is the float or int field-type tag."""
  numeric_types = (FIELD_TYPE_FLOAT, FIELD_TYPE_INT)
  return datatype in numeric_types

# Metadata key under which analyze() stores the per-column dictionaries.
FIELDS = "fields"
# Per-field key naming the preprocessing action, followed by its possible values.
FIELD_ACTION = "action"
FIELD_ACTION_COPY = "copy"
FIELD_ACTION_IGNORE = "ignore"
FIELD_ACTION_ZSCORE = "zscore"
FIELD_ACTION_NORMALIZE = "normalize"
FIELD_ACTION_DUMMY = "dummy"
FIELD_ACTION_TARGET = "target"
# Per-field attribute/statistic keys (not every key is used by the code
# visible in this file).
FIELD_NAME = "name"
FIELD_SUM = "sum"
FIELD_TYPE = "type"
FIELD_MEAN = "mean"
FIELD_NUM = "n"
FIELD_MISSING = "missing"
FIELD_MIN = "min"
FIELD_MAX = "max"
FIELD_VAR = "var"
FIELD_SD = "sd"
FIELD_UNIQUE = "unique"
FIELD_MEDIAN = "median"
FIELD_MODE = "mode"
FIELD_SHAPIRO_STAT = "shapiro-stat"
FIELD_SHAPIRO_P = "shapiro-p"
# Top-level metadata keys and the model-type values stored under META_TYPE.
META_TARGET = "target"
META_TYPE = "type"
META_TYPE_BINARY_CLASSIFICATION = "binary-classification"
META_TYPE_CLASSIFICATION = "classification"
META_TYPE_REGRESSION = "regression"
META_SOURCE = "source"
META_POSITIVE_TOKEN = "positive-token"
META_EARLY_STOP = "early-stop"

# Values stored under FIELD_TYPE; isnumeric() treats float and int as numeric.
FIELD_TYPE_FLOAT = "float"
FIELD_TYPE_INT = "int"
FIELD_TYPE_STR = "str"

def find_positive(s):
  """Pick the label of a two-class column that should count as "positive".

  Labels are compared case-insensitively against well-known positive/negative
  pairs ("+"/"-", "1"/"0", "t"/"f", "y"/"n", "true"/"false", "yes"/"no",
  "p"/"n", "positive"/"negative"). The value is returned exactly as it
  appears in the data, so it can be compared against the column directly.

  Args:
    s: pandas Series holding the target column. Missing values are ignored
      and non-string values are matched through their string form.

  Returns:
    The positive label as found in `s`; for an unrecognised pair, the label
    that sorts first; None when `s` does not hold exactly two distinct labels
    (case-insensitively).
  """
  # Map each case-folded label to the value actually stored in the data.
  originals = {}
  for value in s.dropna().tolist():
    originals[str(value).lower()] = value
  if len(originals) != 2:
    return None

  known_pairs = (
      ("+", "-"),
      ("1", "0"),
      ("t", "f"),
      ("y", "n"),
      ("true", "false"),
      ("yes", "no"),
      ("p", "n"),
      ("positive", "negative"),
  )
  for positive, negative in known_pairs:
    if positive in originals and negative in originals:
      return originals[positive]

  # Unknown pair: fall back to the label that sorts first (upper-cased order,
  # matching the ordering the previous implementation used).
  return originals[min(originals, key=str.upper)]

def analyze(data_source, target, is_regression=True):
  """Read a CSV and build the metadata dictionary describing its columns.

  Each column gets a dictionary under metadata[FIELDS] with its type,
  missing/unique counts, descriptive statistics and a preprocessing action:
    * numeric columns: z-score when the Shapiro-Wilk p-value is above 0.05,
      otherwise min-max normalization;
    * string columns: dummy encoding when the unique count is within the
      CONFIG limits, otherwise ignored;
    * the target column: always FIELD_ACTION_TARGET.

  Args:
    data_source: path, URL or buffer accepted by pd.read_csv. 'NA' and '?'
      are read as missing values.
    target: name of the column to predict.
    is_regression: True for a regression model; False selects binary or
      multi-class classification based on the target's unique-value count.

  Returns:
    The metadata dictionary (FIELDS, META_TARGET, META_SOURCE,
    META_EARLY_STOP, META_TYPE and, for binary classification,
    META_POSITIVE_TOKEN).

  Raises:
    KeyError: if `target` is not a column of the CSV.
  """
  df = pd.read_csv(data_source, na_values=['NA', '?'])
  if target not in df.columns:
    # Fail before doing any work, with a message that names the problem.
    raise KeyError(f"target column {target!r} not found in {data_source!r}")
  row_count = len(df)

  metadata = {
      FIELDS: {},
      META_TARGET: target,
      META_SOURCE: data_source,
      META_EARLY_STOP: True
  }

  fields = metadata[FIELDS]

  for field_name, csv_type in zip(df.columns, df.dtypes):
    col = df[field_name]
    if "float" in csv_type.name:
      dtype = FIELD_TYPE_FLOAT
    elif "int" in csv_type.name:
      dtype = FIELD_TYPE_INT
    else:
      dtype = FIELD_TYPE_STR

    missing_count = int(col.isnull().sum())
    unique_count = len(pd.unique(col))

    if isnumeric(dtype):
      # Shapiro-Wilk is only meaningful on observed values and needs at least
      # three of them; a NaN p-value falls through to "normalize".
      observed = col.dropna()
      if len(observed) >= 3:
        stat, p = shapiro(observed)
      else:
        stat, p = float("nan"), float("nan")

      # less than or equal to 0.05 not normal
      action = FIELD_ACTION_ZSCORE if p > 0.05 else FIELD_ACTION_NORMALIZE

      fields[field_name] = {
          FIELD_TYPE: dtype,
          FIELD_MEDIAN: col.median(),
          FIELD_MEAN: col.mean(),
          FIELD_SD: col.std(),
          FIELD_MAX: col.max(),
          FIELD_MIN: col.min(),
          FIELD_SHAPIRO_STAT: stat,
          FIELD_SHAPIRO_P: p,
          FIELD_ACTION: action,
          FIELD_MISSING: missing_count,
          FIELD_UNIQUE: unique_count}
    else:
      # mode() is empty when every value is missing.
      modes = col.mode()
      fields[field_name] = {
          FIELD_TYPE: dtype,
          FIELD_MODE: modes.iloc[0] if len(modes) else None,
          FIELD_ACTION: FIELD_ACTION_IGNORE,
          FIELD_MISSING: missing_count,
          FIELD_UNIQUE: unique_count}

    # Determine action
    field = fields[field_name]
    few_enough_values = (
        row_count > 0
        and unique_count < CONFIG[CONFIG_MAX_DUMMY]
        and unique_count / row_count < CONFIG[CONFIG_MAX_DUMMY_PCT])
    if dtype == FIELD_TYPE_STR and few_enough_values:
      field[FIELD_ACTION] = FIELD_ACTION_DUMMY
    if field_name == target:
      field[FIELD_ACTION] = FIELD_ACTION_TARGET

  # Determine model type
  if is_regression:
    metadata[META_TYPE] = META_TYPE_REGRESSION
  elif fields[target][FIELD_UNIQUE] == 2:
    metadata[META_TYPE] = META_TYPE_BINARY_CLASSIFICATION
    metadata[META_POSITIVE_TOKEN] = find_positive(df[target])
  else:
    metadata[META_TYPE] = META_TYPE_CLASSIFICATION

  return metadata

COLS = [FIELD_MEAN, FIELD_SD, FIELD_MEDIAN, FIELD_MODE, FIELD_MAX, FIELD_ACTION, FIELD_UNIQUE, FIELD_SHAPIRO_P,FIELD_MISSING]

def field_summary(metadata, cols=COLS):
  """Tabulate selected per-field attributes from `metadata`.

  Args:
    metadata: dictionary with a FIELDS entry mapping field names to their
      attribute dictionaries.
    cols: attribute keys to include, in order. Defaults to COLS.

  Returns:
    A pandas DataFrame with one row per field: a 'name' column followed by
    one column per entry of `cols`. Attributes a field lacks are None.
  """
  cols = list(cols)
  data = {'name': []}
  for col in cols:
    data[col] = []

  for field_name, field in metadata[FIELDS].items():
    data['name'].append(field_name)
    for col in cols:
      data[col].append(field.get(col, None))

  # Select with the requested columns; the global COLS was used here before,
  # which ignored (or raised KeyError for) a caller-supplied `cols`.
  return pd.DataFrame(data)[['name'] + cols]

In [None]:
#generating the code
from dataclasses import MISSING
from pandas.core.dtypes.inference import is_re
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

def tolist(obj):
    """Return `obj` as-is when it is a list or tuple, else wrap it in a list."""
    return obj if isinstance(obj, (list, tuple)) else [obj]

class PythonFile:
    """Collects import specs and source lines and renders them as Python text.

    The add_* methods record content; the remaining helpers (comment, call,
    assign, str, index) only format and return source fragments, which the
    caller passes to add_line().
    """

    def __init__(self):
        # Each import is a dict: {"name"[, "alias"]} or {"from", "import"}.
        self.imports = []
        self.lines = []

    def add_import(self, name, alias=None):
        """Record `import name` or, when `alias` is given, `import name as alias`."""
        if alias:
            self.imports.append({"name": name, "alias": alias})
        else:
            self.imports.append({"name": name})

    def add_from(self, _from, _import):
        """Record `from _from import ...`; `_import` is one name or a list/tuple of names."""
        self.imports.append({"from": _from, "import": _import})

    def generate(self):
        """Return the full source: import statements first, then the added lines."""
        rendered = []
        for obj in self.imports:
            if "name" in obj and "alias" in obj:
                rendered.append(f"import {obj['name']} as {obj['alias']}")
            elif "name" in obj:
                rendered.append(f"import {obj['name']}")
            elif "from" in obj and "import" in obj:
                imports = ", ".join(tolist(obj['import']))
                rendered.append(f"from {obj['from']} import {imports}")
            else:
                # An unrecognised entry still produces an (empty) line.
                rendered.append("")

        rendered.extend(self.lines)
        return "".join(line + "\n" for line in rendered)

    # NOTE: the parameter name `str` shadows the builtin inside add_line,
    # comment and str; it is kept so keyword callers keep working.
    def add_line(self, str):
        """Append one line of source text."""
        self.lines.append(str)

    def comment(self, str):
        """Return `str` formatted as a line comment."""
        return f"# {str}"

    def call(self, name, *args):
        """Return the source of a call to `name`.

        Non-dict arguments become positional arguments (via str()); each dict
        argument contributes `key=value` keyword arguments.

        Raises:
            ValueError: if a positional argument follows a dict argument.
        """
        formatted_args = []
        started_named = False
        for arg in args:
            if isinstance(arg, dict):
                formatted_args.extend(f"{key}={value}" for key, value in arg.items())
                started_named = True
            else:
                if started_named:
                    raise ValueError("positional argument follows keyword argument")
                formatted_args.append(str(arg))

        return f"{name}({', '.join(formatted_args)})"

    def assign(self, left, right):
        """Return the source of the assignment `left = right`."""
        return f"{left} = {right}"

    def str(self, str):
        """Return `str` as a double-quoted Python string literal.

        Quotes, backslashes and control characters are escaped so the literal
        evaluates back to the original text.
        """
        import json  # local import: only this method needs it

        # f"{...}" stringifies non-string values; the builtin str is shadowed here.
        return json.dumps(f"{str}", ensure_ascii=False)

    def index(self, name, indexes, dot=None):
        """Return `name` followed by one `[idx]` per index and, optionally, `.dot`."""
        src = name + "".join(f"[{idx}]" for idx in indexes)
        if dot:
            src += "." + dot
        return src

def generate_keras(metadata, visualize=True, pca_components=None):
    """Render a standalone Keras training script for the analyzed dataset.

    The generated script loads the CSV named by metadata[META_SOURCE], fills
    missing values (median for numeric fields, mode otherwise), applies each
    field's action (z-score, min-max normalization or dummy encoding), builds
    a 50-25-output dense network, trains it and prints evaluation metrics.

    Args:
        metadata: dictionary produced by analyze(); META_EARLY_STOP selects
            between a train/validation split with EarlyStopping and training
            and evaluating on the full data.
        visualize: when true, the script imports matplotlib.pyplot as plt.
            No plotting code is generated yet.
        pca_components: when truthy, the script imports PCA. No PCA step is
            generated yet.

    Returns:
        The generated script as a single string.
    """
    na_values = ['NA', '?']
    target = metadata[META_TARGET]
    model_type = metadata[META_TYPE]
    fields = metadata[FIELDS]

    if model_type == META_TYPE_REGRESSION:
        loss = "mean_squared_error"
    elif model_type == META_TYPE_BINARY_CLASSIFICATION:
        loss = "binary_crossentropy"
    else:
        loss = "categorical_crossentropy"

    py = PythonFile()
    # Imports
    py.add_import("pandas", "pd")
    py.add_import("io")
    py.add_import("requests")
    py.add_import("numpy", "np")
    py.add_from("tensorflow.keras.models", "Sequential")
    py.add_from("tensorflow.keras.layers", ["Dense", "Activation"])
    py.add_from("tensorflow.keras.callbacks", "EarlyStopping")
    py.add_from("scipy.stats", "zscore")
    py.add_from("sklearn.preprocessing", "MinMaxScaler")
    py.add_from("sklearn.model_selection", "train_test_split")
    py.add_from("sklearn.metrics", ["accuracy_score", "mean_squared_error", "log_loss", "roc_curve", "auc"])
    if visualize:
        py.add_import("matplotlib.pyplot", "plt")
    if pca_components:
        # PCA is a class inside the module, so it needs a from-import;
        # "import sklearn.decomposition as PCA" would alias the module.
        py.add_from("sklearn.decomposition", "PCA")

    py.add_line(py.assign("df", py.call("pd.read_csv", py.str(metadata[META_SOURCE]), {'na_values': na_values})))
    x_fields = [x for x in fields if x != target and fields[x][FIELD_ACTION] in [FIELD_ACTION_COPY]]
    py.add_line(py.assign("x_fields", x_fields))

    # Analyze input columns
    for field_name, field in fields.items():
        column = py.index("df", [py.str(field_name)])
        if field[FIELD_MISSING] > 0:
            if isnumeric(field[FIELD_TYPE]):
                fn = "median"
                suffix = ""
            else:
                fn = "mode"
                suffix = "[0]"
            fill_value = py.index("df", [py.str(field_name)], py.call(fn) + suffix)
            py.add_line(py.assign(column,
                                  py.index("df", [py.str(field_name)], py.call("fillna", fill_value))))
        if field[FIELD_ACTION] == FIELD_ACTION_ZSCORE:
            py.add_line(py.assign(column, py.call("zscore", column)))
            py.add_line(py.call("x_fields.append", py.str(field_name)))
        elif field[FIELD_ACTION] == FIELD_ACTION_NORMALIZE:
            # MinMaxScaler needs 2-D input, hence the df[[name]] selection.
            frame = py.index("df", [[field_name]])
            py.add_line(py.assign(column, py.call("MinMaxScaler().fit_transform", frame)))
            py.add_line(py.call("x_fields.append", py.str(field_name)))
        elif field[FIELD_ACTION] == FIELD_ACTION_DUMMY:
            py.add_line(py.assign("dummies",
                                  py.call("pd.get_dummies",
                                          column,
                                          {'prefix': py.str(field_name), 'drop_first': 'True'})))
            py.add_line("df = pd.concat([df,dummies],axis=1)")
            py.add_line("x_fields += dummies.columns.tolist()")

    py.add_line(py.assign("x", py.index("df", ["x_fields"], "values")))

    # Bracket indexing works for any column name; attribute access
    # (df.<name>) breaks on names that are not valid identifiers.
    target_column = py.index("df", [py.str(target)])
    target_values = py.index("df", [py.str(target)], "values")
    if model_type == META_TYPE_CLASSIFICATION:
        py.add_line(py.assign("dummies", py.call("pd.get_dummies", target_column)))
        py.add_line(py.assign("species", "dummies.columns"))
        py.add_line(py.assign("y", "dummies.values"))
    elif model_type == META_TYPE_BINARY_CLASSIFICATION:
        pos = metadata[META_POSITIVE_TOKEN]
        # Quote string tokens; emit other values (e.g. the int 1) verbatim so
        # the comparison matches the column's own type.
        pos_literal = py.str(pos) if isinstance(pos, str) else repr(pos)
        py.add_line(py.assign(target_column, f"({target_column}=={pos_literal}).astype(int)"))
        py.add_line(py.assign("y", target_values))
    else:
        py.add_line(py.assign("y", target_values))

    py.add_line(py.comment("Construct model"))
    # Early stop
    if metadata[META_EARLY_STOP]:
        x_train, y_train, x_test, y_test = "x_train", "y_train", "x_test", "y_test"
        py.add_line(py.comment("Split into validation and training sets"))
        py.add_line(py.assign(f"{x_train}, {x_test}, {y_train}, {y_test}",
                              py.call("train_test_split", "x", "y", {"test_size": 0.25, "random_state": 42})))
    else:
        # Without a split the script trains and evaluates on the same data.
        x_train, y_train, x_test, y_test = "x", "y", "x", "y"

    py.add_line(py.assign("model", py.call("Sequential")))
    py.add_line(py.call("model.add", py.call("Dense", 50, {"input_dim": "x.shape[1]", "activation": py.str('relu')})))
    py.add_line(py.call("model.add", py.call("Dense", 25, {"activation": py.str('relu')})))
    if model_type == META_TYPE_REGRESSION:
        py.add_line(py.call("model.add", py.call("Dense", "1")))
    elif model_type == META_TYPE_BINARY_CLASSIFICATION:
        py.add_line(py.call("model.add", py.call("Dense", "1", {"activation": py.str('sigmoid')})))
    else:
        py.add_line(
            py.call("model.add", py.call("Dense", "y.shape[1]", {"activation": py.str('softmax')})))
    py.add_line(py.call("model.compile", {"loss": py.str(loss), "optimizer": py.str('adam')}))

    py.add_line(py.comment("Train model"))
    if metadata[META_EARLY_STOP]:
        py.add_line(py.assign("monitor", py.call("EarlyStopping", {"monitor": py.str('val_loss'), "min_delta": "1e-3",
                                                                   "patience": 5,
                                                                   "verbose": 1, "mode": py.str('auto'),
                                                                   "restore_best_weights": True})))
        py.add_line(py.call("model.fit", x_train, y_train,
                            {"validation_data": f"({x_test},{y_test})", 'callbacks': '[monitor]', 'verbose': '2',
                             'epochs': 1000}))
    else:
        py.add_line(py.call("model.fit", x_train, y_train, {'verbose': '2', 'epochs': 100}))

    # The metric functions are called by the names imported above; the
    # generated script never imports the `metrics` module itself.
    py.add_line(py.comment("Evaluate model"))
    py.add_line(py.assign("pred", py.call("model.predict", x_test)))
    if model_type == META_TYPE_REGRESSION:
        py.add_line(py.comment("Measure RMSE error.  RMSE is common for regression."))
        py.add_line(py.assign("score", py.call("np.sqrt", py.call("mean_squared_error", y_test, "pred"))))
        py.add_line("print(f\"Root mean square (RMSE): {score}\")")
    elif model_type == META_TYPE_CLASSIFICATION:
        py.add_line(py.assign("predict_classes", py.call("np.argmax", "pred", {"axis": 1})))
        py.add_line(py.assign("expected_classes", py.call("np.argmax", y_test, {"axis": 1})))
        py.add_line(py.assign("correct", py.call("accuracy_score", "expected_classes", "predict_classes")))
        py.add_line("print(f\"Accuracy: {correct}\")")
    elif model_type == META_TYPE_BINARY_CLASSIFICATION:
        # The sigmoid output is a single column, so argmax over axis 1 would
        # always be 0; threshold the probability instead.
        py.add_line(py.assign("predict_classes", "(pred > 0.5).astype(int).ravel()"))
        py.add_line(py.assign("correct", py.call("accuracy_score", y_test, "predict_classes")))
        py.add_line("print(f\"Accuracy: {correct}\")")
        py.add_line(py.assign("fpr, tpr, thresholds",
                              py.call("roc_curve", y_test, "pred", {"pos_label": 1})))
        py.add_line(py.assign("score", py.call("auc", "fpr", "tpr")))
        py.add_line("print(f\"Area Under Curve: {score}\")")
    if model_type in (META_TYPE_CLASSIFICATION, META_TYPE_BINARY_CLASSIFICATION):
        # No `eps` argument: newer scikit-learn releases removed it.
        py.add_line(py.assign("score", py.call("log_loss", y_test, "pred")))
        py.add_line("print(f\"Log loss: {score}\")")

    return py.generate()


In [None]:
#DATA_SOURCE = "https://data.heatonresearch.com/data/t81-558/iris.csv"; TARGET = "species";IS_REGRESSION=False
from dataclasses import MISSING
from pandas.core.dtypes.inference import is_re
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# URL of the CSV to analyze; analyze() passes it to pd.read_csv
DATA_SOURCE_URL = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"

# Name of the column the model should predict
TARGET = "tax"

# True -> regression; False -> analyze() picks binary or multi-class
# classification from the target's unique-value count.
# NOTE(review): "tax" is numeric with 66 unique values in the summary output
# below, so False yields a 66-class classifier - confirm regression was not
# intended.
IS_REGRESSION = False  # Update according to your task

# Read the CSV and build the per-field metadata dictionary
metadata = analyze(DATA_SOURCE_URL, TARGET, IS_REGRESSION)
print(metadata)

# Tabulate the per-field statistics
# (display() is not imported here; presumably supplied by the notebook
# environment - confirm when running outside Jupyter)
summary = field_summary(metadata)
display(summary)

# Disable early stopping: the generated script then trains for a fixed number
# of epochs and evaluates on the same data it trained on
metadata[META_EARLY_STOP] = False

# Generate the Keras training script. visualize / pca_components currently
# only add the matplotlib and PCA imports to the generated script; no plotting
# or PCA code is generated.
python_code = generate_keras(metadata, visualize=True, pca_components=2)
print(python_code)


{'fields': {'crim': {'type': 'float', 'median': 0.25651, 'mean': 3.613523557312254, 'sd': 8.60154510533249, 'max': 88.9762, 'min': 0.00632, 'shapiro-stat': 0.44996488094329834, 'shapiro-p': 1.3285678005931464e-36, 'action': 'normalize', 'missing': 0, 'unique': 504}, 'zn': {'type': 'float', 'median': 0.0, 'mean': 11.363636363636363, 'sd': 23.32245299451514, 'max': 100.0, 'min': 0.0, 'shapiro-stat': 0.5559463500976562, 'shapiro-p': 7.882576753156324e-34, 'action': 'normalize', 'missing': 0, 'unique': 26}, 'indus': {'type': 'float', 'median': 9.69, 'mean': 11.13677865612648, 'sd': 6.860352940897585, 'max': 27.74, 'min': 0.46, 'shapiro-stat': 0.8997918367385864, 'shapiro-p': 1.0642375893751083e-17, 'action': 'normalize', 'missing': 0, 'unique': 76}, 'chas': {'type': 'int', 'median': 0.0, 'mean': 0.0691699604743083, 'sd': 0.25399404134041037, 'max': 1, 'min': 0, 'shapiro-stat': 0.27476072311401367, 'shapiro-p': 2.350467979135232e-40, 'action': 'normalize', 'missing': 0, 'unique': 2}, 'nox':

Unnamed: 0,name,mean,sd,median,mode,max,action,unique,shapiro-p,missing
0,crim,3.613524,8.601545,0.25651,,88.9762,normalize,504,1.3285679999999999e-36,0
1,zn,11.363636,23.322453,0.0,,100.0,normalize,26,7.882577e-34,0
2,indus,11.136779,6.860353,9.69,,27.74,normalize,76,1.064238e-17,0
3,chas,0.06917,0.253994,0.0,,1.0,normalize,2,2.3504680000000002e-40,0
4,nox,0.554695,0.115878,0.538,,0.871,normalize,81,5.775851e-14,0
5,rm,6.284634,0.702617,6.2085,,8.78,normalize,446,2.410727e-10,0
6,age,68.574901,28.148861,77.5,,100.0,normalize,356,2.23113e-18,0
7,dis,3.795043,2.10571,3.20745,,12.1265,normalize,412,2.185128e-17,0
8,rad,9.549407,8.707259,5.0,,24.0,normalize,9,8.072354e-30,0
9,tax,408.237154,168.537116,330.0,,711.0,target,66,1.1629790000000002e-23,0


import pandas as pd
import io
import requests
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss, roc_curve, auc
import matplotlib.pyplot as plt
import sklearn.decomposition as PCA
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv", na_values=['NA', '?'])
x_fields = []
df["crim"] = MinMaxScaler().fit_transform(df[['crim']])
x_fields.append("crim")
df["zn"] = MinMaxScaler().fit_transform(df[['zn']])
x_fields.append("zn")
df["indus"] = MinMaxScaler().fit_transform(df[['indus']])
x_fields.append("indus")
df["chas"] = MinMaxScaler().fit_transform(df[['chas']])
x_fields.append("chas")
df["nox"] = MinMaxScaler().fit_transform(df

In [None]:
!pip install tensorflow



In [None]:
import pandas as pd
import io
import requests
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss, roc_curve, auc
from sklearn import metrics
# Generated training script for the iris dataset (multi-class classification).
# 'NA' and '?' are read as missing values.
df = pd.read_csv("https://data.heatonresearch.com/data/t81-558/iris.csv", na_values=['NA', '?'])
# Scale each input column and collect the names of the columns fed to the model.
x_fields = []
df["sepal_l"] = MinMaxScaler().fit_transform(df[['sepal_l']])
x_fields.append("sepal_l")
df["sepal_w"] = zscore(df["sepal_w"])
x_fields.append("sepal_w")
df["petal_l"] = MinMaxScaler().fit_transform(df[['petal_l']])
x_fields.append("petal_l")
df["petal_w"] = MinMaxScaler().fit_transform(df[['petal_w']])
x_fields.append("petal_w")
x = df[x_fields].values
# One-hot encode the target; one output unit per class.
dummies = pd.get_dummies(df["species"])
species = dummies.columns
y = dummies.values
# Construct model
model = Sequential()
model.add(Dense(50, input_dim=x.shape[1], activation="relu"))
model.add(Dense(25, activation="relu"))
model.add(Dense(y.shape[1], activation="softmax"))
model.compile(loss="categorical_crossentropy", optimizer="adam")
# Train model
model.fit(x, y, verbose=2, epochs=100)
# Evaluate model (on the training data; no hold-out split is made here)
pred = model.predict(x)
predict_classes = np.argmax(pred, axis=1)
expected_classes = np.argmax(y, axis=1)
correct = accuracy_score(expected_classes, predict_classes)
print(f"Accuracy: {correct}")
# No `eps` argument: newer scikit-learn releases removed it from log_loss.
score = metrics.log_loss(y, pred)
print(f"Log loss: {score}")


Epoch 1/100
5/5 - 1s - loss: 1.0683 - 725ms/epoch - 145ms/step
Epoch 2/100
5/5 - 0s - loss: 1.0272 - 16ms/epoch - 3ms/step
Epoch 3/100
5/5 - 0s - loss: 0.9927 - 16ms/epoch - 3ms/step
Epoch 4/100
5/5 - 0s - loss: 0.9596 - 17ms/epoch - 3ms/step
Epoch 5/100
5/5 - 0s - loss: 0.9277 - 14ms/epoch - 3ms/step
Epoch 6/100
5/5 - 0s - loss: 0.8971 - 16ms/epoch - 3ms/step
Epoch 7/100
5/5 - 0s - loss: 0.8668 - 17ms/epoch - 3ms/step
Epoch 8/100
5/5 - 0s - loss: 0.8393 - 16ms/epoch - 3ms/step
Epoch 9/100
5/5 - 0s - loss: 0.8101 - 19ms/epoch - 4ms/step
Epoch 10/100
5/5 - 0s - loss: 0.7820 - 19ms/epoch - 4ms/step
Epoch 11/100
5/5 - 0s - loss: 0.7545 - 30ms/epoch - 6ms/step
Epoch 12/100
5/5 - 0s - loss: 0.7291 - 21ms/epoch - 4ms/step
Epoch 13/100
5/5 - 0s - loss: 0.7023 - 26ms/epoch - 5ms/step
Epoch 14/100
5/5 - 0s - loss: 0.6788 - 23ms/epoch - 5ms/step
Epoch 15/100
5/5 - 0s - loss: 0.6545 - 24ms/epoch - 5ms/step
Epoch 16/100
5/5 - 0s - loss: 0.6319 - 25ms/epoch - 5ms/step
Epoch 17/100
5/5 - 0s - loss: 

In [None]:
import pandas as pd
import io
import requests
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss, roc_curve, auc
import matplotlib.pyplot as plt
import sklearn.decomposition as PCA
# Generated training script for the Boston housing data, with "tax" treated
# as a multi-class target. 'NA' and '?' are read as missing values.
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv", na_values=['NA', '?'])
# Min-max scale every input column exactly once. The "crim" step used to be
# repeated, which put that feature into x twice.
scaled_fields = ["crim", "zn", "indus", "chas", "nox", "rm", "age", "dis",
                 "rad", "ptratio", "b", "lstat", "medv"]
x_fields = []
for field_name in scaled_fields:
    df[field_name] = MinMaxScaler().fit_transform(df[[field_name]])
    x_fields.append(field_name)
x = df[x_fields].values
# One-hot encode the target; one output unit per distinct "tax" value.
dummies = pd.get_dummies(df["tax"])
species = dummies.columns
y = dummies.values
# Construct model
model = Sequential()
model.add(Dense(50, input_dim=x.shape[1], activation="relu"))
model.add(Dense(25, activation="relu"))
model.add(Dense(y.shape[1], activation="softmax"))
model.compile(loss="categorical_crossentropy", optimizer="adam")
# Train model
model.fit(x, y, verbose=2, epochs=100)
# Evaluate model (on the training data; no hold-out split is made here)
pred = model.predict(x)
predict_classes = np.argmax(pred, axis=1)
expected_classes = np.argmax(y, axis=1)
correct = accuracy_score(expected_classes, predict_classes)
print(f"Accuracy: {correct}")
# No `eps` argument: newer scikit-learn releases removed it from log_loss.
score = metrics.log_loss(y, pred)
print(f"Log loss: {score}")

Epoch 1/100
16/16 - 1s - loss: 4.1126 - 710ms/epoch - 44ms/step
Epoch 2/100
16/16 - 0s - loss: 3.9654 - 39ms/epoch - 2ms/step
Epoch 3/100
16/16 - 0s - loss: 3.7613 - 52ms/epoch - 3ms/step
Epoch 4/100
16/16 - 0s - loss: 3.4650 - 45ms/epoch - 3ms/step
Epoch 5/100
16/16 - 0s - loss: 3.1506 - 43ms/epoch - 3ms/step
Epoch 6/100
16/16 - 0s - loss: 2.9987 - 44ms/epoch - 3ms/step
Epoch 7/100
16/16 - 0s - loss: 2.9034 - 36ms/epoch - 2ms/step
Epoch 8/100
16/16 - 0s - loss: 2.8262 - 38ms/epoch - 2ms/step
Epoch 9/100
16/16 - 0s - loss: 2.7594 - 37ms/epoch - 2ms/step
Epoch 10/100
16/16 - 0s - loss: 2.6984 - 41ms/epoch - 3ms/step
Epoch 11/100
16/16 - 0s - loss: 2.6423 - 45ms/epoch - 3ms/step
Epoch 12/100
16/16 - 0s - loss: 2.5882 - 35ms/epoch - 2ms/step
Epoch 13/100
16/16 - 0s - loss: 2.5386 - 44ms/epoch - 3ms/step
Epoch 14/100
16/16 - 0s - loss: 2.4864 - 39ms/epoch - 2ms/step
Epoch 15/100
16/16 - 0s - loss: 2.4410 - 36ms/epoch - 2ms/step
Epoch 16/100
16/16 - 0s - loss: 2.3981 - 38ms/epoch - 2ms/step