In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


## Shifting and rescaling data

In [None]:
# Receives a CSV (or an already-loaded DataFrame) and returns a standardized DataFrame.
def regularize_data(X):
    """Standardize the continuous columns and binary-encode ``famhist``.

    Parameters
    ----------
    X : str, path-like, file-like object, or pandas.DataFrame
        Anything ``pd.read_csv`` accepts, or a DataFrame that is already in
        memory. A DataFrame argument is never modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the input in which every column listed in
        ``continuous_features`` has been shifted to mean 0 and divided by its
        sample standard deviation (``Series.std``, pandas default ``ddof=1``),
        and ``famhist`` has been mapped ``'Present' -> 1`` / ``'Absent' -> 0``.
        Any other ``famhist`` value becomes NaN (behaviour of ``Series.map``).

    Raises
    ------
    KeyError
        If one of the expected columns is missing from the data.
    """
    df = X if isinstance(X, pd.DataFrame) else pd.read_csv(X)
    df_copy = df.copy()
    continuous_features = ['sbp', 'tobacco', 'ldl', 'adiposity', 'typea', 'obesity', 'alcohol', 'age']

    for feat in continuous_features:
        column = df[feat]
        # A column with at most one distinct value has no spread: its standard
        # deviation is 0 (or NaN for a single row), and dividing by it would turn
        # the whole column into NaN. Scale such columns by 1 so they come out as 0.
        if column.nunique(dropna=True) <= 1:
            scale = 1.0
        else:
            scale = column.std()
        # Shift the values to mean 0 and rescale them to standard deviation 1.
        df_copy[feat] = (column - column.mean()) / scale

    # Encode the categorical family-history flag as 0/1.
    df_copy['famhist'] = df_copy['famhist'].map({'Present': 1, 'Absent': 0})

    return df_copy

normalized_df = regularize_data('data.csv')

# Plot SBP before and after normalization side by side: one figure, two axes,
# so the change of scale can be compared directly.
# The raw frame gets its own name so it cannot be confused with a normalized one.
raw_df = pd.read_csv('data.csv')

fig, (ax_raw, ax_norm) = plt.subplots(1, 2, figsize=(14, 5))

sns.histplot(raw_df['sbp'], bins=30, kde=True, ax=ax_raw)
ax_raw.set(
    title='Distribution of Original SBP',
    xlabel='Original SBP',
    ylabel='Frequency',
)

sns.histplot(normalized_df['sbp'], bins=30, kde=True, ax=ax_norm)
ax_norm.set(
    title='Distribution of Normalized SBP',
    xlabel='Normalized SBP',
    ylabel='Frequency',
)

fig.tight_layout()
plt.show()



# Neural Network


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasRegressor
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import cross_val_score, KFold


# Load the standardized data set.
# To work with unscaled values instead, read "data.csv" directly with pd.read_csv
# and map famhist 'Present'/'Absent' to 1/0 by hand.
df = regularize_data("data.csv")

# NOTE: 'adiposity' is one of the columns regularize_data standardizes, so the
# target (and every error computed against it) is expressed in z-score units.
# The scaling statistics are computed over the full file, before any splitting.
target = 'adiposity'

# Non-predictive columns to discard (presumably an exported row index; verify against the CSV).
drop_cols = ["row.names"]

# Response vector first, then the predictor matrix with target and bookkeeping columns removed.
y = df[target]
X = df.drop(columns=[target, *drop_cols])

def build_ann(n_features_in_=X.shape[1]):
    """Build and compile a small fully connected network for regression.

    Architecture: Dense(16, relu) -> Dense(8, relu) -> Dense(1) with no
    activation on the output. Compiled with the Adam optimizer, mean squared
    error as the loss and mean absolute error as an extra metric.

    Parameters
    ----------
    n_features_in_ : int
        Number of input features. The default is read from the global ``X``
        once, when this ``def`` statement is executed, not on every call.

    Returns
    -------
    A compiled Keras ``Sequential`` model.
    """
    first_hidden = Dense(16, activation="relu", input_shape=(n_features_in_,))
    second_hidden = Dense(8, activation="relu")
    output_layer = Dense(1)  # single linear unit: regression output

    network = Sequential([first_hidden, second_hidden, output_layer])
    network.compile(optimizer="adam", loss="mse", metrics=["mae"])
    return network

# One seed for both the fold assignment and the network's weight initialisation /
# batch shuffling, so a Restart & Run All reproduces the reported scores.
SEED = 42

# Stop training once the validation loss has not improved for 10 consecutive
# epochs, and roll the model back to the weights of its best epoch.
early = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

reg = KerasRegressor(
    model=build_ann,
    epochs=200,          # upper bound; early stopping normally ends training sooner
    batch_size=32,
    verbose=0,
    validation_split=0.2,  # held-out slice of each training fold that supplies val_loss
    fit__callbacks=[early],
    random_state=SEED,   # without this, every run trains from different random weights
)


cv = KFold(n_splits=5, shuffle=True, random_state=SEED)
# scikit-learn scorers follow "greater is better", so MAE comes back negated;
# flip the sign when reporting.
scores = cross_val_score(reg, X, y, cv=cv, scoring='neg_mean_absolute_error', n_jobs=-1)
print("MAE per fold:", -scores)
print("Mean MAE:", -scores.mean())