In [67]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import Dataset, DataLoader
import joblib

In [68]:
import numpy as np
import plotly.graph_objects as go
import torch


def plot_model_learning(
        model, x, y, x_range=[-3, 3], y_range=[-150, 150], dx=1, dy=30
):
    """
    Visualize a dataset and the model's predictions, showing how the model fits the dataset.

    Parameters:
        model: Trained PyTorch model to generate predictions
        x: Input data (numpy array or torch tensor)
        y: True target values (numpy array or torch tensor)
        x_range: x-axis range [min, max] (default: [-3, 3])
        y_range: y-axis range [min, max] (default: [-150, 150])
        dx: Tick interval for x-axis
        dy: Tick interval for y-axis
    Returns:
        fig: A Plotly interactive figure
    """

    if isinstance(x, torch.Tensor):
        x = x.detach().numpy()
    if isinstance(y, torch.Tensor):
        y = y.detach().numpy()

    with torch.no_grad():
        x_tensor = torch.tensor(x, dtype=torch.float32)
        y_hat = model(x_tensor).detach().numpy()

    fig = go.Figure()


    fig.add_trace(
        go.Scatter(x=x.flatten(), y=y.flatten(), mode="markers",
                   marker=dict(size=10, color="blue"),
                   name="True Data")
    )

    # Plot the model's predictions
    fig.add_trace(
        go.Scatter(
            x=x.flatten(),
            y=y_hat.flatten(),
            mode="lines",
            line=dict(width=3, color="red"),
            name="Model Prediction",
        )
    )

    fig.update_layout(
        width=600,
        height=500,
        title="Model Learning Visualization",
        title_x=0.5,
        xaxis_title="Input (x)",
        yaxis_title="Output (y)",
        margin=dict(t=60),
    )
    fig.update_xaxes(range=x_range, tick0=x_range[0], dtick=dx)
    fig.update_yaxes(range=y_range, tick0=y_range[0], dtick=dy)

    return fig



def plot_dataset(x, y, x_range=None, y_range=None, dx=1, dy=30):
    """
    Plot a dataset (x, y) without any predictions.

    Parameters:
        x: Input data (numpy array or torch tensor).
        y: Output/target values (numpy array or torch tensor).
        x_range: x-axis range [min, max]. If None, it adapts automatically.
        y_range: y-axis range [min, max]. If None, it adapts automatically.
        dx: Tick interval for x-axis.
        dy: Tick interval for y-axis.
    Returns:
        fig: A Plotly interactive figure.
    """
    # Ensure `x` and `y` are numpy arrays for easier plotting
    if isinstance(x, torch.Tensor):
        x = x.detach().numpy()
    if isinstance(y, torch.Tensor):
        y = y.detach().numpy()

    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=x.flatten(),
            y=y.flatten(),
            mode="markers",
            marker=dict(size=10, color="blue"),
            name="Data Points"
        )
    )

    # Customize layout
    fig.update_layout(
        width=600,
        height=500,
        title="Dataset Scatter Plot",
        title_x=0.5,
        xaxis_title="Input (x)",
        yaxis_title="Output (y)",
        margin=dict(t=60),
    )

    # Set axis ranges if provided; otherwise, let Plotly auto-scale
    if x_range is not None:
        fig.update_xaxes(range=x_range, tick0=x_range[0], dtick=dx)
    if y_range is not None:
        fig.update_yaxes(range=y_range, tick0=y_range[0], dtick=dy)

    return fig


In [69]:
np.random.seed(42)

steps = np.random.randint(1000, 20000, 1000)
calories_burned = np.random.uniform(1200, 4500, 1000)
sleep_hours = np.random.uniform(4, 12, 1000)

health_score = (
    0.3 * (steps / 20000) * 100 +
    0.4 * ((calories_burned - 1200) / (4500 - 1200)) * 100 +
    0.3 * (sleep_hours / 12) * 100
)

health_score += np.random.normal(0, 5, 1000)

df = pd.DataFrame({
    'steps': steps,
    'calories_burned': calories_burned,
    'sleep_hours': sleep_hours,
    'health_score': health_score
})

print(df.corr())

df.head()
# df.describe()

                    steps  calories_burned  sleep_hours  health_score
steps            1.000000        -0.049403    -0.058327      0.450064
calories_burned -0.049403         1.000000     0.023884      0.721397
sleep_hours     -0.058327         0.023884     1.000000      0.360569
health_score     0.450064         0.721397     0.360569      1.000000


Unnamed: 0,steps,calories_burned,sleep_hours,health_score
0,16795,2546.348835,8.895524,60.469143
1,1860,3020.684564,4.847446,33.014047
2,6390,2037.832261,5.212555,36.502435
3,12964,1847.173229,4.750055,31.040483
4,12284,2868.176001,8.617382,60.650668


In [70]:
X = df.drop('health_score', axis=1)
y = df['health_score']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [71]:
scalar = StandardScaler()

X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)

joblib.dump(scalar, "models/scaler.pkl")

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

X_train_tensor

tensor([[ 0.4044,  0.4760,  0.8436],
        [ 0.0682,  1.7618, -0.9887],
        [-0.1995,  1.3446,  0.2374],
        ...,
        [-0.7099, -0.4851,  0.4493],
        [ 1.2707,  0.3790,  0.5559],
        [-1.6231, -1.1941, -0.1826]])

In [72]:
import pandas as pd

correlation_matrix = pd.DataFrame(X_train, columns=['steps', 'calories_burned', 'sleep_hours'])
correlation_matrix['health_score'] = y_train.values

print(correlation_matrix.corr())

                    steps  calories_burned  sleep_hours  health_score
steps            1.000000        -0.065849    -0.076856      0.447322
calories_burned -0.065849         1.000000     0.034754      0.713229
sleep_hours     -0.076856         0.034754     1.000000      0.360362
health_score     0.447322         0.713229     0.360362      1.000000


In [73]:
class HealthDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

train_dataset = HealthDataset(X_train_tensor, y_train_tensor)
test_dataset = HealthDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [74]:
import torch.nn as nn

class HealthScoreNN(nn.Module):
    def __init__(self, input_size):
        super(HealthScoreNN, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.Linear(64, 32),
            nn.ReLU(),
            # nn.Dropout(0.1),
            nn.Linear(32, 16),
            nn.ReLU(),
            # nn.Dropout(0.1),
            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.fc(x)

model = HealthScoreNN(input_size=3)

# for name, param in model.named_parameters():
#     print(f"Parameter name: {name}, Shape: {param.shape}")


In [75]:
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-6)

In [76]:
epochs = 100
epoch_loss = []

for epoch in range(epochs):
    model.train()
    train_loss = []

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        preds = model(X_batch)
        loss = criterion(preds, y_batch)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

    avg_train_loss = sum(train_loss) / len(train_loader)
    epoch_loss.append(avg_train_loss)
    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {avg_train_loss:.4f}")

Epoch 1/100, Train Loss: 3244.7228
Epoch 2/100, Train Loss: 3108.5842
Epoch 3/100, Train Loss: 2640.6535
Epoch 4/100, Train Loss: 1541.4813
Epoch 5/100, Train Loss: 294.6686
Epoch 6/100, Train Loss: 33.2894
Epoch 7/100, Train Loss: 25.6422
Epoch 8/100, Train Loss: 25.6108
Epoch 9/100, Train Loss: 25.4265
Epoch 10/100, Train Loss: 25.5501
Epoch 11/100, Train Loss: 25.6031
Epoch 12/100, Train Loss: 25.5892
Epoch 13/100, Train Loss: 25.5346
Epoch 14/100, Train Loss: 25.4726
Epoch 15/100, Train Loss: 25.5238
Epoch 16/100, Train Loss: 25.5640
Epoch 17/100, Train Loss: 25.4302
Epoch 18/100, Train Loss: 25.4792
Epoch 19/100, Train Loss: 25.4527
Epoch 20/100, Train Loss: 25.4521
Epoch 21/100, Train Loss: 25.5061
Epoch 22/100, Train Loss: 25.5457
Epoch 23/100, Train Loss: 25.5001
Epoch 24/100, Train Loss: 25.8145
Epoch 25/100, Train Loss: 25.5859
Epoch 26/100, Train Loss: 25.4469
Epoch 27/100, Train Loss: 25.6040
Epoch 28/100, Train Loss: 25.5184
Epoch 29/100, Train Loss: 25.5129
Epoch 30/100, 

In [77]:
# plot_model_learning(model, X_train_tensor, y_train_tensor)


In [78]:
model.eval()

test_loss = 0.0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        preds = model(X_batch)
        loss = criterion(preds, y_batch)
        test_loss += loss.item()

    test_loss = test_loss / len(test_loader)
print(f"Test Loss: {test_loss:.4f}")

Test Loss: 33.6428


In [79]:
epochs = 100  # Number of epochs
train_loss = [0.1 / (epoch + 1) for epoch in range(epochs)]  # Simulated training loss values

# Correct plotting
plt.figure(figsize=(8, 6))
plt.plot(range(1, epochs + 1), train_loss, marker='o', label='Training Loss')
plt.title('Training Loss Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid()
plt.show()


NameError: name 'plt' is not defined

In [576]:
from sklearn.metrics import r2_score, mean_squared_error

# Predict on the entire test set and calculate R2
with torch.no_grad():
    predictions = model(X_test_tensor).numpy()
r2 = r2_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)

print(f"R² Score: {r2:.4f}")
print("MSE:", mse)

X_test_tensor



R² Score: 0.8967
MSE: 28.963097107236127


tensor([[ 7.8303e-01,  1.6264e+00, -4.6884e-01],
        [ 6.8661e-01, -1.0675e+00,  1.4388e+00],
        [ 1.2112e+00, -4.5419e-01,  1.7422e-01],
        [ 1.2834e-01,  1.3096e+00, -1.7587e+00],
        [ 2.6330e-01,  1.1856e+00,  3.7306e-01],
        [ 1.7469e-01,  5.5358e-01, -1.4295e+00],
        [ 7.1285e-01, -1.3567e+00, -1.3163e-01],
        [ 1.7991e-01,  1.3959e+00, -9.9924e-03],
        [-6.3915e-01,  5.8012e-01,  4.7133e-01],
        [-3.1562e-01, -1.5867e+00, -5.8285e-01],
        [-1.0669e+00,  1.4476e+00,  7.6593e-01],
        [-2.2627e-01,  1.2339e+00,  8.7095e-01],
        [-8.1302e-01,  1.1781e+00,  1.5792e+00],
        [-1.3891e+00,  6.4186e-01,  1.1498e+00],
        [-9.3252e-01,  1.4499e+00,  1.1772e+00],
        [-1.5651e+00,  1.1301e+00,  1.6002e-01],
        [ 1.5157e+00,  1.6717e+00,  7.7792e-01],
        [-1.1784e+00,  3.0364e-02,  1.4378e+00],
        [ 1.5312e+00,  8.7537e-01,  5.5825e-01],
        [ 7.7838e-01,  6.5820e-01, -9.9476e-01],
        [-3.2363e-01

In [577]:
torch.save(model, "models/health_score_model.pth")

model = torch.load("models/health_score_model.pth")
model.eval()

HealthScoreNN(
  (fc): Sequential(
    (0): Linear(in_features=3, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=32, bias=True)
    (2): ReLU()
    (3): Linear(in_features=32, out_features=16, bias=True)
    (4): ReLU()
    (5): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [578]:
saved_object = torch.load('models/health_score_model.pth')
print(type(saved_object))

<class '__main__.HealthScoreNN'>


In [579]:
print(model(torch.tensor([[22, 2223, 3]], dtype=torch.float32)))

tensor([[26105.7051]], grad_fn=<AddmmBackward0>)


In [580]:
print(df['health_score'].describe())


count    1000.000000
mean       55.706344
std        15.723758
min        15.052268
25%        44.440922
50%        55.958271
75%        67.306735
max        98.282510
Name: health_score, dtype: float64
