In [None]:
!pip install torchviz

In [14]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from scipy.stats import f

import statsmodels.api as sm
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
from torchviz import make_dot

import matplotlib.pyplot as plt
import os

from IPython.display import Image

**FUNZIONI PER VALUTARE LE PREDIZIONI SUL TEST SET**

In [23]:
def evaluate_predictions(X_test, y_test, y_pred):
    """Print regression metrics for test-set predictions and draw diagnostics.

    The function prints R², MAE, MSE, RMSE, MAPE and RSE, then shows three
    figures: a scatter plot of ground truth and predictions against
    ``X_test`` (with a horizontal line at 1.2), a histogram of the residuals
    and a Q-Q plot of the residuals.

    Args:
        X_test: Feature values used as the x coordinate of the scatter plot.
        y_test: Ground-truth target values (array-like).
        y_pred: Predicted target values (array-like), one per element of
            ``y_test``.

    Returns:
        None. Results are printed and plotted.

    Raises:
        ValueError: If ``y_test`` and ``y_pred`` do not contain the same
            number of values.
    """
    # Flatten both inputs to 1-D float arrays. Subtracting an (n,) array from
    # an (n, 1) array would silently broadcast to (n, n), and subtracting two
    # pandas objects would align on the index instead of on position.
    y_true = np.asarray(y_test, dtype=float).ravel()
    y_hat = np.asarray(y_pred, dtype=float).ravel()
    if y_true.shape != y_hat.shape:
        raise ValueError(
            f"y_test and y_pred must have the same number of values, "
            f"got {y_true.size} and {y_hat.size}"
        )

    # R-squared (R²)
    r2 = r2_score(y_true, y_hat)

    # Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_true, y_hat)

    # Mean Squared Error (MSE)
    mse = mean_squared_error(y_true, y_hat)

    # Root Mean Squared Error (RMSE)
    rmse = np.sqrt(mse)

    # Residuals are the differences between the true values and the predictions
    residuals = y_true - y_hat

    # Mean Absolute Percentage Error (MAPE).
    # The percentage error is undefined where the true value is 0, so those
    # samples are left out; with no usable sample the metric is NaN.
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(np.abs(residuals[nonzero] / y_true[nonzero])) * 100
    else:
        mape = np.nan

    # Residual Standard Error (RSE)
    # For simple linear regression, degrees of freedom = n - 2.
    # With n <= 2 there are no degrees of freedom left, so report NaN
    # instead of dividing by zero or taking the root of a negative number.
    dof = y_true.size - 2
    rse = np.sqrt(np.sum(residuals**2) / dof) if dof > 0 else np.nan

    # Output all the results
    print(f"R-squared (R²): {r2:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.4f}%")
    print(f"Residual Standard Error (RSE): {rse:.4f}")

    # Scatter plot: ground truth and predictions over the same x values
    plt.scatter(X_test, y_true, color='blue', marker='o',
                label='ground truth BHB values')
    plt.scatter(X_test, y_hat, color='red', marker='+',
                label='predicted BHB values')
    # Horizontal reference line at the same 1.2 value used as threshold label
    plt.axhline(y=1.2, color='green', linestyle='--',
                linewidth=2, label='Threshold per diagnosi')
    plt.xlabel('Glu (mmol/L)')
    plt.ylabel('BHB (mmol/L)')
    plt.title('Scatter plot: ground truth and predictions on test set')
    plt.legend()
    plt.show()

    # Histogram of the residuals; kde=True adds a kernel density estimate
    plt.figure(figsize=(8, 6))
    sns.histplot(residuals, kde=True, bins=30)
    plt.title('Distribuzione dei residui')
    plt.xlabel('Residui')
    plt.ylabel('Frequenza')
    plt.show()

    # Q-Q plot of the residuals against a fitted normal distribution
    sm.qqplot(residuals, line='45', fit=True)
    plt.title('Q-Q Plot dei residui')
    plt.show()


# Function to compute TP, TN, FP, FN
def compute_confusion_matrix(boolean_predictions, boolean_ground_truth):
    """Count true/false positives and negatives of a binary prediction.

    Args:
        boolean_predictions: Array-like of predicted labels (truthy = positive).
        boolean_ground_truth: Array-like of true labels (truthy = positive),
            with one entry per prediction.

    Returns:
        The tuple ``(TP, TN, FP, FN)`` of counts.

    Raises:
        ValueError: If the two inputs do not contain the same number of values.
    """
    # Convert explicitly to flat boolean arrays. Comparing a plain Python list
    # with `== True` yields a single False rather than an element-wise mask,
    # which would silently give all-zero counts.
    predicted = np.asarray(boolean_predictions, dtype=bool).ravel()
    actual = np.asarray(boolean_ground_truth, dtype=bool).ravel()
    if predicted.shape != actual.shape:
        raise ValueError(
            f"predictions and ground truth must have the same number of "
            f"values, got {predicted.size} and {actual.size}"
        )

    TP = np.sum(predicted & actual)     # Both True
    TN = np.sum(~predicted & ~actual)   # Both False
    FP = np.sum(predicted & ~actual)    # Predicted True, but False in ground truth
    FN = np.sum(~predicted & actual)    # Predicted False, but True in ground truth
    return TP, TN, FP, FN


def compute_classification_metrics(TP, TN, FP, FN):
    """Derive accuracy, recall, precision and F1 from confusion-matrix counts.

    Any metric whose denominator is not positive is reported as ``None``.
    F1 is also ``None`` when precision or recall is ``None`` or when their
    sum is not positive.

    Args:
        TP: True positives.
        TN: True negatives.
        FP: False positives.
        FN: False negatives.

    Returns:
        The tuple ``(accuracy, recall, precision, f1_score)``.
    """
    total = TP + TN + FP + FN
    actual_positives = TP + FN
    predicted_positives = TP + FP

    accuracy = (TP + TN) / total if total > 0 else None
    recall = TP / actual_positives if actual_positives > 0 else None
    precision = TP / predicted_positives if predicted_positives > 0 else None

    # F1 needs both precision and recall, and a positive sum to divide by
    if precision is None or recall is None or not (precision + recall > 0):
        f1 = None
    else:
        f1 = 2 * (precision * recall) / (precision + recall)

    return accuracy, recall, precision, f1

Cliccare sull'icona della cartella qui a sinistra. Quando si apre lo spazio di lavoro dei file, trascinare il file 'Holstein_diary_cows.csv' dal proprio computer all'area qui a sinistra. In seguito eseguire la prossima cella.

In [4]:
# File name of the dataset, resolved relative to the current working directory
cvs_source = 'Holstein_diary_cows.csv'

# Read the whole CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer=cvs_source)

**Eseguire la prossima cella per visualizzare il dataset**

In [None]:
# Create the scatter plot of the two dataset columns.
# The points carry a label: plt.legend() needs at least one labelled artist,
# otherwise it draws nothing and emits a "No artists with labels" warning.
plt.scatter(df['Glu'], df['BHB'], label='observed BHB values')

# Set labels and title
plt.xlabel('Glu (mmol/L)')
plt.ylabel('BHB (mmol/L)')
plt.title('Scatter plot: Glu vs BHB')

# Add a legend
plt.legend()

# Display the plot
plt.show()

**DIVIDI IL DATASET IN 80% TRAIN E 20% TEST**

In [6]:
# Single-column feature frame (independent variable) and target series
# (dependent variable)
X = df[['Glu']]
y = df['BHB']

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split identical on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

**CREA E ALLENA UNA FULLY CONNECTED NEURAL NETWORK**

In [None]:
# Define the neural network model
class SimpleNN(nn.Module):
    """Fully connected network with one hidden ReLU layer.

    The network is ``Linear(in_features, hidden_size) -> ReLU ->
    Linear(hidden_size, out_features)``. The defaults reproduce the original
    1 -> 10 -> 1 architecture, so ``SimpleNN()`` behaves as before.

    Args:
        in_features: Number of input features per sample.
        hidden_size: Number of neurons in the hidden layer.
        out_features: Number of output values per sample.
    """

    def __init__(self, in_features=1, hidden_size=10, out_features=1):
        super().__init__()
        # Attribute names are kept because they are the parameter names
        # exposed by named_parameters() / state_dict()
        self.hidden = nn.Linear(in_features, hidden_size)   # Input -> hidden
        self.activation = nn.ReLU()                          # Non-linearity
        self.output = nn.Linear(hidden_size, out_features)  # Hidden -> output

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, out_features)``."""
        x = self.hidden(x)
        x = self.activation(x)
        return self.output(x)

# Seed PyTorch's random number generator before the model is created, so the
# random weight initialisation (and therefore the trained model and every
# metric computed from it) is the same on each run
torch.manual_seed(42)

# Initialize the model, loss function, and optimizer
model = SimpleNN()
criterion = nn.MSELoss()  # Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Convert the training split to float32 tensors
X_train_tensor = torch.from_numpy(X_train.values).float()
y_train_tensor = torch.from_numpy(
    y_train.values).float().view(-1, 1)  # Reshape to (n_samples, 1)


# Training loop: each epoch is one gradient step on the whole training tensor
epochs = 70
for epoch in range(epochs):
    # Forward pass
    y_pred = model(X_train_tensor)
    loss = criterion(y_pred, y_train_tensor)

    # Backward pass: clear old gradients, backpropagate, update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

**VISUALIZZA LA RETE NEURALE**

In [None]:
# Create a dummy input tensor with the correct input size
dummy_input = torch.randn(1, 1)  # Batch size of 1, input size of 1

# Perform a forward pass to generate the computation graph
output = model(dummy_input)

# Use torchviz to visualize the computation graph
dot = make_dot(output, params=dict(model.named_parameters()))

# Render the graph to a PNG file without view=True: that option launches an
# external viewer application, which is not available on a hosted or headless
# kernel. render() returns the path of the file it wrote.
graph_png_path = dot.render("fully_connected_model", format="png")

# Show the rendered image inline as the output of this cell
Image(filename=graph_png_path)

**APPLICA IL MODELLO SUL TEST SET E VALUTA LE SUE PREDIZIONI**

In [None]:
# Convert the test split to NumPy arrays. The isinstance guards make the cell
# safe to re-run after X_test / y_test have already been converted.
if not isinstance(X_test, np.ndarray):
    X_test = X_test.values

if not isinstance(y_test, np.ndarray):
    y_test = y_test.values

# Convert the test split to PyTorch tensors
X_test_tensor = torch.from_numpy(X_test).float()
y_test_tensor = torch.from_numpy(y_test).float().view(-1, 1)

# Evaluate the model on the test set
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # No gradients are needed for inference
    # Flatten the model output to 1-D so it lines up with y_test
    y_pred = model(X_test_tensor).numpy().flatten()

threshold = 1.2  # The threshold value

# Create boolean arrays: True where the value reaches the threshold
bool_y_test = y_test >= threshold
bool_y_pred = y_pred >= threshold

# Compute the confusion matrix and the metrics derived from it
TP, TN, FP, FN = compute_confusion_matrix(bool_y_pred, bool_y_test)
accuracy, recall, precision, f1_score = compute_classification_metrics(
    TP, TN, FP, FN)

print("Matrice di confusione:")
print(f"TP: {TP}")
print(f"TN: {TN}")
print(f"FP: {FP}")
print(f"FN: {FN}")
print("Metriche del classificatore:")
print(f"Accuratezza: {accuracy}")
print(f"Richiamo: {recall}")
print(f"Precisione: {precision}")
print(f"F1: {f1_score}")