<a href="https://colab.research.google.com/github/M-L-Liebl/3d_printer/blob/master/pytorch_3d_printer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Building a Basic Neural Network with PyTorch

### Objective
In this exercise, we’ll build a simple neural network using PyTorch to predict **surface roughness** from various 3D printing parameters. This exercise will cover:
- Data preprocessing and splitting
- Defining a neural network architecture in PyTorch
- Training and evaluating the model
- Visualizing the training progress


### Step 1: Load and Inspect Data

In [2]:
import pandas as pd

# Fetch the 3D-printer measurements CSV straight from the course repository
url = "https://raw.githubusercontent.com/beginners-machine-learning-london/intro_to_machine_learning_with_pytorch/refs/heads/master/assets/3dprinter.csv"
data = pd.read_csv(filepath_or_buffer=url)

# Preview the top five records (rendered as the cell output)
data.head(n=5)


Unnamed: 0,layer_height,wall_thickness,infill_density,infill_pattern,nozzle_temperature,bed_temperature,print_speed,material,fan_speed,tension_strenght,elongation,roughness
0,0.02,8.0,90.0,grid,220.0,60.0,40.0,abs,0.0,18.0,1.2,25.0
1,0.02,7.0,90.0,honeycomb,225.0,65.0,40.0,abs,25.0,16.0,1.4,32.0
2,0.02,1.0,80.0,grid,230.0,70.0,40.0,abs,50.0,8.0,0.8,40.0
3,0.02,4.0,70.0,honeycomb,240.0,75.0,40.0,abs,75.0,10.0,0.5,68.0
4,0.02,6.0,90.0,grid,250.0,80.0,40.0,abs,100.0,5.0,0.7,92.0


### Step 2: Data Preprocessing

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch
from torch.utils.data import DataLoader, TensorDataset

# Discard incomplete rows so features and target stay row-aligned
data = data.dropna()

# Separate features and target (roughness).
# The feature matrix should contain print settings only, so also remove:
#   - "Unnamed: 0": the row index that was saved into the CSV, not a print parameter
#   - "tension_strenght", "elongation": other measured outcomes of the same print,
#     which would not be known when predicting roughness for a new print
# errors="ignore" keeps this cell working if one of these columns is already absent.
y = data["roughness"]
X = data.drop(columns=["roughness"]).drop(
    columns=["Unnamed: 0", "tension_strenght", "elongation"], errors="ignore"
)

# Encode categorical features as integer codes.
# Testing for "not numeric" (instead of dtype == "object") also covers pandas'
# dedicated string dtype, which would otherwise reach the scaler un-encoded.
X = X.apply(
    lambda col: col if pd.api.types.is_numeric_dtype(col) else LabelEncoder().fit_transform(col)
)

# Split data into training and test sets with an 80-20 split.
# The split happens BEFORE scaling so the scaler never sees test rows.
X_train_df, X_test_df, y_train_s, y_test_s = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: fit on the training rows only, then reuse the same
# mean/std for every other transform (avoids leaking test-set statistics)
scaler = StandardScaler()
scaler.fit(X_train_df)
X_scaled = scaler.transform(X)  # full dataset, row-aligned with `data`

# Convert to PyTorch tensors with the float32 data type; targets become (n, 1) columns
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y.values, dtype=torch.float32).view(-1, 1)
X_train = torch.tensor(scaler.transform(X_train_df), dtype=torch.float32)
X_test = torch.tensor(scaler.transform(X_test_df), dtype=torch.float32)
y_train = torch.tensor(y_train_s.values, dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(y_test_s.values, dtype=torch.float32).view(-1, 1)

# Create data loaders with 32 samples per batch.
# Only the training set is shuffled; evaluation order does not need to vary.
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=32, shuffle=False)

### Step 3: Define the Neural Network Model

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# Define a basic neural network model
# Architecture: 3 fully connected layers with ReLU activation
# Input size: Number of features in the dataset
# Hidden Layers: 64, 32 neurons respectively (by default) with ReLU activation function
# Output size: 1 (roughness) by default
class NeuralNetwork(nn.Module):
    """Fully connected regression network: input -> hidden1 -> hidden2 -> output.

    Args:
        input_size: Number of input features per sample.
        hidden1: Width of the first hidden layer (default 64).
        hidden2: Width of the second hidden layer (default 32).
        output_size: Number of values predicted per sample (default 1).

    Raises:
        ValueError: If any of the layer sizes is smaller than 1.
    """

    def __init__(self, input_size, hidden1=64, hidden2=32, output_size=1):
        super().__init__()
        layer_sizes = (input_size, hidden1, hidden2, output_size)
        # Fail early with a readable message instead of building a degenerate layer
        if any(size < 1 for size in layer_sizes):
            raise ValueError(f"all layer sizes must be >= 1, got {layer_sizes}")
        # Attribute names fc1/fc2/fc3 are kept so state_dict keys stay unchanged
        self.fc1 = nn.Linear(input_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, output_size)

    def forward(self, x):
        """Apply fc1 and fc2 with ReLU, then fc3 with no activation.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``output_size``.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the output is an unbounded regression value
        x = self.fc3(x)
        return x

# Fix the global PyTorch RNG seed so the weight initialization below (and the
# training-loader shuffling that draws from the same RNG afterwards) is
# repeatable across "Restart Kernel -> Run All"
torch.manual_seed(42)

# Initialize the model; the input width is the number of feature columns in X_train
model = NeuralNetwork(input_size=X_train.shape[1])
print(model)

### Step 4: Set Up the Training Process

In [None]:
import torch.optim as optim

# Regression objective: Mean Squared Error, averaged over the elements of a batch
criterion = nn.MSELoss(reduction="mean")

# Adam updates every trainable parameter of the model with a learning rate of 0.001
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

### Step 5: Train the Neural Network

In [None]:
# Training parameters
num_epochs = 100
train_losses = []  # one per-sample mean loss value per epoch, plotted later

# Number of training samples, used to turn the summed loss into a per-sample mean
num_train_samples = len(train_loader.dataset)

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # loss summed over all samples seen in this epoch
    for X_batch, y_batch in train_loader:
        # Zero gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # The criterion yields a per-batch mean; weight it by the batch size so a
        # short final batch does not count as much as a full one
        running_loss += loss.item() * X_batch.size(0)

    # Per-sample average loss for the epoch
    epoch_loss = running_loss / num_train_samples
    train_losses.append(epoch_loss)

    # Report progress every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}")

print("Training complete.")

### Step 6: Evaluate the Model

In [None]:
# Evaluate on the held-out test set without tracking gradients
model.eval()
test_loss = 0.0  # loss summed over all test samples

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        # The criterion yields a per-batch mean; weight it by the batch size so the
        # result is the true per-sample mean even when the last batch is shorter
        test_loss += loss.item() * X_batch.size(0)

# Per-sample average loss over the whole test set
test_loss /= len(test_loader.dataset)
print(f"Test Loss: {test_loss:.4f}")

### Step 7: Visualize Training Loss

In [None]:
import matplotlib.pyplot as plt

# Draw the per-epoch training loss curve on an explicit Axes object
fig, ax = plt.subplots()
ax.plot(train_losses, label="Training Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE Loss")
ax.set_title("Training Loss over Epochs")
ax.legend()
plt.show()

### Step 8: Inspect Predictions on the Full Dataset

In [None]:
# Put the model in evaluation mode before running inference
model.eval()

# Whole scaled feature matrix (X_scaled) as a float32 PyTorch tensor
X_full_tensor = torch.tensor(X_scaled, dtype=torch.float32)

# Forward pass over every row with gradient tracking disabled,
# then flatten the model output into a plain Python list
with torch.no_grad():
    full_outputs = model(X_full_tensor)
predictions_full = full_outputs.squeeze().tolist()

# Attach the predictions and the per-row squared error to the NaN-free frame
data = data.dropna()
data["Predicted_roughness"] = predictions_full
data["Squared_Error"] = (data["roughness"] - data["Predicted_roughness"]).pow(2)

# Show every column together with the prediction and its squared error
print("Final DataFrame with Predictions and MSE:")
display(data)

In [None]:
# Averaging the per-row squared errors gives the MSE over every row of the dataset
squared_errors = data["Squared_Error"]
overall_mse = squared_errors.mean()
print(f"Overall Mean Squared Error on Full Dataset: {overall_mse:.4f}")

In [None]:
# Visualization: Actual vs. Predicted
fig, ax = plt.subplots(figsize=(8, 6))
actual = data["roughness"]
predicted = data["Predicted_roughness"]
ax.scatter(actual, predicted, alpha=0.7)

# Diagonal reference line spanning the observed range: points on it are perfect predictions
lowest, highest = actual.min(), actual.max()
ax.plot([lowest, highest], [lowest, highest], 'r--', label='Ideal Fit')

ax.set_title("Actual vs. Predicted Roughness")
ax.set_xlabel("Actual Roughness")
ax.set_ylabel("Predicted Roughness")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Visualization: Residuals Histogram
# Residual = actual minus predicted roughness, one value per row
residuals = data["roughness"] - data["Predicted_roughness"]

fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(residuals, bins=30, alpha=0.7, edgecolor='k')
ax.set_title("Residuals Distribution")
ax.set_xlabel("Residual (Actual - Predicted)")
ax.set_ylabel("Frequency")
ax.grid()
plt.show()