# **Environment Setup and Data Loading**
### We start by importing the necessary libraries and loading the Glass Identification dataset. As per the lab requirements, we use `pandas` to handle the data table where rows represent samples and columns represent features.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the Glass Identification table from disk (one row per sample)
df = pd.read_csv("glass.csv")

# Structural check: the (rows, columns) tuple on one line, the column labels on the next
print(df.shape, df.columns, sep="\n")

# Keep the preview as the cell's last expression so the first rows are displayed
df.head()

(214, 10)
Index(['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type'], dtype='object')


Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


# **Defining the Learning Task (Binary Classification)**
### The original dataset contains multiple glass types, but we are converting it into a binary problem. We define Type 1 glass as our positive class ($y=1$) and all other types as the negative class ($y=0$). This is a design choice to simplify the decision rule.

In [2]:
# Build the binary target: y = 1 for Type 1 glass, y = 0 for every other type,
# and replace the original multi-class "Type" column with it.
# The guard keeps this cell safe to re-run: once "Type" has been dropped, an
# unguarded second run would raise KeyError. `assign` returns a new frame
# instead of mutating the loaded one in place.
if "Type" in df.columns:
    df = df.assign(y=(df["Type"] == 1).astype(int)).drop(columns=["Type"])

# Separate inputs (X: every column except the label) and output (y) as NumPy arrays
X = df.drop(columns=["y"]).to_numpy()
y = df["y"].to_numpy()

# **Preprocessing (Split and Scale)**
### We split the data to ensure we can test the model on unseen data, preventing false confidence. We also use a StandardScaler because glass features have different numeric ranges. Without scaling, the sigmoid function can "saturate," making learning unstable.

In [3]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# **The Sigmoid and Forward Pass**
### Instead of a hard step function, we implement the Sigmoid function: $\sigma(z) = \frac{1}{1+e^{-z}}$. This allows the model to output a value between 0 and 1, representing "confidence" or "belief" rather than a forced decision.

In [4]:
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

    The value is computed in a numerically stable form: exp() is only ever
    applied to non-positive numbers, so very negative scores no longer
    overflow (the naive form evaluates exp(+large) and emits a
    RuntimeWarning).

    Parameters
    ----------
    z : float or array-like
        Raw score(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A scalar for scalar input, otherwise an array with the shape of `z`.
        Every value lies in [0, 1].
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # always in [0, 1], cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z)  (algebraically the same function)
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Unwrap 0-d results so scalar callers still receive a scalar
    return out[()] if out.ndim == 0 else out

def predict_proba(X, w, b):
    """Map each row of X to a probability for the positive class.

    The linear score `X @ w + b` is computed per row and squashed through
    `sigmoid`, so the result has one entry per row of X.
    """
    return sigmoid(b + X @ w)

# **Loss Function and Weight Updates**
### We use Binary Cross Entropy loss. Unlike the perceptron rule, this loss penalizes "wrong confidence"—a confident wrong prediction receives a much larger penalty than an uncertain one. The learning step uses the error $(p-y)$ to adjust the weights and bias in the correct direction.

In [5]:
def loss(y, p, eps=1e-12):
    """Mean binary cross-entropy between labels `y` and probabilities `p`.

    Probabilities are clipped to [eps, 1 - eps] before taking logarithms.
    Without the clip, a saturated prediction (p exactly 0 or 1) produces
    log(0) = -inf, and the product 0 * -inf turns the whole mean into NaN.

    Parameters
    ----------
    y : array-like
        True labels (0 or 1).
    p : array-like
        Predicted probabilities for the positive class, same shape as `y`.
    eps : float, optional
        Clipping margin that keeps the logarithms finite.

    Returns
    -------
    numpy.float64
        The average cross-entropy; always finite and non-negative.
    """
    y = np.asarray(y, dtype=float)
    p = np.clip(np.asarray(p, dtype=float), eps, 1.0 - eps)
    return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

def update_weights(X, y, w, b, lr):
    """Take one full-batch gradient-descent step and return the new (w, b).

    The residual `p - y` (predicted probability minus label) drives both
    updates: the weight step is the residual projected back onto the
    features and averaged over the samples, the bias step is the mean
    residual. Both are scaled by the learning rate `lr`.
    """
    residual = predict_proba(X, w, b) - y

    # Step sizes for the weights (one per feature) and the bias
    w_step = lr * (X.T @ residual) / len(y)
    b_step = lr * np.mean(residual)

    return w - w_step, b - b_step

# **The Training Loop**

### We initialize the weights to zero and run the loop for 100 epochs. Over time, the model should become "less wrong," and the probabilities it outputs should become more meaningful.

In [6]:
# Hyperparameters: step size and number of passes over the training set
lr = 0.1
epochs = 100

# Start from the all-zero model: a scalar bias and one weight per feature column
b = 0.0
w = np.zeros(X_train.shape[1])

# Every epoch performs a single update using the whole training set
for _ in range(epochs):
    w, b = update_weights(X_train, y_train, w, b, lr)

# **Policy Selection (Thresholding)**
### The final step is moving from probability to a label. This is a policy choice, not a model requirement. In glass quality control, a higher threshold (like 0.7) is often "safer" because it requires stronger evidence before accepting a sample as "Type 1".

In [7]:
def predict_label(p, threshold=0.5):
    """Turn probabilities into hard 0/1 labels using a decision threshold.

    A probability at or above `threshold` becomes 1, anything below it
    becomes 0. The input is coerced with `np.asarray`, so plain lists and
    Python scalars are accepted in addition to NumPy arrays.

    Parameters
    ----------
    p : float or array-like
        Predicted probabilities for the positive class.
    threshold : float, optional
        Minimum probability required to assign the positive label.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 labels with the shape of `p` (0-d for scalar input).
    """
    return (np.asarray(p) >= threshold).astype(int)

# Score the test split with the trained weights and bias
p_test = predict_proba(X_test, w, b)

# Apply two decision policies to the same probabilities: the default 0.5
# cut-off and a stricter 0.7 cut-off
labels_50 = predict_label(p_test, threshold=0.5)
labels_70 = predict_label(p_test, threshold=0.7)

print("Decisions at 0.5:", labels_50)
print("Decisions at 0.7:", labels_70)

Decisions at 0.5: [0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0
 1 0 0 0 0 1]
Decisions at 0.7: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0]
