In [1]:
import pandas as pd
import numpy as np

# Load the dataset from "glass.csv"; the path is relative, so the file must
# be in the directory the kernel is running from.
df = pd.read_csv("glass.csv")

In [2]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(214, 10)

In [3]:
# Column labels, used below to pick out the features and the target.
df.columns

Index(['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type'], dtype='object')

In [4]:
# Preview the first rows to sanity-check the parsed values.
df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [5]:
#Which column is the output we want to predict?
#The Type column

In [6]:
#Are all columns numeric?
#Yes

In [7]:
#Is there an ID column that should not be used?
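#No - df.columns lists no Id column in this file (only the nine features and Type); the leftmost column shown by df.head() is the pandas row index, not a feature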

In [8]:
# Binary target: y is 1 when the glass is Type 1 and 0 for every other type.
# The guard makes the cell safe to re-run: once "Type" has been dropped, an
# unguarded second run would raise KeyError. `assign` builds a new frame
# instead of mutating the loaded one in place.
if "Type" in df.columns:
    df = df.assign(y=(df["Type"] == 1).astype(int)).drop(columns=["Type"])
df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,y
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [9]:
# Separate the frame into a plain NumPy target vector and feature matrix
# (every column except "y" is treated as a feature).
y = df["y"].to_numpy()
X = df.drop(columns=["y"]).to_numpy()

In [10]:
from sklearn.model_selection import train_test_split

# Hold out a fixed fraction of the rows for evaluation; the fixed seed makes
# the split reproducible across runs.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

In [11]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so the test data does not influence
# preprocessing.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Elementwise sigmoid of ``z`` with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in [0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 it is the
    # algebraically equal form exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar and
    # leaves arrays of any other rank unchanged.
    return out[()]

In [13]:
def predict_proba(X, w, b):
    """Return the sigmoid of the linear score ``X @ w + b``.

    X is the feature matrix, w the weight vector and b the bias; the
    result holds one value in [0, 1] per row of X.
    """
    scores = np.matmul(X, w) + b
    return sigmoid(scores)

In [14]:
def loss(y, p, eps=1e-15):
    """Mean binary cross-entropy between labels ``y`` and probabilities ``p``.

    Parameters
    ----------
    y : array-like
        True labels (0 or 1).
    p : array-like
        Predicted probabilities, same shape as ``y``.
    eps : float, optional
        Probabilities are clipped to ``[eps, 1 - eps]`` before taking logs.

    Returns
    -------
    float
        The average negative log-likelihood; always finite.
    """
    y = np.asarray(y)
    # Without clipping, p == 0 or p == 1 produces log(0) = -inf, and the
    # product 0 * -inf turns the whole mean into nan.
    p = np.clip(p, eps, 1 - eps)
    return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

In [15]:
def update_weights(X, y, w, b, lr):
    """Apply one full-batch gradient-descent step and return the new (w, b).

    The residual ``p - y`` between predicted probabilities and labels drives
    both updates: the weights move along ``X.T @ residual`` averaged over
    the samples, the bias along the mean residual, each scaled by ``lr``.
    """
    residual = predict_proba(X, w, b) - y
    n_samples = len(y)
    weight_step = lr * (X.T @ residual) / n_samples
    bias_step = lr * np.mean(residual)
    return w - weight_step, b - bias_step

In [16]:
# Hyperparameters: step size and number of full passes over the training set.
lr, epochs = 0.1, 100

# Start from an all-zero model: one weight per feature column plus a bias.
n_features = X_train.shape[1]
w, b = np.zeros(n_features), 0.0

# Every iteration performs one update computed on the whole training split.
for _epoch in range(epochs):
    w, b = update_weights(X_train, y_train, w, b, lr)

In [17]:
def predict_label(p, threshold=0.5):
    """Convert probabilities to 0/1 labels.

    An entry becomes 1 when it is greater than or equal to ``threshold``
    and 0 otherwise. ``p`` must support elementwise comparison and
    ``astype`` (e.g. a NumPy array).
    """
    is_positive = p >= threshold
    return is_positive.astype(int)

In [18]:
# Score the held-out split once, then turn the same probabilities into labels
# at two decision thresholds to compare their accuracy.
p_test = predict_proba(X_test, w, b)

y_pred_05, y_pred_07 = (predict_label(p_test, cutoff) for cutoff in (0.5, 0.7))

# Accuracy is the fraction of test rows whose predicted label matches y_test.
print("Accuracy (0.5):", (y_pred_05 == y_test).mean())
print("Accuracy (0.7):", (y_pred_07 == y_test).mean())

Accuracy (0.5): 0.8604651162790697
Accuracy (0.7): 0.7209302325581395


In [19]:
"""
Logistic regression is different from the perceptron because the perceptron
gives only hard outputs like 0 or 1, while logistic regression gives a
probability between 0 and 1.

The sigmoid matters because it shows how confident the model is about its
prediction instead of giving only yes or no. This makes the model smoother
and more informative.

However, logistic regression can still draw only straight line decision
boundaries, so it cannot solve complex non-linear problems. To solve those
problems, we need neural networks.
"""


'\nLogistic regression is different from the perceptron because the perceptron\ngives only hard outputs like 0 or 1, while logistic regression gives a\nprobability between 0 and 1.\n\nThe sigmoid matters because it shows how confident the model is about its\nprediction instead of giving only yes or no. This makes the model smoother\nand more informative.\n\nHowever, logistic regression can still draw only straight line decision\nboundaries, so it cannot solve complex non-linear problems. To solve those\nproblems, we need neural networks.\n'