In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("glass.csv")

In [3]:
print(f"Shape of the dataframe: {df.shape}")
print(f"Columns: {df.columns}")
print(df.head())

Shape of the dataframe: (214, 10)
Columns: Index(['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type'], dtype='str')
        RI     Na    Mg    Al     Si     K    Ca   Ba   Fe  Type
0  1.52101  13.64  4.49  1.10  71.78  0.06  8.75  0.0  0.0     1
1  1.51761  13.89  3.60  1.36  72.73  0.48  7.83  0.0  0.0     1
2  1.51618  13.53  3.55  1.54  72.99  0.39  7.78  0.0  0.0     1
3  1.51766  13.21  3.69  1.29  72.61  0.57  8.22  0.0  0.0     1
4  1.51742  13.27  3.62  1.24  73.08  0.55  8.07  0.0  0.0     1


We want only the Type 1 to be accepted, rest types to be rejected

In [4]:
df["y"] = (df["Type"]==1).astype(int)
df=df.drop(columns=["Type"])

In [5]:
df

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,y
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,0
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,0
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,0
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,0


Counting the number of 1s and number of 0s

In [6]:
print(sum(df["y"]==1))  #glass of interest
print(sum(df["y"]==0))  #Reject this

70
144


Separate the input and output

In [7]:
X= df.drop(columns=["y"]).values
y= df["y"].values

In [8]:
from sklearn.model_selection import train_test_split
#spliting the data

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

SKALING

In [9]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Defining functions

Sigmoid Function

In [10]:
def sigmoid(z):
    return (1/(1+np.exp(-z)))

Forward Computation

In [11]:
def predict_prob(X,w,b):
    #compute z: the weighted score + bias
    z= X@w +b
    #return sigmoid of z to convert the score to probability
    return sigmoid(z)


Loss Function

In [12]:
def loss(y,p):
    #y is the true label
    #p is the predicted label
    return -np.mean(y*np.log(p) + (1-y)*np.log(1-p))

Learning Step

In [13]:
def update_weights(X,y,w,b,lr):
    p = predict_prob(X,w,b)
    error= p-y

    w= w-lr *(X.T@error)/len(y)
    b= b- lr * np.mean(error)

    return w,b

Training Loop

In [14]:
w= np.zeros(X_train.shape[1])
b=0.0
lr= 0.1
epochs= 100

for _ in range(epochs):
    w, b= update_weights(X_train,y_train,w,b,lr)

Making the decision

In [15]:
def predict_label(p, threshold=0.5):
    return (p>=threshold).astype(int)

In [16]:
print(w)
print(b)

[ 0.10796659 -0.15606355  0.63480916 -0.67204019  0.07051998 -0.08936205
 -0.19026934 -0.16364966 -0.1099809 ]
-0.7346250485229616


I will run the model on the original dataset and find the output and compare with the original result that is df["y"]

In [17]:
p= predict_label(predict_prob(X,w,b))

df["predicted_0.5"]= predict_label(p, threshold=0.5)
df["predicted_0.7"]= predict_label(p, threshold=0.7)
df


Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,y,predicted_0.5,predicted_0.7
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1,1,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1,1,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1,1,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1,1,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,0,0,0
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,0,0,0
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,0,0,0
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,0,0,0


In [18]:
print(p.min(),p.max())


0 1


In [2]:
df.to_csv("glass_with_predictions.csv", index=False)


NameError: name 'df' is not defined