In [61]:
import numpy as np
import pandas as pd

In [62]:
!pip install scikit-learn



In [63]:
from sklearn.model_selection import train_test_split

In [64]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [65]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [66]:
def initialize_weights(n_features):
    weights = np.zeros(n_features)
    bias = 0
    return weights, bias

In [67]:
def compute_cost(y, y_pred):
    m = len(y)
    cost = -(1 / m) * np.sum(y * np.log(y_pred + 1e-10) + (1 - y) * np.log(1 - y_pred + 1e-10))
    return cost

In [68]:
def compute_gradients(X, y, y_pred):
    m = len(y)
    dw = (1 / m) * np.dot(X.T, (y_pred - y))
    db = (1 / m) * np.sum(y_pred - y)
    return dw, db

In [69]:
def train_logistic_regression(X, y, learning_rate=0.01, n_iterations=1000):
    n_features = X.shape[1]
    weights, bias = initialize_weights(n_features)
    cost_history = []

    for i in range(n_iterations):
        z = np.dot(X, weights) + bias
        y_pred = sigmoid(z)
        cost = compute_cost(y, y_pred)
        cost_history.append(cost)
        dw, db = compute_gradients(X, y, y_pred)
        weights -= learning_rate * dw
        bias -= learning_rate * db
    return weights, bias, cost_history

In [70]:
def predict(X, weights, bias):
    z = np.dot(X, weights) + bias
    y_pred = sigmoid(z)
    return np.where(y_pred >= 0.5, 1, 0)

In [71]:
def evaluate_model(y_true, y_pred):
    accuracy = np.mean(y_true == y_pred)
    return accuracy

In [73]:
data = pd.read_csv("product_defect_detection.csv")
data

Unnamed: 0,Product_ID,Assembly_Line,Sensor_Reading_1,Sensor_Reading_2,Temperature,Humidity,Defective
0,P001,Line_A,50.0,30.0,70.0,40.0,1
1,P002,Line_A,53.0,28.8,70.5,40.8,0
2,P003,Line_A,56.0,27.6,71.0,41.6,0
3,P004,Line_A,51.5,26.4,71.5,42.4,0
4,P005,Line_A,54.5,31.2,72.0,43.2,0
...,...,...,...,...,...,...,...
95,P096,Line_B,102.5,54.0,70.0,43.2,0
96,P097,Line_B,98.0,58.8,70.5,44.0,0
97,P098,Line_B,101.0,57.6,71.0,44.8,0
98,P099,Line_B,104.0,56.4,71.5,40.0,0


In [74]:
data['Sensor_Reading_1'] = pd.to_numeric(data['Sensor_Reading_1'], errors='coerce')
data['Sensor_Reading_2'] = pd.to_numeric(data['Sensor_Reading_2'], errors='coerce')
data['Temperature'] = pd.to_numeric(data['Temperature'], errors='coerce')
data['Humidity'] = pd.to_numeric(data['Humidity'], errors='coerce')

In [75]:
X = data[['Sensor_Reading_1', 'Sensor_Reading_2', 'Temperature', 'Humidity']]
y = data['Defective']

In [76]:
X.dropna(inplace=True)
y_encoded = y[X.index.isin(X.dropna().index)]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.dropna(inplace=True)


In [77]:
X_train, X_test, y_train, y_test = train_test_split(X.values, y_encoded, test_size=0.2, random_state=42)

In [78]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [79]:
weights, bias, cost_history = train_logistic_regression(X_train_scaled, y_train.values, learning_rate=0.01, n_iterations=1000)

In [80]:
y_pred = predict(X_test_scaled,weights,bias)

In [81]:
accuracy = evaluate_model(y_test.values, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 70.00%
