# Predictive Quality Control for Manufacturing

## Importing Required Libraries

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

## Data Preprocessing

In [None]:
def preprocess_data(file_path):
    """Load a CSV and turn it into scaled train/test splits.

    Rows containing missing values are dropped, non-numeric columns are
    one-hot encoded (first level dropped), and the ``defect_rate`` column
    is separated out as the regression target. The split holds out 20% of
    the rows with a fixed seed, and the scaler is fitted on the training
    features only before being applied to both partitions.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the dataset.

    Returns:
        A 5-tuple ``(X_train, X_test, y_train, y_test, data)`` where the two
        feature matrices are the scaled arrays, the two targets are the
        unscaled ``defect_rate`` values, and ``data`` is the cleaned,
        encoded frame (target column included).
    """
    target_column = 'defect_rate'

    cleaned = pd.read_csv(file_path).dropna()
    data = pd.get_dummies(cleaned, drop_first=True)

    features = data.drop(columns=[target_column])
    target = data[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Fit on the training partition only so no test statistics leak in.
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test), y_train, y_test, data

## Exploratory Data Analysis (EDA)

In [None]:
def perform_eda(data):
    """Show a correlation heatmap and one scatter plot per feature.

    The heatmap is drawn from ``data.corr()``. Afterwards every column
    other than ``defect_rate`` is plotted against ``defect_rate`` and
    shown as its own figure.

    Args:
        data: Frame that contains a ``defect_rate`` column alongside the
            feature columns.
    """
    target_column = 'defect_rate'

    plt.figure(figsize=(12, 8))
    correlations = data.corr()
    sns.heatmap(correlations, annot=True, cmap='coolwarm', fmt='.2f')
    plt.title('Feature Correlation Heatmap')
    plt.show()

    for feature in data.columns:
        if feature == target_column:
            continue
        sns.scatterplot(x=data[feature], y=data[target_column])
        plt.title(f'{feature} vs Defect Rate')
        plt.xlabel(feature)
        plt.ylabel('Defect Rate')
        plt.show()

## Predictive Modeling

In [None]:
def train_model(X_train, y_train, model_path='defect_prediction_model.pkl'):
    """Fit a linear regression and optionally persist it with joblib.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.
        model_path: Where to write the fitted model. Defaults to
            ``'defect_prediction_model.pkl'`` in the working directory,
            which is what earlier callers relied on. Pass ``None`` to
            skip writing anything to disk.

    Returns:
        The fitted ``LinearRegression`` instance.
    """
    model = LinearRegression()
    model.fit(X_train, y_train)
    if model_path is not None:
        joblib.dump(model, model_path)
    return model

def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data.

    Args:
        model: Object exposing ``predict``.
        X_test: Feature matrix to predict on.
        y_test: True target values for ``X_test``.

    Returns:
        A 3-tuple ``(mse, r2, y_pred)``: mean squared error, R² score, and
        the predictions they were computed from.
    """
    predictions = model.predict(X_test)
    return (
        mean_squared_error(y_test, predictions),
        r2_score(y_test, predictions),
        predictions,
    )

## Quality Control Integration

In [None]:
def plot_spc_chart(y_test, y_pred):
    """Plot actual and predicted defect rates with the actual mean as a reference line.

    Args:
        y_test: Actual defect rates; must expose ``.values`` and ``.mean()``.
        y_pred: Predicted defect rates, plotted against their position.
    """
    actual_values = y_test.values
    center_line = y_test.mean()

    plt.figure(figsize=(10, 5))
    plt.plot(actual_values, label='Actual Defect Rate', marker='o')
    plt.plot(y_pred, label='Predicted Defect Rate', linestyle='dashed')
    # Horizontal reference at the mean of the actual values.
    plt.axhline(center_line, color='r', linestyle='dashed', label='Mean Defect Rate')

    plt.title('Statistical Process Control Chart')
    plt.xlabel('Sample Index')
    plt.ylabel('Defect Rate')
    plt.legend()
    plt.show()

def trigger_alarm(y_pred, threshold):
    """Print a warning listing the positions whose prediction exceeds a threshold.

    Args:
        y_pred: Predicted defect rates. Any array-like is accepted (NumPy
            array, list, tuple, pandas Series); it is converted to an
            array before comparison so plain sequences do not raise.
        threshold: Value a prediction must be strictly greater than to
            raise an alarm.

    Returns:
        None. The outcome is reported on standard output only.
    """
    predictions = np.asarray(y_pred)
    # First-axis positions of every prediction strictly above the threshold.
    alerts = np.where(predictions > threshold)[0]
    if alerts.size:
        print(f'Warning: Predicted defects exceed threshold at indices {alerts}')
    else:
        print('No defect alarms.')

## Six Sigma Framework

In [None]:
def calculate_dpmk(y_actual, y_predicted, threshold=0.05):
    """Return the per-million rate of samples whose prediction error exceeds a threshold.

    A sample counts as a "defect" here when
    ``abs(actual - predicted) > threshold``; the count is divided by the
    number of samples and scaled to one million.

    Both inputs are converted to float arrays before subtracting. This
    makes plain lists work and forces a positional comparison: two pandas
    Series with different indexes would otherwise be aligned by label and
    compared against the wrong partner.

    Args:
        y_actual: Observed values (array-like).
        y_predicted: Predicted values (array-like), same length as
            ``y_actual``.
        threshold: Absolute error above which a sample is counted.

    Returns:
        The rate as a float, or ``nan`` when ``y_actual`` is empty (the
        rate is undefined with zero opportunities).
    """
    opportunities = len(y_actual)
    if opportunities == 0:
        return float('nan')

    actual = np.asarray(y_actual, dtype=float)
    predicted = np.asarray(y_predicted, dtype=float)
    defects = np.count_nonzero(np.abs(actual - predicted) > threshold)
    return float((defects / opportunities) * 1_000_000)

def calculate_cpk(data, usl, lsl):
    """Compute the process capability index Cpk for a set of measurements.

    Cpk is the smaller of the upper and lower capability ratios, each
    being the distance from the mean to a specification limit divided by
    three standard deviations.

    Args:
        data: Measurements exposing ``.mean()`` and ``.std()``. The
            standard deviation is whatever ``data.std()`` returns, so its
            degrees-of-freedom convention follows the container type.
        usl: Upper specification limit.
        lsl: Lower specification limit.

    Returns:
        ``min(cpu, cpl)``.
    """
    center = data.mean()
    three_sigma = 3 * data.std()
    upper_capability = (usl - center) / three_sigma
    lower_capability = (center - lsl) / three_sigma
    return min(upper_capability, lower_capability)

## Running the Workflow

In [None]:
# Load and preprocess the dataset.
# preprocess_data reads the CSV, drops rows with missing values, one-hot
# encodes it, splits off 20% as a test set, and standardizes the features.
# `data` is the cleaned, encoded frame (target included); the other four
# names are the scaled feature matrices and their unscaled targets.
file_path = 'manufacturing_qc_data.csv'  # Update this with the actual dataset path
X_train, X_test, y_train, y_test, data = preprocess_data(file_path)

In [None]:
# Perform EDA: a correlation heatmap followed by one scatter plot of each
# feature column against defect_rate.
perform_eda(data)

In [None]:
# Train the predictive model (linear regression on the scaled training set).
# Side effect: the fitted model is also written to
# 'defect_prediction_model.pkl' in the working directory.
model = train_model(X_train, y_train)

In [None]:
# Evaluate the model on the held-out test set.
# `y_pred` is kept because the quality-control and Six Sigma cells reuse it.
mse, r2, y_pred = evaluate_model(model, X_test, y_test)
print(f'Model Performance: MSE = {mse:.4f}, R² Score = {r2:.4f}')

In [None]:
# Quality Control Integration
# Chart actual vs. predicted defect rates, then report every test-set
# position whose predicted rate is strictly above the alarm threshold.
# NOTE(review): 0.05 presumably means a 5% defect rate — confirm the units
# of defect_rate in the dataset.
plot_spc_chart(y_test, y_pred)
trigger_alarm(y_pred, threshold=0.05)

In [None]:
# Six Sigma Analysis
# NOTE: calculate_dpmk counts test samples whose absolute prediction error
# exceeds the threshold, so the figure printed as DPMO below measures model
# misses per million samples rather than physical product defects.
dpmk = calculate_dpmk(y_test, y_pred, threshold=0.05)
# Capability of the actual test-set defect rates against fixed spec limits.
# NOTE(review): usl=1.0 / lsl=0.0 assume defect_rate is a fraction in
# [0, 1] — confirm against the dataset.
cpk = calculate_cpk(y_test, usl=1.0, lsl=0.0)
print(f'Defects Per Million Opportunities (DPMO): {dpmk:.2f}')
print(f'Process Capability Index (Cpk): {cpk:.2f}')