In [12]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import RandomizedSearchCV

In [9]:
##### Load the dataset and derive the quality label #####

# Specification window for the surface roughness 'Ra'. A row is labelled
# "good" when LOWER <= Ra < UPPER (lower bound inclusive, upper bound exclusive).
LOWER_SPECIFICATION_LIMIT = 0.125
UPPER_SPECIFICATION_LIMIT = 0.215

# NOTE(review): absolute, machine-specific path; consider making it relative
# to a configurable data directory so the notebook runs on other machines.
DATASET_PATH = '/Users/mustafa/Documents/GitHub/ML_knife_QA/data/chiefs_knife_dataset.xlsx'
dataset = pd.read_excel(DATASET_PATH)

# Position of the 'Ra' column; the label column is inserted directly after it.
index_Ra = dataset.columns.get_loc('Ra')

# Boolean mask of the rows whose roughness lies inside the specification window.
surface_roughness = dataset['Ra']
is_between_specification_bounds = (
    surface_roughness.ge(LOWER_SPECIFICATION_LIMIT)
    & surface_roughness.lt(UPPER_SPECIFICATION_LIMIT)
)

# Map the mask to the class labels and store them in a new 'Quality' column.
good_product_range = np.where(is_between_specification_bounds, "good", "bad")
dataset.insert(index_Ra + 1, 'Quality', good_product_range)

In [13]:
"""# constructing Label"""

# Feature matrix: every column from 'Original_Linienanzahl' through
# 'DFT_Median_sobel_Bereich' (label-based slice, both ends inclusive).
X = dataset.loc[:, 'Original_Linienanzahl':'DFT_Median_sobel_Bereich'].to_numpy()
# Target vector: the class label stored in the 'Quality' column.
y = dataset['Quality'].to_numpy()

# Train/test split: hold out 20% of the rows; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: the scaler is fitted on the training rows only and then
# applied to the test rows, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model training. Fit on the *scaled* features so the scaling step above is
# actually used (previously the model was fitted on the unscaled X_train and
# the scaled arrays were dead code). Tree ensembles are largely insensitive to
# per-feature affine scaling, so the metrics should be essentially unchanged.
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train_scaled, y_train)

# Predictions: the test features must go through the same scaler as training.
y_pred = classifier.predict(X_test_scaled)

# Evaluation: overall accuracy plus per-class precision/recall/F1.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')

Accuracy: 0.7555816686251469
Classification Report:
              precision    recall  f1-score   support

         bad       0.77      0.69      0.73       798
        good       0.75      0.81      0.78       904

    accuracy                           0.76      1702
   macro avg       0.76      0.75      0.75      1702
weighted avg       0.76      0.76      0.75      1702



In [None]:
# Randomized hyperparameter search for the Random Forest classifier.

# Candidate values sampled by the search. Defined in this cell so the notebook
# survives "Restart Kernel -> Run All" (the name was previously undefined).
# The grid holds 4 * 4 * 3 * 3 * 2 * 2 = 576 combinations, which is more than
# the 100 iterations requested below.
param_grid = {
    'n_estimators': [100, 200, 300, 500],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2'],
    'bootstrap': [True, False],
}

random_search_classifier = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_grid,
    n_iter=100,       # number of sampled parameter settings
    cv=5,             # 5-fold cross-validation on the training data
    n_jobs=-1,        # use all available CPU cores
    verbose=2,
    random_state=42,  # reproducible sampling of parameter settings
)

# Fit on the scaled training split produced by the training cell
# (`X_train_scaled`, `y_train`); the previously referenced
# `X_train_clf_scaled` / `y_train_clf` are not defined anywhere in the notebook.
random_search_classifier.fit(X_train_scaled, y_train)
print(f'Beste Hyperparameter für Classifier: {random_search_classifier.best_params_}')