In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00601/ai4i2020.csv"
df = pd.read_csv(url)

if 'UDI' in df.columns:
    df = df.drop(columns=['UDI'])
if 'Product ID' in df.columns:
    df = df.drop(columns=['Product ID'])

target = 'Machine failure'
X = df.drop(columns=[target])
y = df[target]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42, n_estimators=100),
    "SVM": SVC(kernel='rbf', random_state=42)
}

results = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results[name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1-Score": f1_score(y_test, y_pred)
    }

result_df = pd.DataFrame(results).T
print(result_df)

plt.figure(figsize=(8,5))
sns.barplot(data=result_df, x=result_df.index, y="Accuracy")
plt.title("Perbandingan Akurasi Model")
plt.ylabel("Accuracy")
plt.xlabel("Model")
plt.ylim(0,1)
plt.show()

plt.figure(figsize=(8,6))
sns.heatmap(pd.DataFrame(X, columns=df.drop(columns=[target]).columns).corr(), cmap="coolwarm", annot=False)
plt.title("Heatmap Korelasi Fitur")
plt.show()

best_model_name = result_df['Accuracy'].idxmax()
best_model = models[best_model_name]
y_pred_best = best_model.predict(X_test)

cm = confusion_matrix(y_test, y_pred_best)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title(f"Confusion Matrix - {best_model_name}")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

print(f"\nModel Terbaik: {best_model_name}")
print("\nLaporan Klasifikasi:\n", classification_report(y_test, y_pred_best))


ValueError: could not convert string to float: 'M'

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

data = pd.read_csv("ai4i2020.csv")
data = data.drop(columns=['UDI', 'Product ID'])
data = pd.get_dummies(data, columns=['Type'], drop_first=True)

X = data.drop(columns=['Machine failure'])
y = data['Machine failure']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Akurasi Model Random Forest: {acc:.2f}")
