In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load the dataset.
df = pd.read_csv("water_potability.csv")

# Fill missing values in these columns with each column's median.
# The result is assigned back to `df` rather than calling
# `df[col].fillna(..., inplace=True)`: that chained in-place form operates on
# a temporary under pandas copy-on-write (and raises a FutureWarning in
# pandas 2.x), which would leave `df` itself unchanged.
# NOTE(review): the medians are computed on the full dataset before the
# train/test split, so test rows influence the imputation values — confirm
# whether that is acceptable for this analysis.
IMPUTED_COLUMNS = ["ph", "Sulfate", "Trihalomethanes"]
df[IMPUTED_COLUMNS] = df[IMPUTED_COLUMNS].fillna(df[IMPUTED_COLUMNS].median())

# Fail fast if the imputation did not take effect (a bare `df.isnull().sum()`
# in the middle of a cell is evaluated and discarded, so it showed nothing).
assert not df[IMPUTED_COLUMNS].isnull().any().any(), "imputation left missing values"

# Separate the features from the target column.
X = df.drop("Potability", axis=1)
y = df["Potability"]

# Hold out 20% of the rows as the test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# SVM
# Linear SVM preceded by standardisation. SVMs are not scale-invariant, so
# fitting on raw features of differing magnitudes makes the optimisation slow
# and lets large-valued columns dominate. The pipeline learns the scaling on
# the training split only and re-applies it inside `predict`, so
# `svm_classifier` keeps the same fit/predict interface as a bare SVC.
svm_classifier = make_pipeline(StandardScaler(), SVC(kernel='linear'))

# Train on the training split.
svm_classifier.fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = svm_classifier.predict(X_test)

# Report accuracy on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f"模型準確率：{accuracy:.2f}")

# Per-class precision / recall / F1. `zero_division=0` reports 0 (without an
# UndefinedMetricWarning) for a class the model never predicts.
print("分類報告：")
print(classification_report(y_test, y_pred, zero_division=0))

In [None]:
# Kept from the original cell; only older matplotlib releases need this
# import to register the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# Resolution of the plane mesh along each free axis. A fixed point count keeps
# the grid size bounded regardless of the features' value ranges (a fixed 0.1
# step over a wide-ranging feature would allocate an enormous 3-D grid).
GRID_POINTS = 30

# Only the first three feature columns can be drawn in 3-D. `svm_classifier`
# was fitted on every column of X, so it cannot score 3-column points; fit a
# dedicated linear SVM on just these three columns for the visualisation.
features_3d = X.iloc[:, :3]
lower = features_3d.min().to_numpy(dtype=float)
upper = features_3d.max().to_numpy(dtype=float)

# Standardise by hand (no extra imports) so the linear SVM converges quickly.
center = features_3d.mean().to_numpy(dtype=float)
spread = features_3d.std(ddof=0).to_numpy(dtype=float)
spread[spread == 0] = 1.0  # constant column: avoid dividing by zero
svm_3d = SVC(kernel='linear')
svm_3d.fit((features_3d.to_numpy(dtype=float) - center) / spread, y)

# Express the separating plane  w . x + b = 0  in the original feature units:
#   w_s . ((x - center) / spread) + b_s = (w_s / spread) . x + (b_s - (w_s / spread) . center)
# NOTE(review): assumes a two-class target, so `coef_` has a single row — confirm.
weights = svm_3d.coef_[0] / spread
offset = svm_3d.intercept_[0] - np.dot(weights, center)

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')

# Data points, coloured by class label.
ax.scatter(X.iloc[:, 0], X.iloc[:, 1], X.iloc[:, 2], c=y, marker='o', s=50, linewidth=1)
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_zlabel("Feature 3")

# Draw the plane as a surface: mesh the two "free" axes and solve the plane
# equation for the axis with the largest standardised weight (best conditioned).
solve_axis = int(np.argmax(np.abs(svm_3d.coef_[0])))
if weights[solve_axis] != 0:
    free_a, free_b = [axis for axis in range(3) if axis != solve_axis]
    grid_a, grid_b = np.meshgrid(np.linspace(lower[free_a], upper[free_a], GRID_POINTS),
                                 np.linspace(lower[free_b], upper[free_b], GRID_POINTS))
    solved = -(weights[free_a] * grid_a + weights[free_b] * grid_b + offset) / weights[solve_axis]

    # Keep only the part of the plane inside the data's bounding box; 3-D axes
    # do not clip, so out-of-range points are blanked out with NaN instead.
    inside = (solved >= lower[solve_axis]) & (solved <= upper[solve_axis])
    if inside.any():
        plane = [None, None, None]
        plane[free_a] = grid_a
        plane[free_b] = grid_b
        plane[solve_axis] = np.where(inside, solved, np.nan)
        ax.plot_surface(*plane, color='tab:red', alpha=0.3, shade=False)
    else:
        print("Decision plane does not intersect the plotted data range.")
else:
    print("Linear SVM has zero weights on these features; no decision plane to draw.")

plt.title("SVM Decision Boundary in 3D")
plt.show()


In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest
# Build a 100-tree forest with a fixed seed and fit it on the training split
# in one expression (`fit` returns the estimator itself).
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = rf_classifier.predict(X_test)

# Accuracy on the test split.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"模型的準確度: {accuracy}")

# Per-class precision / recall / F1 summary.
report = classification_report(y_true=y_test, y_pred=y_pred)
print("分類報告:\n", report)

# No figure is created in this cell; the call is kept from the original.
plt.show()
