In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# 1. Load the data
file_path = 'CitarumWater.csv'
data = pd.read_csv(file_path)

# 2. Preprocessing
# Drop the columns that are not used as model inputs.
data = data.drop(columns=['No', 'NamaSungai', 'TitikPantau', 'Waktu', 'IP', 'O2'])

# Strip the unit suffix from the Temp column and convert it to float.
# regex=False makes '°C' a literal match regardless of the pandas version's
# default for the `regex` argument.
data['Temp'] = data['Temp'].str.replace('°C', '', regex=False).astype(float)

# A row without a label cannot be used for supervised learning; filling the
# label with the column mean would invent a class value that does not exist.
data = data.dropna(subset=['Class'])

# 3. Separate the features (X) from the label (y)
X = data.drop(columns=['Class'])
y = data['Class']

# 4. Split the data into training and test sets
# NOTE(review): the split is not stratified; if the classes are imbalanced,
# consider passing stratify=y so rare classes are represented in both sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fill missing feature values, if any, using statistics computed on the
# training set only, so that no information from the test set leaks into
# training. numeric_only=True keeps this working on pandas >= 2.0 even if a
# non-numeric column is present.
feature_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(feature_means)
X_test = X_test.fillna(feature_means)

# 5. Standardize the features (the scaler is fitted on the training set only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 6. Build the logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# 7. Predict on the test set
y_pred = model.predict(X_test)

# 8. Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
# zero_division=0 reports 0.0 for a class that is never predicted instead of
# emitting an UndefinedMetricWarning; the reported numbers are unchanged.
report = classification_report(y_test, y_pred, zero_division=0)

print(f'Akurasi model: {accuracy}')
print('Classification Report:')
print(report)


Akurasi model: 0.93
Classification Report:
              precision    recall  f1-score   support

           1       0.94      0.90      0.92       196
           2       0.93      0.97      0.95       294
           3       0.00      0.00      0.00         6
           4       0.75      0.75      0.75         4

    accuracy                           0.93       500
   macro avg       0.65      0.66      0.65       500
weighted avg       0.92      0.93      0.92       500

