In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# 1. Load Dataset
df = pd.read_csv("/content/drive/MyDrive/Semester 3/Matkul Machine Learning/Praktikum/10/Data/weather_classification_data.csv")

print("=== 5 Data Teratas ===")
print(df.head())

=== 5 Data Teratas ===
   Temperature  Humidity  Wind Speed  Precipitation (%)    Cloud Cover  \
0         14.0        73         9.5               82.0  partly cloudy   
1         39.0        96         8.5               71.0  partly cloudy   
2         30.0        64         7.0               16.0          clear   
3         38.0        83         1.5               82.0          clear   
4         27.0        74        17.0               66.0       overcast   

   Atmospheric Pressure  UV Index  Season  Visibility (km)  Location  \
0               1010.82         2  Winter              3.5    inland   
1               1011.43         7  Spring             10.0    inland   
2               1018.72         5  Spring              5.5  mountain   
3               1026.25         7  Spring              1.0   coastal   
4                990.67         1  Winter              2.5  mountain   

  Weather Type  
0        Rainy  
1       Cloudy  
2        Sunny  
3        Sunny  
4        Rainy

In [4]:
# 2. Cek Missing Value
print("\n=== Mengecek Missing Value ===")
print(df.isnull().sum())


=== Mengecek Missing Value ===
Temperature             0
Humidity                0
Wind Speed              0
Precipitation (%)       0
Cloud Cover             0
Atmospheric Pressure    0
UV Index                0
Season                  0
Visibility (km)         0
Location                0
Weather Type            0
dtype: int64


In [5]:
# 3. Pisahkan Fitur (X) dan Target (Y)
X = df.drop("Weather Type", axis=1)
y = df["Weather Type"]

In [6]:
# 4. Encoding Data Kategorikal
label_encoders = {}
for col in X.select_dtypes(include=["object"]).columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

In [7]:
# Encode target label
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(y)

In [8]:
# 5. Split Data (Training dan Testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [9]:
# 6. Membuat Model KNN
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

In [10]:
# 7. Prediksi
y_pred = knn.predict(X_test)

In [11]:
# 8. Evaluasi Model
print("\n=== Akurasi Model ===")
print(accuracy_score(y_test, y_pred))

print("\n=== Classification Report ===")
# Konversi target_encoder.classes_ ke string karena classification_report mengharapkan nama string
print(classification_report(y_test, y_pred, target_names=[str(c) for c in target_encoder.classes_]))


=== Akurasi Model ===
0.8859848484848485

=== Classification Report ===
              precision    recall  f1-score   support

      Cloudy       0.85      0.85      0.85       651
       Rainy       0.85      0.89      0.87       647
       Snowy       0.93      0.95      0.94       701
       Sunny       0.91      0.85      0.88       641

    accuracy                           0.89      2640
   macro avg       0.89      0.88      0.88      2640
weighted avg       0.89      0.89      0.89      2640



In [12]:
# 9. Contoh Prediksi Baru
contoh_data_raw = pd.DataFrame({
    "Temperature": [30],
    "Humidity": [70],
    "Wind Speed": [12],
    "Precipitation (%)": [40],
    "Cloud Cover": ["partly cloudy"],
    "Atmospheric Pressure": [1008],
    "UV Index": [5],
    "Season": ["Spring"],
    "Visibility (km)": [8],
    "Location": ["inland"]
})

In [14]:
# Encoding data kategorikal untuk contoh_data
contoh_data_encoded = contoh_data_raw.copy()
for col in contoh_data_raw.select_dtypes(include=["object"]).columns:
    if col in label_encoders:
        contoh_data_encoded[col] = label_encoders[col].transform(contoh_data_raw[col])
    else:
        print(f"Warning: LabelEncoder for column '{col}' not found.")

hasil = knn.predict(contoh_data_encoded)
hasil_final = target_encoder.inverse_transform(hasil)

print("\n=== Prediksi Cuaca ===")
print("Prediksi Weather Type:", hasil_final[0])


=== Prediksi Cuaca ===
Prediksi Weather Type: Cloudy
