### Model Regresi Logistik Untuk Memprediksi Kemungkinan Hujan
Nama  : Afif Syaifullah Fattah, NIM   : 120140229

#### Preparasi Data

In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Lookup table from the wind-direction codes stored in `ddd_car` to an angle
# in degrees: 'E ' is 0 and every following code adds 45. Single-letter codes
# are written with a trailing space, the same way 'C ' is spelled in the
# filter below.
winDict = dict(zip(
    ('E ', 'NE', 'N ', 'NW', 'W ', 'SW', 'S ', 'SE'),
    range(0, 360, 45),
))

# Data preparation
# Data source: https://www.kaggle.com/datasets/greegtitan/indonesia-climate?select=climate_data.csv
dtf = pd.read_csv("climate_data/climate_data.csv")

# Keep a single weather station (the original author identifies id 96295 as
# Radin Intan II Airport) and drop rows whose `ddd_car` is 'C ', which the
# original author treats as incomplete records.
selected_rows = (dtf['station_id'] == 96295) & (dtf['ddd_car'] != 'C ')
dtf = (
    dtf[selected_rows]
    .dropna()                           # discard rows with NaN in any column
    .replace({"ddd_car": winDict})      # wind-direction code -> degrees
)

# Boolean target: True when the `RR` column is strictly positive.
# NOTE(review): presumably RR is rainfall amount - confirm against the dataset docs.
dtf['isRain'] = dtf['RR'] > 0.0

feature_columns = ['Tn', 'Tx', 'Tavg', 'RH_avg', 'ss', 'ff_x', 'ff_avg', 'ddd_x', 'ddd_car']
features = dtf[feature_columns].to_numpy()
targets = dtf['isRain'].to_numpy()

# Split the dataset into training data and testing data; the fixed seed keeps
# the split reproducible between runs.
training_ftr, test_ftr, training_tgt, test_tgt = train_test_split(
    features,
    targets,
    random_state=229,
)

total_rows = len(dtf)
print(f"Jumlah total data : {total_rows}")
print(dtf.head())

#### Pelatihan Model dengan Training Data

In [None]:
# Train the model on the training split. The solver is allowed up to 800
# iterations (max_iter=800).
print("Initializing...")
model = LogisticRegression(max_iter=800).fit(training_ftr, training_tgt)
print("Done!")

# Show one example: the first test row and the predicted probability of the
# second class in model.classes_ (column 1 of predict_proba).
sample_row = test_ftr[0]
sample_batch = test_ftr[:1]     # same row, kept 2-D as predict_proba expects
print("\nContoh Hasil Prediksi :")
print("Data     : ", sample_row)
print("Prediksi : ", model.predict_proba(sample_batch)[:, 1])

#### Testing dan Evaluasi

In [None]:
# Score the fitted model on the held-out test split.
prediction_results = model.predict(test_ftr)

print("accuracy  : ", accuracy_score(test_tgt, prediction_results))
print("precision : ", precision_score(test_tgt, prediction_results))
print("recall    : ", recall_score(test_tgt, prediction_results))
print("f1 score  : ", f1_score(test_tgt, prediction_results))

# By default confusion_matrix orders the labels ascending, which puts the
# true negatives in the top-left corner. Passing labels=[True, False] asks for
# the positive class first, giving
#     [[TP, FN],
#      [FP, TN]]
# directly (rows = actual label, columns = predicted label). This replaces the
# previous manual flipping of rows and columns, and it always yields a 2x2
# matrix even if only one class shows up in the test labels and predictions.
con_mat = confusion_matrix(test_tgt, prediction_results, labels=[True, False])

print("\nconfusion matrix :")
print(con_mat)

In [None]:
# Report the fitted parameters: the coefficient array followed by the
# intercept, on a single line.
coefficients, intercept = model.coef_, model.intercept_
print("\nModel coefficients :")
print(coefficients, intercept)