<a href="https://colab.research.google.com/github/Chanaka3534/FYP_XGBOOST-Use-past-30-days-data-/blob/1.0/FYP_XGBOOST(Use_past_30_days_data).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
# 📌 1) Imports
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

In [22]:
# 📌 2) Load your data
# Read the raw observations into a DataFrame. The path is relative, so the CSV
# must be reachable from the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer="FYP_DATA.csv")

In [23]:
# 📌 3) Clean numeric columns
# Columns that must be numeric before modelling. Any cell that cannot be
# parsed as a number is turned into NaN (errors='coerce') instead of raising.
columns_to_coerce = [
    'Resovior water level(m)',
    'Resovior discharge rate',
    'Water level(Kaliodai)',
]
for column in columns_to_coerce:
    df[column] = pd.to_numeric(df[column], errors='coerce')

# Discard every row that has a missing value in ANY column (not only the
# three coerced above).
df = df.dropna()

In [24]:
# 📌 4) Sort by Date
# Parse 'Date' into datetimes and order the rows by ascending date in one
# chained expression. Row order matters: the lag features built later in this
# notebook use shift(), which works on row position.
df = df.assign(Date=pd.to_datetime(df['Date'])).sort_values(by='Date')

In [None]:
# 📌 5) Create 30 lag features for each column you want
lag_days = 30

# Columns whose history is turned into lag features.
lag_source_columns = ['Catchment Rainfall', 'Downstream rainfall',
                      'Resovior water level(m)', 'Resovior discharge rate',
                      'Water level(Kaliodai)']

# Build all lagged series up front. '<col>(-k)' holds the value found k rows
# earlier; the first k rows of each lagged column are NaN.
# NOTE(review): shift() counts rows, not calendar days, so "k days ago" only
# holds if the frame has exactly one row per consecutive day — confirm, since
# the earlier dropna() may have removed days.
lagged_columns = {
    f'{col}(-{lag})': df[col].shift(lag)
    for col in lag_source_columns
    for lag in range(1, lag_days + 1)
}

# Attach everything with a single concat. Inserting 150 columns one at a time
# fragments the DataFrame and makes pandas emit a PerformanceWarning.
# Dropping any previously created lag columns first keeps this cell safe to
# re-run without producing duplicate column names.
df = pd.concat(
    [df.drop(columns=list(lagged_columns), errors='ignore'),
     pd.DataFrame(lagged_columns, index=df.index)],
    axis=1,
)

In [None]:
# 📌 6) Target: flood risk for today (or you can shift -1 for tomorrow)
# shift(periods=-1) copies the NEXT row's 'Flood risk' label onto the current
# row, so the last row has no target (NaN).
# NOTE(review): the lag features in this notebook start at lag 1, so the
# current row's own measurements are never used as inputs while the label
# comes from the following row. Confirm this one-row gap is intended.
df['Flood risk target'] = df['Flood risk'].shift(periods=-1)  # Predict tomorrow

In [34]:
# 📌 7) Drop rows with any missing values due to shifting
# Removes the leading rows that lack a full lag history and the trailing row
# whose shifted target is NaN.
df_lagged = df.dropna()

# 📌 8) Define features and target
# Feature names follow the '<column>(-<lag>)' pattern used when the lag
# columns were created, grouped by source column and then by lag 1..lag_days.
lagged_source_columns = ['Catchment Rainfall', 'Downstream rainfall',
                         'Resovior water level(m)', 'Resovior discharge rate',
                         'Water level(Kaliodai)']
features = [
    f'{col}(-{lag})'
    for col in lagged_source_columns
    for lag in range(1, lag_days + 1)
]

X = df_lagged[features]
y = df_lagged['Flood risk target']

# Encode target labels
# LabelEncoder maps the class labels to integers 0..n_classes-1; `le` is kept
# so predictions can be mapped back with inverse_transform.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 📌 9) Split data
# Chronological hold-out: with shuffle=False the first 80% of rows (oldest,
# because the frame was sorted by 'Date') are used for training and the last
# 20% for testing. A shuffled split would leak information here, because
# neighbouring rows share 29 of their 30 lag values, so near-duplicates of
# every test row would sit in the training set and inflate the scores.
# NOTE(review): a time-ordered split can leave a rare class out of the
# training window — check the class counts of y_train after this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, shuffle=False)

In [None]:
# 📌 10) Train XGBoost
# Hyper-parameters collected in one place so they are easy to read and tune.
# NOTE(review): `use_label_encoder` is reported as deprecated by newer xgboost
# releases — confirm against the installed version before removing it.
xgb_params = {
    'use_label_encoder': False,
    'eval_metric': 'mlogloss',
    'n_estimators': 20,
}
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

In [None]:
# 📌 11) Evaluate
y_pred = model.predict(X_test)

# Report on every class the encoder knows about, so the rows/columns line up
# with le.classes_ even if some class is missing from the test split.
class_ids = range(len(le.classes_))

print("Classification Report:\n")
report_text = classification_report(
    y_test, y_pred, labels=class_ids, target_names=le.classes_)
print(report_text)

print("\nConfusion Matrix:\n")
matrix = confusion_matrix(y_test, y_pred, labels=class_ids)
print(matrix)

In [37]:
# ✅ Your 30-day manual input (list of 30 rows)
# One row per day; the five values in each row are, in order:
#   Catchment Rainfall, Downstream rainfall, Resovior water level(m),
#   Resovior discharge rate, Water level(Kaliodai)
manual_data = [
    [6.7, 0, 103.2, 1700, 1200],
    [9.1, 0, 103.2, 1700, 1200],
    [10, 10.5, 103.1, 1700, 900],
    [9.5, 0, 103.1, 1500, 900],
    [16.2, 0, 103, 1700, 800],
    [13.6, 28.4, 102.9, 1600, 800],
    [6.3, 16, 102.8, 1600, 800],
    [13.6, 3.2, 102.8, 400, 1500],
    [1.7, 0, 102.9, 500, 1500],
    [1.3, 29.3, 103.1, 500, 1500],
    [14.1, 0, 103.3, 300, 1500],
    [0.5, 0, 103.5, 1100, 1500],
    [2.5, 0, 103.5, 750, 1500],
    [3.7, 0, 103.4, 750, 1500],
    [13.5, 0, 103.4, 1050, 1200],
    [4.7, 0, 103.8, 1150, 1200],
    [4.9, 0, 103.9, 1600, 900],
    [0.4, 0, 103.9, 1600, 900],
    [2.1, 2, 103.9, 1800, 900],
    [2.6, 0, 103.9, 1500, 700],
    [1.1, 0, 103.9, 1600, 700],
    [7, 5.3, 103.9, 1600, 1100],
    [8.6, 0, 104.1, 1250, 2500],
    [2.7, 0, 104.2, 1150, 2500],
    [5.4, 0, 104.4, 1150, 2000],
    [13.3, 0, 104.4, 1250, 2000],
    [1.7, 0, 104.5, 1250, 2300],
    [15.3, 0, 104.6, 1250, 2300],
    [4, 0, 104.8, 900, 2300],
    [15.1, 22, 105.1, 0, 2300],
]

# Row i of the table is fed to the model as lag (i + 1): row 0 becomes the
# '(-1)' features and the last row the '(-30)' features.
# NOTE(review): if the rows above were typed oldest → newest, set this switch
# to True so they are reversed before being mapped onto the lag features.
# The default (False) keeps the original row-to-lag mapping.
manual_rows_oldest_first = False

# Source columns in the same order as the values inside each manual row.
manual_columns = ['Catchment Rainfall', 'Downstream rainfall',
                  'Resovior water level(m)', 'Resovior discharge rate',
                  'Water level(Kaliodai)']

# ✅ Convert rows to columns: separate lists
manual_data_array = np.asarray(manual_data, dtype=float)

# Fail early with a clear message if a row or a value is missing, rather than
# silently sending a misaligned feature vector to the model.
expected_shape = (lag_days, len(manual_columns))
if manual_data_array.shape != expected_shape:
    raise ValueError(
        f"manual_data must have shape {expected_shape} "
        f"(days x columns), got {manual_data_array.shape}"
    )

if manual_rows_oldest_first:
    manual_data_array = manual_data_array[::-1]

# Now split columns:
(catchment_rainfall_30,
 downstream_rainfall_30,
 reservoir_level_30,
 discharge_rate_30,
 kaliodai_level_30) = manual_data_array.T.tolist()

# Combine all 5 in order:
manual_input = (
    catchment_rainfall_30
    + downstream_rainfall_30
    + reservoir_level_30
    + discharge_rate_30
    + kaliodai_level_30
)

print(f"Manual input length: {len(manual_input)}")

# Label every value with the '<column>(-<lag>)' name it stands for, following
# the naming pattern of the training features. Passing a named DataFrame
# instead of a bare list lets the model check the columns against the ones it
# was trained on.
manual_feature_names = [
    f'{col}(-{lag})'
    for col in manual_columns
    for lag in range(1, lag_days + 1)
]
manual_features = pd.DataFrame([manual_input], columns=manual_feature_names)

# ✅ Now predict:
manual_pred = model.predict(manual_features)
print("Predicted Flood Risk for tomorrow:", le.inverse_transform(manual_pred)[0])

Manual input length: 150
Predicted Flood Risk for tomorrow: NO
