In [3]:
# Install required packages
!pip install pandas numpy scikit-learn xgboost lightgbm joblib langgraph langchain-core matplotlib seaborn plotly gradio pyngrok retry

Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [4]:
import sys
# Extend the module search path before the bare-name imports below
# (state, config, logger, ...); presumably those modules live in this
# Kaggle dataset directory — verify if the dataset is renamed or moved.
sys.path.append('/kaggle/input/flood-prediction-pipeline/Flood Prediction Pipeline')

from langgraph.graph import StateGraph, END
from state import FloodPredictionState
from config import CONFIG
from logger import structured_log
from data_loader import DataLoaderAgent
from preprocessor import PreprocessorAgent
from model_trainer import ModelTrainerAgent
from model_tuner import ModelTunerAgent
from explainer import ExplainerAgent
from visualizer import VisualizerAgent
from monitor import MonitorAgent
from model_saver import ModelSaverAgent
from predictor import PredictorAgent
from dashboard import DashboardAgent

In [5]:
class FloodPredictionWorkflow:
    """Strictly linear LangGraph workflow over the flood-prediction agents.

    The constructor creates one instance of every agent and keeps it as an
    attribute (``data_loader`` ... ``dashboard``), then compiles a graph in
    which each agent contributes exactly one node. The compiled graph is
    available as ``graph`` and is executed by :meth:`run`.
    """

    def __init__(self, config):
        """Instantiate the agents and compile the graph.

        Args:
            config: Settings object forwarded to every agent that accepts
                one. ``run`` additionally reads ``config['data_path']``.
        """
        self.config = config

        # Agents, created in pipeline order. The loader, monitor and
        # predictor are constructed without arguments; all others get config.
        self.data_loader = DataLoaderAgent()
        self.preprocessor = PreprocessorAgent(config)
        self.model_trainer = ModelTrainerAgent(config)
        self.model_tuner = ModelTunerAgent(config)
        self.explainer = ExplainerAgent(config)
        self.visualizer = VisualizerAgent(config)
        self.monitor = MonitorAgent()
        self.model_saver = ModelSaverAgent(config)
        self.predictor = PredictorAgent()
        self.dashboard = DashboardAgent(config)

        self.graph = self._build_graph()

    def _build_graph(self):
        """Assemble and compile the stage graph.

        Returns:
            The object returned by ``StateGraph.compile()`` for a chain
            ``load_data -> ... -> setup_dashboard -> END``.
        """
        workflow_graph = StateGraph(FloodPredictionState)

        # Ordered (node name, handler) table; the order here *is* the
        # execution order of the pipeline.
        stages = [
            ("load_data", self.data_loader.load_data),
            ("preprocess_data", self.preprocessor.preprocess_data),
            ("train_models", self.model_trainer.train_models),
            ("tune_best_model", self.model_tuner.tune_best_model),
            ("explain_model", self.explainer.explain_model),
            ("visualize_data", self.visualizer.visualize_data),
            ("monitor_performance", self.monitor.monitor_performance),
            ("save_model", self.model_saver.save_model),
            ("make_sample_prediction", self.predictor.make_sample_prediction),
            ("setup_dashboard", self.dashboard.setup_dashboard),
        ]

        # Register every node first ...
        for stage_name, handler in stages:
            workflow_graph.add_node(stage_name, handler)

        # ... then link each stage to its successor; the last one goes to END.
        stage_names = [stage_name for stage_name, _ in stages]
        for source, target in zip(stage_names, stage_names[1:] + [END]):
            workflow_graph.add_edge(source, target)

        # Execution starts at the first stage.
        workflow_graph.set_entry_point(stage_names[0])
        return workflow_graph.compile()

    def run(self):
        """Execute the whole pipeline once.

        Builds the initial state from ``config['data_path']`` and invokes the
        compiled graph with it.

        Returns:
            Whatever ``graph.invoke`` returns for the initial state.

        Raises:
            Exception: Any error raised while running is logged at ERROR
                level and then re-raised unchanged.
        """
        try:
            structured_log('INFO', "Starting flood prediction pipeline")
            start_state = FloodPredictionState(data_path=self.config['data_path'])
            result = self.graph.invoke(start_state)
            structured_log('INFO', "Pipeline completed successfully")
        except Exception as exc:
            structured_log('ERROR', f"Pipeline failed: {str(exc)}")
            raise
        return result

In [6]:
# Run the workflow: build every agent and the compiled graph from CONFIG,
# then execute the pipeline end to end. Both `workflow` and `final_state`
# are reused by the cells that follow (sample prediction, dashboard launch).
workflow = FloodPredictionWorkflow(CONFIG)
final_state = workflow.run()

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001969 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 466
[LightGBM] [Info] Number of data points in the train set: 40000, number of used features: 20
[LightGBM] [Info] Start training from score 0.499519
Feature Importance Types: {'MonsoonIntensity': 'float', 'RiverManagement': 'float', 'Urbanization': 'float', 'ClimateChange': 'float', 'DamsQuality': 'float', 'Siltation': 'float', 'AgriculturalPractices': 'float', 'Encroachments': 'float', 'IneffectiveDisasterPreparedness': 'float', 'CoastalVulnerability': 'float', 'Landslides': 'float', 'Watersheds': 'float', 'PopulationScore': 'float', 'WetlandLoss': 'float', 'InadequatePlanning': 'float', 'PoliticalFactors': 'float', 'Monsoon_Drainage': 'float', 'Urban_Climate': 'float', 'LandslideRisk': 'float', 'InadequateInfrastructure': 'float'}


  with pd.option_context('mode.use_inf_as_na', True):

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.


use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.



* Running on public URL: https://a9c86bf0eae6c55bf4.gradio.live


In [7]:
# Example prediction on the first row of the test features
test_features = final_state['X_test']
sample_data = test_features.iloc[0].to_dict()
# Key the input by the X_test columns so it matches the processed features;
# a column missing from the row would fall back to 0.
sample_input = {
    feature: sample_data.get(feature, 0)
    for feature in test_features.columns
}
prediction = workflow.predictor.predict(final_state, sample_input)
structured_log('INFO', f"Sample prediction: {prediction:.4f}")

In [8]:
# Launch the dashboard app held by the DashboardAgent. `share=True` requests
# a public share link (the cell output shows a *.gradio.live URL), so the
# app is reachable from outside this machine while it runs.
# NOTE(review): the pipeline-run cell's output already prints the same
# "Running on public URL" address, so the app may already be live at this
# point — confirm this second launch is needed.
workflow.dashboard.app.launch(share=True, quiet=True)

* Running on public URL: https://a9c86bf0eae6c55bf4.gradio.live


