# Wildfire Size Prediction

Predicting wildfire sizes for US states from 2011 to 2015

In [1]:
import sys

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer 
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from src.data_processor import WildfireDataProcessor
from src.model import WildfirePredictor

In [2]:
# Build the two project helpers used by the rest of the notebook: the data
# processor and the predictor. The predictor is configured with 200
# estimators; the underlying estimator type is defined in src.model.
predictor_options = {'n_estimators': 200}
data_processor = WildfireDataProcessor()
model = WildfirePredictor(**predictor_options)

In [3]:
# Hand both input CSVs to the processor, which returns the pair (X, y).
# Presumably X holds the features and y the wildfire-size target; the exact
# schema is defined by WildfireDataProcessor.load_and_prepare_data.
wildfire_csv = 'data/wildfire_sizes_before_2010.csv'
state_csv = 'data/merged_state_data.csv'
X, y = data_processor.load_and_prepare_data(wildfire_csv, state_csv)

In [4]:
# Partition X and y into train and test pieces. The split ratio and any
# seeding are decided inside WildfireDataProcessor.split_data (not visible
# here).
train_test_parts = data_processor.split_data(X, y)
X_train, X_test, y_train, y_test = train_test_parts

In [5]:
# Numeric preprocessing: replace missing values with the column mean, then
# standardize each column with StandardScaler.
mean_imputer = SimpleImputer(strategy='mean')
standardizer = StandardScaler()
num_pipeline = Pipeline(steps=[
    ('imputer', mean_imputer),
    ('std_scaler', standardizer),
])

In [6]:
# Categorical preprocessing: one-hot encode each categorical column.
# handle_unknown='ignore' makes a category that was not seen during fit
# (e.g. present in X_test but absent from X_train) encode as an all-zero
# block instead of raising ValueError, which is what the encoder's default
# handle_unknown='error' does at transform time.
cat_pipeline = Pipeline([
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])