<a href="https://colab.research.google.com/github/Chi36/forestfire/blob/main/Forestfireprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio
import pickle
import warnings

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer, r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVR

warnings.filterwarnings("ignore")

# Load dataset
file_path = "wildfire_prediction_multi_output_dataset_v2.xlsx"  # Replace with your Excel file path
df = pd.read_excel(file_path)

# Separate numeric and categorical columns
numeric_columns = ['Temperature (°C)', 'Humidity (%)', 'Wind Speed (km/h)',
                   'Rainfall (mm)', 'Fuel Moisture (%)', 'Slope (%)']
categorical_columns = ['Vegetation Type', 'Region']

# The positional split further down treats the last four columns as the
# prediction targets. Record their names now, while they still ARE the last
# four columns: one-hot encoding appends new columns, and without this step the
# encoded dummies would be picked up as targets and the real targets would
# leak into X.
# NOTE(review): assumes the spreadsheet stores the four targets last - confirm.
target_columns = list(df.columns[-4:])
overlapping = set(target_columns) & set(numeric_columns + categorical_columns)
if overlapping:
    raise ValueError(
        "Expected the last four spreadsheet columns to be targets, but they "
        f"include feature columns: {sorted(overlapping)}"
    )

# Handle missing values
numeric_imputer = SimpleImputer(strategy="mean")
df[numeric_columns] = numeric_imputer.fit_transform(df[numeric_columns])

categorical_imputer = SimpleImputer(strategy="most_frequent")
df[categorical_columns] = categorical_imputer.fit_transform(df[categorical_columns])

# One-Hot Encode Categorical Variables
encoder = OneHotEncoder(sparse_output=False)  # Updated to sparse_output=False
encoded_vars = encoder.fit_transform(df[categorical_columns])
# Reuse df's index so the concat below aligns row-for-row.
encoded_df = pd.DataFrame(encoded_vars, columns=encoder.get_feature_names_out(), index=df.index)

# Replace the categorical columns with their encoded form, keeping the target
# columns at the very end so the positional split stays valid.
target_df = df[target_columns]
feature_df = df.drop(columns=categorical_columns + target_columns)
df = pd.concat([feature_df, encoded_df, target_df], axis=1)

# Split input (X) and output (y)
X = df.iloc[:, :-4]  # Inputs (every column except the 4 targets)
y1, y2, y3, y4 = df.iloc[:, -4], df.iloc[:, -3], df.iloc[:, -2], df.iloc[:, -1]

# Convert a Yes/No target to 1/0. Skip the mapping when the column is already
# numeric, because Series.map would turn every unmatched value into NaN.
if not pd.api.types.is_numeric_dtype(y4):
    y4 = y4.map({'Yes': 1, 'No': 0})  # Adjust based on your dataset

# Ensure all features are numeric
X = X.apply(pd.to_numeric, errors='coerce')
X = X.fillna(0)  # Fill any remaining NaN values with 0

# Define models to be used. These instances act as unfitted prototypes only:
# cross_val_score clones them internally, and the final fit below works on a
# fresh clone as well.
models = {
    'RandomForest': RandomForestRegressor(),
    'LinearRegression': LinearRegression(),
    'SVR': SVR(),
    'KNN': KNeighborsRegressor(),
}

# Train models and select the best one (highest mean 5-fold R^2) per output.
best_models = {}
# The splitter has a fixed random_state, so one instance yields the same folds
# for every model/target combination.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for output_name, y in {'Fire Size': y1, 'Fire Duration': y2, 'Suppression Cost': y3, 'Fire Occurrence': y4}.items():
    best_score = -np.inf
    best_model = None
    for model_name, model in models.items():
        try:
            scores = cross_val_score(model, X, y, cv=kf, scoring='r2')
        except Exception as e:
            # Best effort: report the failure and keep evaluating other models.
            print(f"Failed to train {model_name} for {output_name}: {e}")
            continue
        mean_score = np.mean(scores)
        if mean_score > best_score:
            best_score = mean_score
            best_model = (model_name, model)
    if best_model:
        model_name, prototype = best_model
        # Fit a fresh clone per output. Fitting the shared prototype would
        # overwrite the estimator already stored for an earlier output whenever
        # the same model type wins more than once, leaving several entries of
        # best_models pointing at one object trained on the last target.
        model = clone(prototype)
        model.fit(X, y)
        with open(f'best_{model_name}_{output_name}.pkl', 'wb') as model_file:
            pickle.dump(model, model_file)
        best_models[output_name] = (model_name, model)

# Define Gradio app
def predict_fire_metrics(temperature, humidity, wind_speed, rainfall, fuel_moisture, slope, vegetation_type, region):
    """Predict every wildfire output for a single observation.

    The six numeric readings and the two categorical values are assembled into
    a one-row DataFrame laid out like the training matrix ``X`` and passed to
    the model stored for each output in ``best_models``.

    Returns:
        dict: ``"<model name> - <output name>"`` mapped to the predicted value
        for each output that succeeded, ``"<output name>"`` mapped to an
        ``"Error: ..."`` string for each output that did not, and an optional
        ``"Warnings"`` list naming categorical inputs the models do not know.
    """
    vegetation_column = f'Vegetation Type_{vegetation_type}'
    region_column = f'Region_{region}'
    input_data = pd.DataFrame({
        'Temperature (°C)': [temperature],
        'Humidity (%)': [humidity],
        'Wind Speed (km/h)': [wind_speed],
        'Rainfall (mm)': [rainfall],
        'Fuel Moisture (%)': [fuel_moisture],
        'Slope (%)': [slope],
        vegetation_column: [1],
        region_column: [1]
    })

    # reindex silently drops columns that are not training features, so a typo
    # in a category would otherwise go unnoticed. Remember which ones were lost.
    ignored_inputs = [
        f"{label} '{value}' is not a known training category and was ignored"
        for label, value, column in (
            ('Vegetation Type', vegetation_type, vegetation_column),
            ('Region', region, region_column),
        )
        if column not in X.columns
    ]
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    predictions = {}
    for output_name in ['Fire Size', 'Fire Duration', 'Suppression Cost', 'Fire Occurrence']:
        if output_name not in best_models:
            # Training is best-effort; an output may have no usable model.
            predictions[f'{output_name}'] = "Error: no trained model is available for this output"
            continue
        model_name, model = best_models[output_name]
        try:
            # Plain float keeps the result JSON-friendly for any estimator.
            prediction = float(model.predict(input_data)[0])
        except Exception as e:
            # UI boundary: report the failure for this output, keep the rest.
            predictions[f'{output_name}'] = f"Error: {e}"
        else:
            predictions[f'{model_name} - {output_name}'] = prediction

    if ignored_inputs:
        predictions['Warnings'] = ignored_inputs
    return predictions

# Gradio Interface: six numeric fields followed by two free-text fields, in the
# same order as the parameters of predict_fire_metrics.
inputs = [
    gr.Number(label=field_label)
    for field_label in (
        'Temperature (°C)',
        'Humidity (%)',
        'Wind Speed (km/h)',
        'Rainfall (mm)',
        'Fuel Moisture (%)',
        'Slope (%)',
    )
]
inputs.extend(
    gr.Textbox(label=field_label)
    for field_label in ('Vegetation Type', 'Region')
)

# The prediction function returns a dict, rendered as JSON.
outputs = gr.JSON()

app = gr.Interface(
    fn=predict_fire_metrics,
    inputs=inputs,
    outputs=outputs,
    title='Wildfire Metrics Prediction',
)

# Start the web UI only when this file is executed as the main module.
if __name__ == "__main__":
    app.launch()

# Requirements
# gradio
# pandas
# numpy
# scikit-learn
# matplotlib
# openpyxl
# pickle (part of the Python standard library; no installation needed)

Failed to train RandomForest for Fire Occurrence: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/ensemble/_forest.py", line 363, in fit
    X, y = self._validate_data(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 650, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "/usr/local/lib/python3.10/dist-pac