# Victoria Wildfires Data Feeding

In [1]:
import pandas as pd

# Both training tables are addressed by bare relative file names (no
# directory part, no extension), so they are resolved against the kernel's
# current working directory.
weather_path = 'train_data_victoria_weather'
bushfire_path = 'train_data_victoria_bushfire'

# Weather observations first, then the bushfire detections.
weather = pd.read_csv(weather_path)
bushfire = pd.read_csv(bushfire_path)

In [2]:
# Preview the first five weather rows to check how the columns were parsed.
weather.head(n=5)

Unnamed: 0.1,Unnamed: 0,station,date,rainfall,avg_temp,avg_rh,avg_windspeed
0,2528,Melbourne (Olympic Park) {station 086338},2020-06-01,5.0,10.25,88.0,11.0
1,2529,Melbourne (Olympic Park) {station 086338},2020-06-02,11.6,12.0,73.5,8.0
2,2530,Melbourne (Olympic Park) {station 086338},2020-06-03,0.6,12.3,67.0,13.0
3,2531,Melbourne (Olympic Park) {station 086338},2020-06-04,0.0,11.55,65.0,16.00449
4,2532,Melbourne (Olympic Park) {station 086338},2020-06-05,0.0,9.35,77.0,9.0


In [3]:
# Preview the first five bushfire rows to check how the columns were parsed.
bushfire.head(n=5)

Unnamed: 0,latitude,longitude,brightness,date,confidence,frp,daynight,type,zone
0,-35.8234,146.5838,301.8,2017-01-01,44,7.4,0,0,15.0
1,-37.8179,145.3028,315.0,2017-01-03,44,9.8,1,0,13.0
2,-37.6923,140.5473,313.0,2017-01-03,40,7.7,1,0,4.0
3,-37.5509,140.8295,313.3,2017-01-03,45,8.6,1,0,4.0
4,-38.0136,145.2693,320.9,2017-01-05,27,5.5,1,0,14.0


In [4]:
# Join every bushfire detection to the weather observations recorded on the
# same date. The key is spelled out on purpose: without `on=`, pandas joins on
# every column name the two frames share, so an extra shared column (for
# example a saved index column) would silently change the join.
merged_data = pd.merge(weather, bushfire, how='inner', on='date')

# An inner join over non-overlapping dates produces an empty frame; fail here
# with a clear message rather than later inside model training.
assert not merged_data.empty, "weather and bushfire share no dates; merged_data is empty"

# Parse 'date' and derive the 'day', 'month' and 'year' columns from it
merged_data['date'] = pd.to_datetime(merged_data['date'])
merged_data['day'] = merged_data['date'].dt.day
merged_data['month'] = merged_data['date'].dt.month
merged_data['year'] = merged_data['date'].dt.year

merged_data.head(5)

Unnamed: 0.1,Unnamed: 0,station,date,rainfall,avg_temp,avg_rh,avg_windspeed,latitude,longitude,brightness,confidence,frp,daynight,type,zone,day,month,year
0,2530,Melbourne (Olympic Park) {station 086338},2020-06-03,0.6,12.3,67.0,13.0,-35.0261,145.8218,302.9,54,5.9,1,0,11.0,3,6,2020
1,2530,Melbourne (Olympic Park) {station 086338},2020-06-03,0.6,12.3,67.0,13.0,-37.295,141.1312,387.8,100,418.4,1,0,4.0,3,6,2020
2,2530,Melbourne (Olympic Park) {station 086338},2020-06-03,0.6,12.3,67.0,13.0,-37.3022,141.1076,349.9,96,140.1,1,0,4.0,3,6,2020
3,2530,Melbourne (Olympic Park) {station 086338},2020-06-03,0.6,12.3,67.0,13.0,-35.0469,145.7983,301.0,43,4.8,1,0,11.0,3,6,2020
4,2530,Melbourne (Olympic Park) {station 086338},2020-06-03,0.6,12.3,67.0,13.0,-37.3003,141.1272,374.6,100,295.5,1,0,4.0,3,6,2020


# Machine Learning Models (Random Forest and XGBoost)

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Model inputs: weather readings, fire location, calendar parts and zone
features = [
    'rainfall', 'avg_temp', 'avg_rh', 'avg_windspeed',
    'latitude', 'longitude',
    'day', 'month', 'year',
    'zone',
]

# Quantities to predict; a separate regressor is fitted for each entry
target_variables = ['brightness', 'confidence', 'frp']

# Feature matrix (X) and target frame (y) taken from the merged table
X, y = merged_data[features], merged_data[target_variables]

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


# Train one Random Forest regressor per target variable.
# random_state is fixed because forest construction is randomised; without it
# the fitted models, and every metric computed from them, change on each run.
rf_models = {}
for target in target_variables:
    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train[target])
    rf_models[target] = model

# Train one XGBoost regressor per target variable.
# The dict is created once, at top level, before its loop. It used to sit
# inside the Random Forest loop body, where it was re-created on every
# iteration and would not exist at all if target_variables were empty.
xgb_models = {}
for target in target_variables:
    model = XGBRegressor(random_state=42)  # Initialize XGBoost regressor with a fixed seed
    model.fit(X_train, y_train[target])  # Train the model
    xgb_models[target] = model  # Store the trained model


In [6]:
# Score each Random Forest model on the held-out test rows.
# Results are keyed by target name, each holding MAE, MSE and R-squared.
evaluation_results = {}
for target in target_variables:
    actual = y_test[target]
    predicted = rf_models[target].predict(X_test)
    evaluation_results[target] = {
        'MAE': mean_absolute_error(actual, predicted),
        'MSE': mean_squared_error(actual, predicted),
        'R-squared': r2_score(actual, predicted),
    }

# Report the metrics, one blank-line-separated group per target
for target, metrics in evaluation_results.items():
    print(f"Evaluation metrics for {target}:")
    print(f"MAE: {metrics['MAE']}")
    print(f"MSE: {metrics['MSE']}")
    print(f"R-squared: {metrics['R-squared']}")
    print()


Evaluation metrics for brightness:
MAE: 10.21468618155961
MSE: 257.7253911589634
R-squared: 0.8068477500534924

Evaluation metrics for confidence:
MAE: 8.93477917357984
MSE: 197.67037015090463
R-squared: 0.7219082710781348

Evaluation metrics for frp:
MAE: 51.78924539780359
MSE: 23872.683890930268
R-squared: 0.761724529325153



In [7]:
# Score each XGBRegressor model on the held-out test rows.
# Note: this rebinds evaluation_results, replacing whatever it held before.
evaluation_results = {}
for target in target_variables:
    actual = y_test[target]
    predicted = xgb_models[target].predict(X_test)
    evaluation_results[target] = {
        'MAE': mean_absolute_error(actual, predicted),
        'MSE': mean_squared_error(actual, predicted),
        'R-squared': r2_score(actual, predicted),
    }

# Report the metrics, one blank-line-separated group per target
for target, metrics in evaluation_results.items():
    print(f"Evaluation metrics for {target}:")
    print(f"MAE: {metrics['MAE']}")
    print(f"MSE: {metrics['MSE']}")
    print(f"R-squared: {metrics['R-squared']}")
    print()

Evaluation metrics for brightness:
MAE: 16.958831468841197
MSE: 584.0807213805463
R-squared: 0.5622608041151551

Evaluation metrics for confidence:
MAE: 17.312319898463443
MSE: 507.6713545504474
R-squared: 0.2857846899195834

Evaluation metrics for frp:
MAE: 85.70522593548398
MSE: 44372.820693422174
R-squared: 0.5571107637414556



In [11]:
# Forecasting: score one hand-written scenario with the Random Forest models

# Single-row frame; the keys are the training feature columns, in the same order
input_data = pd.DataFrame([{
    'rainfall': 1,
    'avg_temp': 20,
    'avg_rh': 88,
    'avg_windspeed': 11,
    'latitude': -37.5,
    'longitude': 143,
    'day': 5,
    'month': 5,
    'year': 2024,
    'zone': 9,
}])

# One prediction array per target, each from that target's own model
predicted_confidence, predicted_brightness, predicted_frp = (
    rf_models[name].predict(input_data)
    for name in ('confidence', 'brightness', 'frp')
)

# Print all three predictions (element 0 is the only row)
print("Predicted Confidence:", predicted_confidence[0])
print("Predicted Brightness:", predicted_brightness[0])
print("Predicted FRP:", predicted_frp[0])


Predicted Confidence: 58.53
Predicted Brightness: 327.62200000000024
Predicted FRP: 67.39


In [17]:
# Five hand-written scenarios; the keys are the training feature columns, in
# the same order. Columns that are constant across rows use list repetition.
input_data = pd.DataFrame({
    'rainfall': [20] * 5,
    'avg_temp': [20, 21, 22, 23, 23],
    'avg_rh': [88, 89, 90, 87, 90],
    'avg_windspeed': [11, 10, 10, 10, 11],
    'latitude': [-37.5, -37.6, -37.7, -37.8, -37.9],
    'longitude': [143] * 5,
    'day': [5] * 5,
    'month': [7] * 5,
    'year': [2024] * 5,
    'zone': [9] * 5,
})

# Predict every target before any column is added, so each model still sees
# exactly the feature columns it was trained on
predicted_confidence, predicted_brightness, predicted_frp = (
    rf_models[name].predict(input_data)
    for name in ('confidence', 'brightness', 'frp')
)

# Attach the predictions as three extra columns next to the inputs
input_data = input_data.assign(
    predicted_confidence=predicted_confidence,
    predicted_brightness=predicted_brightness,
    predicted_frp=predicted_frp,
)

# Show the scenarios together with their predictions
input_data

Unnamed: 0,rainfall,avg_temp,avg_rh,avg_windspeed,latitude,longitude,day,month,year,zone,predicted_confidence,predicted_brightness,predicted_frp
0,20,20,88,11,-37.5,143,5,7,2024,9,60.41,327.831,63.231
1,20,21,89,10,-37.6,143,5,7,2024,9,59.82,367.394,45.174
2,20,22,90,10,-37.7,143,5,7,2024,9,61.47,363.377,52.855
3,20,23,87,10,-37.8,143,5,7,2024,9,61.44,363.271,54.281
4,20,23,90,11,-37.9,143,5,7,2024,9,61.78,364.257,73.626
