In [None]:
import pandas as pd
from haversine import haversine

# Read the two input tables, converting their datetime columns while loading.

# Environmental observations: 'timestamp' is parsed as a datetime.
env_data = pd.read_csv(
    'environment_data.csv',
    parse_dates=['timestamp'],
)

# Wildfire records: both 'timestamp' and 'fire_start_time' are parsed as datetimes.
fire_data = pd.read_csv(
    'wildfire_data.csv',
    parse_dates=['timestamp', 'fire_start_time'],
)

In [None]:
# Build one block of aggregated environmental features per wildfire.
#
# For every fire, environmental records from the look-back window that lie
# within `max_distance_km` of the fire are averaged per timestamp and labelled
# with that fire's severity. The result is `fire_features`.

# Define parameters
time_window_hours = 24 * 7  # look-back period before each fire start: 1 week
max_distance_km = 50  # 50 km radius around the fire location

# Loop-invariant values, computed once instead of once per fire
lookback = pd.Timedelta(hours=time_window_hours)
env_timestamps = env_data['timestamp']

# Columns averaged across all nearby records that share a timestamp
feature_aggregations = {
    'temperature': 'mean',
    'humidity': 'mean',
    'wind_speed': 'mean',
    'precipitation': 'mean',
    'vegetation_index': 'mean',
    'human_activity_index': 'mean'
}

# Prepare to collect samples (one aggregated frame per fire that has data)
samples = []

for _fire_idx, fire in fire_data.iterrows():
    fire_time = fire['fire_start_time']
    fire_loc = (fire['latitude'], fire['longitude'])

    # Keep records in the half-open interval [fire_time - lookback, fire_time)
    in_window = (
        (env_timestamps >= fire_time - lookback)
        & (env_timestamps < fire_time)
    )
    relevant_env = env_data[in_window].copy()

    # No environmental records in this fire's window: skip it.
    # (Row-wise DataFrame.apply on an empty frame returns a DataFrame, which
    # cannot be assigned to a single column and would abort the whole loop.)
    if relevant_env.empty:
        continue

    # Distance from every record to the fire. A list comprehension always
    # yields exactly one value per row and avoids building a Series per row.
    relevant_env['distance'] = [
        haversine((lat, lon), fire_loc)
        for lat, lon in zip(relevant_env['latitude'], relevant_env['longitude'])
    ]

    # Filter by distance
    nearby_env = relevant_env[relevant_env['distance'] <= max_distance_km]

    if not nearby_env.empty:
        # Aggregate features: mean over all nearby records per timestamp
        aggregated = nearby_env.groupby('timestamp').agg(feature_aggregations).reset_index()

        # Assign fire severity as target
        aggregated['severity'] = fire['severity']
        samples.append(aggregated)

# Combine into a single DataFrame; fail loudly if no fire had usable data
if samples:
    fire_features = pd.concat(samples)
else:
    raise ValueError("No overlapping environmental data found for fires.")

In [None]:
# Generate negative samples (no fires)
# Target ratio is 10 negatives per fire (adjust based on your data). The count
# is capped at the number of available rows because DataFrame.sample raises
# ValueError when asked for more rows than exist without replacement.
n_negative = min(len(fire_data) * 10, len(env_data))

# Fixed seed so a fresh "Restart & Run All" draws the same negatives,
# matching the random_state used for the split and the model.
non_fire_samples = env_data.sample(n=n_negative, random_state=42)
non_fire_samples['severity'] = 'none'  # Indicate no fire

# NOTE(review): negatives are drawn from all of env_data, so a sampled row may
# also have contributed to fire_features. Consider excluding those rows.

# Combine positive and negative samples
all_data = pd.concat([fire_features, non_fire_samples], ignore_index=True)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Feature columns handed to the model, in a fixed order
features = [
    'temperature',
    'humidity',
    'wind_speed',
    'precipitation',
    'vegetation_index',
    'human_activity_index',
]

# Turn severity labels into integer codes. The fitted encoder `le` is kept so
# that predictions can be mapped back to the original labels later.
# NOTE(review): assumes every severity label has the same type as the 'none'
# marker (string) -- confirm against the wildfire data.
le = LabelEncoder()
severity_codes = le.fit_transform(all_data['severity'])
all_data['severity_encoded'] = severity_codes

# Feature matrix and encoded target
X = all_data[features]
y = all_data['severity_encoded']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The split is stratified on the
# encoded severity and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Fit a random forest on the training split. class_weight='balanced' asks
# scikit-learn to reweight the classes; the seed keeps the fit repeatable.
model = RandomForestClassifier(class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)

# The encoder assigns codes 0 .. len(le.classes_) - 1. Passing them explicitly
# keeps the report rows aligned with target_names even when a class is missing
# from the test split; without `labels`, that mismatch raises ValueError.
class_codes = list(range(len(le.classes_)))
print(classification_report(y_test, y_pred, labels=class_codes, target_names=le.classes_))

In [None]:
# Example prediction for a single hand-written observation.
# The keys are the feature columns the model was trained on.
example_conditions = {
    'temperature': 30.0,
    'humidity': 40,
    'wind_speed': 20,
    'precipitation': 0.5,
    'vegetation_index': 50,
    'human_activity_index': 30
}
new_data = pd.DataFrame([example_conditions])

# Predict the encoded class, then map it back to the original severity label
prediction = model.predict(new_data)
predicted_severity = le.inverse_transform(prediction)[0]
print('Predicted severity:', predicted_severity)