# ⚠️ Conflict Risk Modeling

## Step 1: Load dataset

In [1]:
import pandas as pd

# Load cleaned data
df = pd.read_csv("cleaned_gbif_data.csv")

# Keep only conflict-prone species
# Use flexible filtering with contains()
conflict_keywords = ['loxodonta', 'panthera', 'syncerus', 'potamochoerus', 'civettictis', 'papio']

# Convert species names to lowercase and filter
df['scientificName_lower'] = df['scientificName'].str.lower()
df_conflict = df[df['scientificName_lower'].str.contains('|'.join(conflict_keywords), na=False)].copy()

# View the result
print(f"Total conflict-prone records: {len(df_conflict)}")
display(df_conflict['scientificName'].value_counts())

Total conflict-prone records: 12


Unnamed: 0_level_0,count
scientificName,Unnamed: 1_level_1
"Papio cynocephalus (Linnaeus, 1766)",4
"Potamochoerus larvatus (F.Cuvier, 1822)",3
"Syncerus caffer (Sparrman, 1779)",2
"Civettictis civetta (Schreber, 1776)",1
"Panthera pardus (Linnaeus, 1758)",1
"Loxodonta africana (Blumenbach, 1797)",1


## Step 2: Feature engineering

In [None]:
# Extract month from date
df['eventDate'] = pd.to_datetime(df['eventDate'], errors='coerce')
df['month'] = df['eventDate'].dt.month

# Encode species as numeric categories
df['species_code'] = df['scientificName'].astype('category').cat.codes

# Simulate a target variable: 'conflict' (this would be real if you had HWC reports)
# For now, assume conflict is more likely in dry season or near 1-2 species
df['conflict'] = ((df['month'].isin([1, 2, 3, 8, 9])) | (df['species_code'] == 0)).astype(int)

df[['scientificName', 'month', 'species_code', 'conflict']].head()


Unnamed: 0,scientificName,month,species_code,conflict
0,"Coleura afra (Peters, 1852)",9.0,12,1
1,"Coleura afra (Peters, 1852)",9.0,12,1
2,"Miniopterus minor Peters, 1867",9.0,38,1
3,"Rousettus aegyptiacus (E.Geoffroy, 1810)",9.0,67,1
4,"Epomophorus wahlbergi (Sundevall, 1846)",9.0,17,1


## Step 3: Train/test split and modeling

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Define features and target
features = ['month', 'species_code', 'decimalLatitude', 'decimalLongitude']
X = df[features]
y = df['conflict']

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        22
           1       1.00      1.00      1.00        18

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40



## Step 4: Evaluate predictions by use of maps

In [None]:
import folium
from folium.plugins import MarkerCluster

# Add predictions back to the data
# Extract month from date
df_conflict['eventDate'] = pd.to_datetime(df_conflict['eventDate'], errors='coerce')
df_conflict['month'] = df_conflict['eventDate'].dt.month

# Encode species as numeric categories
df_conflict['species_code'] = df_conflict['scientificName'].astype('category').cat.codes

df_conflict['predicted_conflict'] = model.predict(df_conflict[features])

# Map
m = folium.Map(location=[-3.3, 38.5], zoom_start=7)
marker_cluster = MarkerCluster().add_to(m)

# Only plot predicted conflict points
for idx, row in df_conflict[df_conflict['predicted_conflict'] == 1].iterrows():
    folium.Marker(
        location=[row['decimalLatitude'], row['decimalLongitude']],
        popup=row['scientificName']
    ).add_to(marker_cluster)

m