In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import folium
import xgboost as xgb


In [3]:
#Generating background data to use as controls (non-encounter data)
attack_df = pd.read_excel('bear_attack_processed_data_final_ver..xlsx')

#Seeded generator so that Restart & Run All reproduces the same background
#points (same seed value as the random_state used for the split and the model)
rng = np.random.default_rng(42)

#Bounding box of the recorded encounters; background points are drawn inside it
latitude_min, latitude_max = attack_df['Latitude'].min(), attack_df['Latitude'].max()
longitude_min, longitude_max = attack_df['Longitude'].min(), attack_df['Longitude'].max()

print("latitude bounds:",latitude_min, latitude_max)
print("Longitude bounds",longitude_min, longitude_max)

#Number of non-encounter data points
#NOTE(review): presumably chosen to match the number of encounter rows - confirm
n_background = 43654

#Creating random coordinates (uniform over the bounding box)
background_lat = rng.uniform(latitude_min, latitude_max, n_background)
background_lon = rng.uniform(longitude_min, longitude_max, n_background)

#Creating random dates: one calendar day per point, drawn with replacement
#from every day between start_date and end_date (both inclusive)
start_date = pd.to_datetime('2000-01-01')
end_date = pd.to_datetime('2025-01-01')
all_dates = pd.date_range(start=start_date, end=end_date)
random_dates = rng.choice(all_dates, size=n_background, replace=True)
random_dates = pd.to_datetime(random_dates).normalize()

#Random species: 'Black Bear' with probability 0.93, 'Grizzly Bear' with 0.07
#NOTE(review): the source of these proportions is not shown here - confirm
species = rng.choice(['Black Bear', 'Grizzly Bear'], size=n_background, p=[0.93, 0.07])


#Generating background non-encounter dataframe (Attack = 0 for every row)
background_data = pd.DataFrame({
    'Species Common Name': species,
    'Date': random_dates,
    'Latitude': background_lat,
    'Longitude': background_lon,
    'Attack': 0
})

background_data.head()


latitude bounds: 43.93289 69.083333
Longitude bounds -139.5 -53.9280555


Unnamed: 0,Species Common Name,Date,Latitude,Longitude,Attack
0,Black Bear,2012-12-13,59.888231,-100.02437,0
1,Black Bear,2014-12-20,58.836243,-87.456879,0
2,Black Bear,2002-05-07,65.276488,-117.824315,0
3,Black Bear,2019-07-23,53.276867,-72.215002,0
4,Black Bear,2009-06-20,67.67874,-116.723878,0


In [4]:
#Stack the encounter rows and the generated non-encounter rows into one frame
#and renumber the index from 0.
#Attack = 0 means non-encounter, 1 means encounter
full_df = pd.concat(
    objs=[attack_df, background_data],
    axis=0,
    ignore_index=True,
)
full_df

Unnamed: 0,Species Common Name,Date,Latitude,Longitude,Attack
0,Grizzly Bear,2010-01-15,50.620000,-116.070000,1
1,Grizzly Bear,2010-03-26,51.496800,-115.928100,1
2,Grizzly Bear,2010-03-27,51.496800,-115.928100,1
3,Grizzly Bear,2010-03-27,51.496800,-115.928100,1
4,Grizzly Bear,2010-04-02,51.496800,-115.928100,1
...,...,...,...,...,...
87303,Black Bear,2004-03-21,57.744816,-99.605425,0
87304,Black Bear,2015-05-22,62.197872,-116.602800,0
87305,Black Bear,2009-04-30,67.787958,-81.100571,0
87306,Black Bear,2018-07-15,58.668462,-133.847192,0


In [5]:
#Build the model inputs in a separate frame so full_df stays untouched and
#this cell gives the same result every time it is re-run.
model_df = full_df.copy()

#Encoding species name as number (Black =0, Grizzly =1)
#.map() returns a new Series, so the result has to be assigned back to take
#effect; any species name missing from the mapping becomes NaN.
model_df['Species Common Name'] = model_df['Species Common Name'].map(
    {'Black Bear':0, 'Grizzly Bear':1}
)

#XGBoost only accepts int/float/bool/category columns, so the datetime 'Date'
#column is replaced by numeric parts (year, month, day of year).
encounter_dates = pd.to_datetime(model_df['Date'])
model_df['Year'] = encounter_dates.dt.year
model_df['Month'] = encounter_dates.dt.month
model_df['DayOfYear'] = encounter_dates.dt.dayofyear

#Defining Features as X and y
X= model_df[['Species Common Name', 'Year', 'Month', 'DayOfYear', 'Latitude', 'Longitude']]
y= model_df['Attack']

In [None]:
#Hold out 30% of the rows for testing; the fixed random_state makes the
#split repeatable across runs
split_parts = train_test_split(X, y, random_state=42, test_size=0.3)
X_train, X_test, y_train, y_test = split_parts


In [None]:
#Create and train XGBoost Model
#The class is XGBClassifier, and keyword arguments must be comma-separated.
model = xgb.XGBClassifier(
    n_estimators=300,        #number of boosted trees
    learning_rate=0.05,      #shrinkage applied to each tree's contribution
    max_depth=4,             #maximum depth of each tree
    subsample=0.8,           #fraction of training rows sampled per tree
    colsample_bytree=0.8,    #fraction of feature columns sampled per tree
    random_state=42,         #fixed seed for repeatable training
)
model.fit(X_train, y_train)

In [None]:
#Creating Folium map for visualization

In [None]:
#Make Predictions
#Run the fitted model on the held-out test features. `model` and `X_test`
#are created in earlier cells, so those cells must have been run first.
y_pred = model.predict(X_test)

In [None]:
#Evaluate Model
