In [32]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [33]:
import pandas as pd
import numpy as np

In [34]:
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [35]:
import folium

In [36]:
data = pd.read_csv("squirrel_data.csv")

In [37]:
data.head();

In [38]:
data.columns;

In [39]:
df = data[['X', 'Y',
       'Hectare Squirrel Number', 'Age', 'Primary Fur Color']]

In [40]:
df['Age'] = df['Age'].replace('?', 'Adult');
df['Age'] = df['Age'].fillna('Adult');

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Age'] = df['Age'].replace('?', 'Adult');
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Age'] = df['Age'].fillna('Adult');


In [41]:
df = df.rename(columns={'X': 'Longitude'})
df = df.rename(columns={'Y': 'Latitude'})

In [42]:
df['Primary Fur Color'] = df['Primary Fur Color'].fillna('Unknown')

In [43]:
le_color = LabelEncoder()
df['Primary Fur Color'] = le_color.fit_transform(df['Primary Fur Color'])

In [44]:
features = ['Longitude', 'Latitude', 'Hectare Squirrel Number']
X = df[features]
y = df['Primary Fur Color']

In [45]:
mask_missing = df['Primary Fur Color'] == le_color.transform(['Unknown'])[0]
X_missing = X[mask_missing]
y_missing = y[mask_missing]
X_known = X[~mask_missing]
y_known = y[~mask_missing]

In [46]:
model = Pipeline(steps=[
    ('scaler', StandardScaler()),  # Standardize features
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

In [47]:
cross_val_scores = cross_val_score(model, X_known, y_known, cv=5)
print(f'Cross-validation scores: {cross_val_scores}');
print(f'Average cross-validation score: {cross_val_scores.mean()}');

Cross-validation scores: [0.81818182 0.81144781 0.80639731 0.81956155 0.81956155]
Average cross-validation score: 0.8150300077787429


In [48]:
model.fit(X_known, y_known)

In [49]:
y_missing_pred = model.predict(X_missing)

In [50]:
df.loc[mask_missing, 'Primary Fur Color'] = y_missing_pred

In [51]:
df['Primary Fur Color'] = le_color.inverse_transform(df['Primary Fur Color'])

In [52]:
df.to_csv('updated_dataset.csv', index=False)

In [53]:
df["Primary Fur Color"].unique();

In [54]:
updated_df = pd.read_csv("updated_dataset.csv")

In [55]:
central_park_lat = 40.785091
central_park_lon = -73.968285
m = folium.Map(location=[central_park_lat, central_park_lon], zoom_start=15)

In [56]:
color_map = {
    'Gray': 'gray',
    'Cinnamon': 'brown',
    'Black': 'black',
    'Unknown': 'lightblue'
}

In [57]:
for _, row in df.iterrows():
    fur_color = row['Primary Fur Color']
    color = color_map.get(fur_color, 'lightblue')  # Default to lightblue if color not found
    folium.CircleMarker(
        location=[row['Latitude'], row['Longitude']],
        radius=5,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        popup=f'Fur Color: {fur_color}'
    ).add_to(m)

In [58]:
m.save('squirrel_map.html')

In [59]:
m