In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
import os
import re
import nltk

In [2]:
# Load the bear-attack records from the CSV next to this notebook.
# Later cells read the 'Bear', 'Latitude', 'Longitude' and 'Location' columns.
df = pd.read_csv(filepath_or_buffer='bear_attacks.csv')

# Visualise Word Cloud
---

In [4]:
# Fetch the NLTK data used by this notebook:
#   'stopwords' -> stopwords.words("english") in the stop-word cell
#   'wordnet'   -> WordNetLemmatizer in clean_text
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [6]:
# Domain words to suppress in addition to NLTK's English stop words.
custom_stop_words = ['bear', 'attack', 'brown', 'polar', 'black', 'killed', 'attacked']

# Combined stop-word set shared by clean_text and show_wordcloud.
stop_words = set(stopwords.words("english")) | set(custom_stop_words)

In [7]:
def clean_text(txt):
    """Normalise free text into one cleaned, space-joined string.

    Processing order:
      1. strip HTML-style tags, written either literally (``<b>``) or
         entity-escaped (``&lt;b&gt;``);
      2. replace every non-letter character with a space;
      3. lowercase and split on whitespace;
      4. drop stop words, lemmatise the rest with WordNet, then drop any
         token whose lemma is itself a stop word.

    Parameters
    ----------
    txt : str
        Raw text to clean.

    Returns
    -------
    list of str
        A one-element list holding the cleaned text (kept as a list so
        existing callers that index or ``str()`` the result still work).
    """
    # Tags must be removed first: once non-letters are blanked out there are
    # no angle brackets / entities left, so a later tag regex can never match
    # and tag names would leak in as ordinary words.
    text = re.sub(r"</?[a-zA-Z][^<>]*>|&lt;/?[a-zA-Z].*?&gt;", " ", txt)

    # Keep letters only; digits, punctuation and other symbols become spaces.
    text = re.sub('[^a-zA-Z]', ' ', text)

    text = text.lower()

    lem = WordNetLemmatizer()
    # Filter before lemmatising (skips needless work) ...
    lemmas = (lem.lemmatize(word) for word in text.split() if word not in stop_words)
    # ... and again afterwards, so e.g. "bears" -> "bear" is still removed.
    cleaned = " ".join(lemma for lemma in lemmas if lemma not in stop_words)

    return [cleaned]

In [8]:
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
%matplotlib inline

def show_wordcloud(txt):
    """Clean ``txt`` with ``clean_text`` and display it as a word cloud.

    Parameters
    ----------
    txt : str
        Raw text to visualise.

    Returns
    -------
    None
        The figure is shown via matplotlib as a side effect.
    """
    # clean_text returns a list of strings. Join it rather than calling str()
    # on the list, which would feed the list repr (brackets and quote
    # characters included) to the word-cloud tokenizer.
    cleaned = " ".join(clean_text(txt))

    wordcloud = WordCloud(
        background_color='white',
        stopwords=stop_words,
        max_words=100,
        max_font_size=50,
        random_state=42,  # fixed seed so the layout is reproducible
    ).generate(cleaned)

    plt.rcParams["figure.figsize"] = (20,10)
    plt.figure(1)
    plt.imshow(wordcloud)
    plt.axis('off')
    plt.show()

# Visualise on Map
---

In [10]:
!pip install geocoder
!conda install -c conda-forge folium=0.5.0 --yes

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 3.9 MB/s 
[?25hCollecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
/bin/bash: conda: command not found


In [11]:
import geocoder
import folium
from geopy.geocoders import Nominatim

In [12]:
def add_to_map(table,colour,fill_colour,map_of):
    """Draw one circle marker per row of ``table`` on ``map_of``.

    Parameters
    ----------
    table : mapping of column name to iterable
        Must provide 'Latitude', 'Longitude' and 'Location'; the three
        columns are walked in parallel.
    colour : str
        Outline colour of each marker.
    fill_colour : str
        Fill colour of each marker.
    map_of : object
        Target that each marker is attached to via ``add_to``.

    Returns
    -------
    The same ``map_of`` object, so calls can be chained.
    """
    rows = zip(table['Latitude'], table['Longitude'], table['Location'])
    for latitude, longitude, place in rows:
        # Popup text is the location name, parsed as HTML by folium.
        popup = folium.Popup('{}'.format(place), parse_html=True)
        marker = folium.CircleMarker(
            [latitude, longitude],
            radius=3,
            popup=popup,
            color=colour,
            fill=True,
            fill_color=fill_colour,
            fill_opacity=0.7,
            parse_html=False,
        )
        marker.add_to(map_of)
    return map_of

In [13]:
# One frame per value of the 'Bear' column, plotted as separate marker layers.
pb = df[df['Bear'].eq('Polar')]    # polar bears
blb = df[df['Bear'].eq('Black')]   # black bears
brb = df[df['Bear'].eq('Brown')]   # brown bears

In [14]:
# Centre the base map on a geocoded reference address.
address = 'Vancouver, BC'

geolocator = Nominatim(user_agent="na_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; fail with a
    # clear message instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

# Map that the marker layers are added to.
map_north_america = folium.Map(location=[latitude, longitude], zoom_start=3)
# Independent, marker-free base map. Plain assignment would only alias
# map_north_america, so the "pure" map would receive every marker too.
map_north_america_pure = folium.Map(location=[latitude, longitude], zoom_start=3)

In [15]:
# Add one marker layer per species: polar first, then black, then brown.
# add_to_map returns the map it was given, so each call builds on the last.
# NOTE(review): polar and black bears share fill colour '#3186cc' - confirm
# this is intended.
map_bear_attacks = add_to_map(pb, 'blue', '#3186cc', map_north_america)
map_bear_attacks = add_to_map(blb, 'black', '#3186cc', map_bear_attacks)
map_bear_attacks = add_to_map(brb, 'brown', '#964B00', map_bear_attacks)
map_bear_attacks