In [165]:
import pandas as pd
import numpy as np
from datetime import datetime, date
from sweref99 import projections
import folium
from folium import plugins
from selenium import webdriver
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display, clear_output

In [130]:
path='./data/Insatser till brand i skog och mark 2000-2020.xlsx'

In [131]:
#tm is used for the conversions of easting and northing to longitude and latitude
tm = projections.make_transverse_mercator("SWEREF_99_TM")

In [132]:
df = pd.read_excel(path)


In [213]:
#Here are our headers!
df['TotArea'] = df['arealProduktivSkogsmark_m2']+df['arealAnnanTradbevuxenMark_m2']+df['arealMarkUtanTrad_m2']
df['Acres'] = df['TotArea']/4046.86
df.head(1)


Unnamed: 0,ar,datum,tid,kommun,kommunKortNamn,verksamhetText,sweref99Norr,sweref99Ost,BEJBbrandorsakText,arealProduktivSkogsmark_m2,arealAnnanTradbevuxenMark_m2,arealMarkUtanTrad_m2,TotArea,Acres
0,2000,2000-01-01,00:07:00,1272,Bromölla,Verksamhet inte knuten till en byggnad,6856.0,-905921.0,Fyrverkeri eller pyroteknik,0,6,0,6,0.001483


In [212]:
print(df.shape)


(100307, 14)


In [233]:
#Here I copy the dataframe to not have to read the csv files if i accidently overwrites the dataframe.
copy_df = df

In [234]:
#Getting all causes for a fire 
copy_df['BEJBbrandorsakText'].unique()

array(['Fyrverkeri eller pyroteknik', 'Okänd', 'Annan',
       'Barns lek med eld', 'Annan eldning', 'Avsiktlig brand',
       'Övriga gnistor',
       'Återantändning av brand från tidigare räddningsinsats',
       'Självantändning - biologisk eller kemisk',
       'Grillning eller lägereld', 'Blixtnedslag', 'Heta arbeten',
       'Rökning', 'Tågbromsning', 'Fel i utrustning', 'Uppgift saknas'],
      dtype=object)

In [235]:
def filter_rows_by_values(df, col, values):
    return df[~df[col].isin(values)]

In [236]:
#Filter rows on cause of fire and only keeping "Annan". "Okänd", "Grillning eller lägereld" and "Uppgift saknas"
copy_df = filter_rows_by_values(copy_df, 'BEJBbrandorsakText', ['Annan eldning', 'Avsiktlig brand',
       'Övriga gnistor', 'Barns lek med eld', 'Rökning',
       'Återantändning av brand från tidigare räddningsinsats',
       'Fel i utrustning', 'Blixtnedslag', 'Heta arbeten',
       'Fyrverkeri eller pyroteknik',
       'Självantändning - biologisk eller kemisk',
       'Tågbromsning'])
copy_df

Unnamed: 0,ar,datum,tid,kommun,kommunKortNamn,verksamhetText,sweref99Norr,sweref99Ost,BEJBbrandorsakText,arealProduktivSkogsmark_m2,arealAnnanTradbevuxenMark_m2,arealMarkUtanTrad_m2,TotArea,Acres
1,2000,2000-01-01,01:04:00,1452,Tranemo,Verksamhet inte knuten till en byggnad,,,Okänd,0,0,5,5,0.001236
2,2000,2000-01-02,01:05:00,2283,Sollefteå,"Lantbruk, inte bostad",,,Annan,0,0,15000,15000,3.706577
3,2000,2000-01-02,19:35:00,180,Stockholm,Verksamhet inte knuten till en byggnad,,,Okänd,0,5,0,5,0.001236
5,2000,2000-01-04,23:07:00,1493,Mariestad,Verksamhet inte knuten till en byggnad,6502493.0,437254.0,Okänd,0,2,0,2,0.000494
6,2000,2000-01-05,15:57:00,1442,Vårgårda,Verksamhet inte knuten till en byggnad,6436766.0,374071.0,Okänd,0,0,10,10,0.002471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100292,2020,2020-12-19,21:20:00,2182,Söderhamn,Verksamhet inte knuten till en byggnad,6804987.0,604088.0,Okänd,0,0,1,1,0.000247
100296,2020,2020-12-26,00:55:00,1283,Helsingborg,Verksamhet inte knuten till en byggnad,6216071.0,355402.0,Annan,0,4,0,4,0.000988
100298,2020,2020-12-27,08:47:00,138,Tyresö,Verksamhet inte knuten till en byggnad,6570239.0,686650.0,Okänd,0,2,0,2,0.000494
100304,2020,2020-12-30,12:32:00,380,Uppsala,Verksamhet inte knuten till en byggnad,6637240.0,653312.0,Okänd,0,0,20,20,0.004942


In [237]:
#I drop all fires smaller than one acre and only keep fires that . I check how many fires are left.
copy_df = copy_df[copy_df.Acres>10]
copy_df = copy_df
print(copy_df.shape)

(574, 14)


In [238]:
#To not get to many values I have made a random sample of 200 fires.
sample = copy_df#.sample(200, replace=True)

In [240]:
for column in sample:
    if sample[column].isnull().any():
       print('{0} has {1} null values'.format(column, sample[column].isnull().sum()))

sweref99Norr has 84 null values
sweref99Ost has 84 null values


In [241]:
#Function that calculates number of missing data in column of dataframe and prints result.
def missing(df,column):
       x = len(df)
       if df[column].isnull().any():
           print('{0} has total of {1} null values'.format(column, df[column].isnull().sum()))
           print ('In the column {0}'.format(column), round(df[column].count()-1/x * 100, 3), '% of the cells have missing values')
 

In [242]:
#Missing values of column:
missing(sample,'sweref99Norr')
missing(sample,'sweref99Ost')
sample.isnull().sum(axis = 0)

sweref99Norr has total of 84 null values
In the column sweref99Norr 489.826 % of the cells have missing values
sweref99Ost has total of 84 null values
In the column sweref99Ost 489.826 % of the cells have missing values


ar                               0
datum                            0
tid                              0
kommun                           0
kommunKortNamn                   0
verksamhetText                   0
sweref99Norr                    84
sweref99Ost                     84
BEJBbrandorsakText               0
arealProduktivSkogsmark_m2       0
arealAnnanTradbevuxenMark_m2     0
arealMarkUtanTrad_m2             0
TotArea                          0
Acres                            0
dtype: int64

In [243]:
sample.dtypes

ar                                       int64
datum                           datetime64[ns]
tid                                     object
kommun                                   int64
kommunKortNamn                          object
verksamhetText                          object
sweref99Norr                           float64
sweref99Ost                            float64
BEJBbrandorsakText                      object
arealProduktivSkogsmark_m2               int64
arealAnnanTradbevuxenMark_m2             int64
arealMarkUtanTrad_m2                     int64
TotArea                                  int64
Acres                                  float64
dtype: object

In [244]:
#Functions for converting easting and northing to latitudes and longitudes.
def toLat(E,N):
    lat, lon = tm.grid_to_geodetic(N,E)
    return lat
def toLon(E,N):
    lat, lon = tm.grid_to_geodetic(N,E)
    return lon

In [245]:
#Applying functions to create to new columns, Longitude and Latitude for reported fires.
sample['Latitude'] = sample.apply(lambda row: toLat(row['sweref99Ost'],row['sweref99Norr']),axis=1)
sample['Longitude'] = sample.apply(lambda row: toLon(row['sweref99Ost'],row['sweref99Norr']),axis=1)
sample.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['Latitude'] = sample.apply(lambda row: toLat(row['sweref99Ost'],row['sweref99Norr']),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['Longitude'] = sample.apply(lambda row: toLon(row['sweref99Ost'],row['sweref99Norr']),axis=1)


Unnamed: 0,ar,datum,tid,kommun,kommunKortNamn,verksamhetText,sweref99Norr,sweref99Ost,BEJBbrandorsakText,arealProduktivSkogsmark_m2,arealAnnanTradbevuxenMark_m2,arealMarkUtanTrad_m2,TotArea,Acres,Latitude,Longitude
219,2000,2000-03-20,13:46:00,1881,Kumla,Verksamhet inte knuten till en byggnad,6560269.0,503531.0,Okänd,0,0,400000,400000,98.842065,59.181544,15.061785
222,2000,2000-03-20,13:50:00,686,Eksjö,Verksamhet inte knuten till en byggnad,6392211.0,493550.0,Annan,0,0,50000,50000,12.355258,57.672108,14.891867
327,2000,2000-03-23,12:16:00,1904,Skinnskatteberg,Verksamhet inte knuten till en byggnad,,,Okänd,0,0,100000,100000,24.710516,,
687,2000,2000-03-30,13:24:00,583,Motala,Verksamhet inte knuten till en byggnad,6506344.0,508179.0,Okänd,0,0,70000,70000,17.297361,58.697196,15.141125
708,2000,2000-03-30,15:15:00,160,Täby,Verksamhet inte knuten till en byggnad,,,Okänd,750000,0,0,750000,185.328872,,
981,2000,2000-04-08,14:00:00,840,Mörbylånga,Verksamhet inte knuten till en byggnad,6263380.0,589043.0,Okänd,0,0,150000,150000,37.065774,56.506396,16.446741
2216,2000,2000-05-07,10:42:00,2085,Ludvika,Verksamhet inte knuten till en byggnad,,,Okänd,2000000,0,0,2000000,494.210326,,
2236,2000,2000-05-07,13:59:00,1862,Degerfors,Verksamhet inte knuten till en byggnad,6562895.0,460235.0,Okänd,0,45000,0,45000,11.119732,59.203278,14.303744
2736,2000,2000-05-13,17:36:00,192,Nynäshamn,Verksamhet inte knuten till en byggnad,6548532.0,669244.0,Okänd,0,120000,0,120000,29.65262,59.042589,17.950074
2788,2000,2000-05-14,13:49:00,188,Norrtälje,Verksamhet inte knuten till en byggnad,6664097.0,701885.0,Okänd,0,250000,0,250000,61.776291,60.064152,18.627699


In [246]:
#We need to group fire sizes here but what sizes should we use? What is a "big" fire and what is a "small" fire?
sample['Acres'].describe()

count     574.000000
mean       63.686832
std       201.456982
min        10.131312
25%        14.826310
50%        21.930583
75%        37.065774
max      2891.130407
Name: Acres, dtype: float64

In [247]:
sample = sample[sample['Latitude'].notna()]
sample = sample[sample['Longitude'].notna()]


In [248]:
#Group fires by size NEEDS TO CHANGE! Any ideas for groups?
small_wildfires = folium.FeatureGroup(TotArea = '< 100 Acres')
medium_wildfires = folium.FeatureGroup(TotArea = '100 - 500 Acres')
large_wildfires = folium.FeatureGroup(TotArea = '1000 - 5000 Acres')
xl_wildfires = folium.FeatureGroup(TotArea = '> 5000 Acres')

In [206]:
#Function that takes the dataframe and returns a map. Circles on the map are colored based on the size of the fire.
def add_ToHeatMap(fireCoords,map):
    map.add_child(plugins.HeatMap(fireCoords, radius=8.5))
    return map

In [187]:
#Function that takes the dataframe and returns a map. Circles on the map are colored based on the size of the fire.
def add_FireCircle(df, m):
    for i, v in df.iterrows():
        
        fire_size = float(v['Acres'])
        
        #When hoovering over the circle, the popup will show the fire name, year, cause, state and size.
        #More values can be added.
        popup = """
        Kommun : <b>%s</b><br>
        Size (Acres) : <b>%s</b><br>
        Cause : <b>%s</b><br>
        Year: <b>%s</b><br>
        """ % (v['kommunKortNamn'], v['Acres'], 
            v['BEJBbrandorsakText'], 
            v['ar'])
        
        
        if fire_size < 100:
            folium.CircleMarker(location = [v['Latitude'], 
                                            v['Longitude']],
                            radius = np.log(fire_size) * 0.8,
                            weight = 0,
                            tooltip = popup,
                            color = '#ffeda0',
                            fill_color = '#ffeda0',
                            fill_opacity = 0.7,
                            fill = True).add_to(small_wildfires)
            
        if fire_size in range(100, 500):
            folium.CircleMarker(location = [v['Latitude'], 
                                            v['Longitude']],
                            radius = np.log(fire_size),
                            weight = 0,
                            tooltip = popup,
                            color = '#feb24c',
                            fill_color = '#feb24c',
                            fill_opacity = 0.7,
                            fill = True).add_to(medium_wildfires)
        
        if fire_size in range(1000, 5000):
            folium.CircleMarker(location = [v['Latitude'], 
                                            v['Longitude']],
                            radius = np.log(fire_size) * 1.5,
                            weight = 0,
                            tooltip = popup,
                            color = '#fc4e2a',
                            fill_color = '#fc4e2a',
                            fill_opacity = 0.7,
                            fill = True).add_to(large_wildfires)

        if fire_size > 5000:
            folium.CircleMarker(location = [v['Latitude'], 
                                            v['Longitude']],
                            radius = np.log(fire_size) * 2,
                            weight = 0,
                            tooltip = popup,
                            color = '##b10026',
                            fill_color = '#b10026',
                            fill_opacity = 0.7,
                            fill = True).add_to(xl_wildfires)

    small_wildfires.add_to(m)
    medium_wildfires.add_to(m)
    large_wildfires.add_to(m)
    xl_wildfires.add_to(m)
    folium.LayerControl(collapsed = False).add_to(m)

    return m

In [207]:
#Map instanciation, with start point at the center of the US. Type of map is set to Stamen Terrain.
map = folium.Map(location = [59.334591, 18.063240],
               tiles = 'Stamen Terrain',
               zoom_start = 5.5)
map

In [148]:
m_sweden = add_FireCircle(sample, map)
m_sweden.save('sweden.html')

In [208]:
locations = sample[['Latitude','Longitude']]
h_sweden = add_ToHeatMap(locations.values,map)

In [209]:
h_sweden