In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, date
from sweref99 import projections

In [2]:
path='./data/Brandriskdata 2000-2020.csv'
#'PunktID': str, 'E': str, 'N': str,
types={ 'PunktID': str}

In [3]:
#Reading csv file from path with dtypes
df = pd.read_csv(path, sep=';', dtype=types, )

In [67]:
#tm is used for the conversions of easting and northing to longitude and latitude
tm = projections.make_transverse_mercator("SWEREF_99_TM")

In [130]:
#For now sample is only 50 values
sample = df.sample(50)
sample[['E','N']] = df[['E','N']].fillna(0)
sample['N'] = sample['N'].astype(int)
sample['E'] = sample['E'].astype(int)
sample.head(10)

Unnamed: 0,PunktID,E,N,Kommun,Datum,Temp,Tmedel,Nederbord,RH,Vindhastighet,...,DC,ISI,BUI,FWI,FWI_index,HBV_o,HBV_u,HBV,HBV_index,Gras
12018829,4933,602338,6883520,2132.0,2000-07-11,149,136,44,81,41,...,2043,15,197,2,2.0,75.0,50.0,77.0,1.0,
11072450,4527,445616,6789520,2039.0,2018-09-10,122,12,10,958,3035354,...,3462,0,85,0,1.0,88.0,42.0,90.0,1.0,-1.0
12007747,4926,615475,7309578,2506.0,2017-06-16,183,13,13,455,14997,...,1236,13,418,32,2.0,43.0,70.0,71.0,2.0,2.0
10011077,4108,700821,7012535,2284.0,2012-05-03,105,6,0,303,57,...,357,102,119,112,3.0,63.0,70.0,67.0,2.0,4.0
3343812,1391,624412,7030514,2284.0,2020-07-09,134,11,106,679,2,...,1124,0,42,0,1.0,87.0,61.0,91.0,1.0,-1.0
1692708,703,402240,6770692,2023.0,2011-08-25,133,12,37,945,21,...,1407,1,52,0,1.0,79.0,67.0,84.0,1.0,
7200190,2942,467204,6601452,1760.0,2004-08-16,181,152,17,74,18,...,3222,1,535,28,2.0,51.0,41.0,56.0,3.0,
2904387,1202,687426,7281274,2505.0,2018-07-03,224,16,0,384,2104587,...,2261,75,443,169,3.0,51.0,42.0,47.0,3.0,-1.0
10263324,4199,400076,6795501,2023.0,2014-05-14,6,3,8,357,31,...,0,0,0,0,1.0,95.0,96.0,100.0,1.0,1.0
5571714,2321,601192,7483079,2510.0,2005-04-20,-17,-62,0,73,68,...,0,0,0,0,1.0,100.0,94.0,100.0,1.0,


In [121]:
#Function that calculates number of missing data in column of dataframe and prints result.
def missing(df,column):
       x = len(df)
       if df[column].isnull().any():
           print('{0} has total of {1} null values'.format(column, df[column].isnull().sum()))
           print ('In the column {0}'.format(column), round(df[column].count()-1/x * 100, 3), '% of the cells have missing values')

In [129]:
#Missing values of column 'Gras'
missing(sample,'Gras')
missing(sample,'Vindriktning')
sample.isnull().sum(axis = 0)

Gras has total of 15 null values
In the column Gras 33.0 % of the cells have missing values
Vindriktning has total of 31 null values
In the column Vindriktning 17.0 % of the cells have missing values


PunktID           0
E                 0
N                 0
Kommun            0
Datum             0
Temp              0
Tmedel            1
Nederbord         0
RH                1
Vindhastighet     0
Vindriktning     31
FFMC              1
DMC               1
DC                1
ISI               1
BUI               1
FWI               1
FWI_index         1
HBV_o             1
HBV_u             1
HBV               1
HBV_index         1
Gras             15
Latitude          0
Longitude         0
dtype: int64

In [100]:
#Data info
sample.describe()

Unnamed: 0,E,N,Kommun,FWI_index,HBV_o,HBV_u,HBV,HBV_index,Gras
count,50.0,50.0,50.0,49.0,49.0,49.0,49.0,49.0,35.0
mean,613332.42,7005175.0,1974.44,1.285714,74.061224,64.489796,78.265306,1.55102,1.485714
std,140132.354559,398093.8,784.369112,1.322876,23.51277,21.906166,21.174646,0.818057,1.462702
min,355450.0,6129645.0,125.0,-1.0,0.0,0.0,0.0,0.0,-1.0
25%,517557.0,6679040.0,1748.5,1.0,62.0,53.0,66.0,1.0,1.0
50%,609857.0,7094570.0,2363.5,1.0,78.0,69.0,82.0,1.0,1.0
75%,720308.25,7301608.0,2510.0,2.0,93.0,81.0,100.0,2.0,2.0
max,894469.0,7670400.0,2584.0,5.0,100.0,100.0,100.0,4.0,5.0


In [123]:
#Functions for converting easting and northing to latitudes and longitudes.
def toLat(E,N):
    lat, lon = tm.grid_to_geodetic(N,E)
    return lat
def toLon(E,N):
    lat, lon = tm.grid_to_geodetic(N,E)
    return lon

In [124]:
#Applying functions to create to new columns, Longitude and Latitude for reported fires.
sample['Latitude'] = sample.apply(lambda row: toLat(row['E'],row['N']),axis=1)
sample['Longitude'] = sample.apply(lambda row: toLon(row['E'],row['N']),axis=1)
sample


Unnamed: 0,PunktID,E,N,Kommun,Datum,Temp,Tmedel,Nederbord,RH,Vindhastighet,...,BUI,FWI,FWI_index,HBV_o,HBV_u,HBV,HBV_index,Gras,Latitude,Longitude
13746976,5651,534500,6802426,2121.0,2020-04-12,109,6.0,9,516.0,65,...,127.0,69.0,2.0,61.0,77.0,81.0,1.0,3.0,61.354333,15.64508
13573718,5590,589502,6577364,484.0,2017-09-20,117,8.0,9,623.0,1765884,...,178.0,9.0,1.0,78.0,35.0,77.0,1.0,2.0,59.325588,16.572816
9922545,4077,653237,7043151,2284.0,2004-09-25,99,85.0,0,61.0,34,...,3.0,4.0,1.0,87.0,75.0,83.0,1.0,,63.483558,18.077141
3289658,1377,747792,7101939,2480.0,2018-02-09,2,-2.0,9,965.0,5534387,...,0.0,0.0,-1.0,100.0,95.0,100.0,1.0,1.0,63.955858,20.062132
6591247,2700,787288,7565699,2584.0,2013-03-02,-128,-19.0,9,762.0,37,...,0.0,0.0,1.0,98.0,84.0,100.0,1.0,1.0,68.060621,21.902207
14005215,5752,717890,7449061,2523.0,2002-08-16,209,142.0,0,47.0,34,...,309.0,65.0,2.0,63.0,46.0,56.0,3.0,,67.079704,20.018089
4734652,1974,649549,6577753,125.0,2013-06-14,192,16.0,54,668.0,47,...,224.0,6.0,1.0,71.0,36.0,70.0,2.0,2.0,59.312084,17.627261
18663857,7685,650662,7187888,2481.0,2006-04-21,49,8.0,0,36.0,43,...,0.0,0.0,1.0,100.0,89.0,100.0,1.0,,64.781611,18.170106
7278414,2985,439281,6409002,643.0,2008-07-19,192,156.0,12,57.0,33,...,292.0,22.0,2.0,62.0,46.0,66.0,2.0,,57.81886,13.977899
15162967,6222,670835,7539501,2584.0,2016-03-16,8,2.0,4,748.0,6816781,...,0.0,0.0,1.0,98.0,81.0,100.0,1.0,1.0,67.919316,19.074781
