Replicate Brooks and Doswell 2001

Gopi Goteti edited this page Jan 23, 2014 · 3 revisions

Harold Brooks and Charles Doswell used data from the Storm Prediction Center as well as tornado data from other countries to analyze tornadoes by damage category (Brooks, Harold, and Charles A. Doswell. "Some aspects of the international climatology of tornadoes by damage classification." Atmospheric research 56.1 (2001): 191-201.).

The below code produces numbers consistent, if not identical, to those from Brooks and Doswell. The discrepancy could be due to the differences in Tornado data used (due to regular quality control the latest data used here may not be identical to the one used by SPC).

Read raw data add column names similar to those used by Elsner et al 2013 and consistent with documentation.

library(plyr)
library(ggplot2)
library(reshape2)

torn <- read.csv("data/1950-2012_torn.csv", header = FALSE, sep = ",", as.is = TRUE)

colnames(torn) <- c("OM", "YEAR", "MONTH", "DAY", "DATE", "TIME", "TIMEZONE", 
    "STATE", "FIPS", "STATENUMBER", "FSCALE", "INJURIES", "FATALITIES", "LOSS", 
    "CROPLOSS", "SLAT", "SLON", "ELAT", "ELON", "LENGTH", "WIDTH", "NS", "SN", 
    "SG", "F1", "F2", "F3", "F4")

Tornadoes spanning multiple counties are listed separately for each county. Thus, a single tornado could appear multiple times. Identify unique tornadoes using YEAR, OM and NS. Check for uniqueness; especially, tornadoes spanning multiple years (i.e, those which begin on 12/31 and end on 1/1); need to check only those with NS > 1.

dec31 <- subset(torn, MONTH == 12 & DAY == 31 & NS != 1)
jan01 <- subset(torn, MONTH == 1 & DAY == 1 & NS != 1)
if (nrow(dec31) > 0 & nrow(jan01) > 0) {
    stop("check! unique id assignment may not be accurate!")
}
torn$id <- paste(torn$YEAR, torn$MONTH, torn$OM, torn$NS, sep = "-")

str(torn)
## 'data.frame':    58169 obs. of  29 variables:
##  $ OM         : int  1 1 1 2 3 4 5 6 7 8 ...
##  $ YEAR       : int  1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 ...
##  $ MONTH      : int  1 1 1 1 1 1 1 1 1 2 ...
##  $ DAY        : int  3 3 3 3 3 13 25 25 26 11 ...
##  $ DATE       : chr  "1950-01-03" "1950-01-03" "1950-01-03" "1950-01-03" ...
##  $ TIME       : chr  "11:00:00" "11:00:00" "11:10:00" "11:55:00" ...
##  $ TIMEZONE   : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ STATE      : chr  "MO" "MO" "IL" "IL" ...
##  $ FIPS       : int  29 29 17 17 39 5 29 17 48 48 ...
##  $ STATENUMBER: int  1 1 1 2 1 1 2 3 1 2 ...
##  $ FSCALE     : int  3 3 3 3 1 3 2 2 2 2 ...
##  $ INJURIES   : int  3 3 0 3 1 1 5 0 2 0 ...
##  $ FATALITIES : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ LOSS       : num  6 6 5 5 4 3 5 5 0 4 ...
##  $ CROPLOSS   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SLAT       : num  38.8 38.8 38.8 39.1 40.9 ...
##  $ SLON       : num  -90.2 -90.2 -90.1 -89.3 -84.6 ...
##  $ ELAT       : num  38.8 38.8 38.8 39.1 0 ...
##  $ ELON       : num  -90 -90.1 -90 -89.2 0 ...
##  $ LENGTH     : num  9.5 6.2 3.3 3.6 0.1 0.6 2.3 0.1 4.7 9.9 ...
##  $ WIDTH      : int  150 150 100 130 10 17 300 100 133 400 ...
##  $ NS         : int  2 2 2 1 1 1 1 1 1 1 ...
##  $ SN         : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ SG         : int  1 2 2 1 1 1 1 1 1 1 ...
##  $ F1         : int  0 189 119 135 161 113 93 91 47 39 ...
##  $ F2         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ F3         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ F4         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ id         : chr  "1950-1-1-2" "1950-1-1-2" "1950-1-1-2" "1950-1-2-1" ...
head(torn)
##   OM YEAR MONTH DAY       DATE     TIME TIMEZONE STATE FIPS STATENUMBER
## 1  1 1950     1   3 1950-01-03 11:00:00        3    MO   29           1
## 2  1 1950     1   3 1950-01-03 11:00:00        3    MO   29           1
## 3  1 1950     1   3 1950-01-03 11:10:00        3    IL   17           1
## 4  2 1950     1   3 1950-01-03 11:55:00        3    IL   17           2
## 5  3 1950     1   3 1950-01-03 16:00:00        3    OH   39           1
## 6  4 1950     1  13 1950-01-13 05:25:00        3    AR    5           1
##   FSCALE INJURIES FATALITIES LOSS CROPLOSS  SLAT   SLON  ELAT   ELON
## 1      3        3          0    6        0 38.77 -90.22 38.83 -90.03
## 2      3        3          0    6        0 38.77 -90.22 38.82 -90.12
## 3      3        0          0    5        0 38.82 -90.12 38.83 -90.03
## 4      3        3          0    5        0 39.10 -89.30 39.12 -89.23
## 5      1        1          0    4        0 40.88 -84.58  0.00   0.00
## 6      3        1          1    3        0 34.40 -94.37  0.00   0.00
##   LENGTH WIDTH NS SN SG  F1 F2 F3 F4         id
## 1    9.5   150  2  0  1   0  0  0  0 1950-1-1-2
## 2    6.2   150  2  1  2 189  0  0  0 1950-1-1-2
## 3    3.3   100  2  1  2 119  0  0  0 1950-1-1-2
## 4    3.6   130  1  1  1 135  0  0  0 1950-1-2-1
## 5    0.1    10  1  1  1 161  0  0  0 1950-1-3-1
## 6    0.6    17  1  1  1 113  0  0  0 1950-1-4-1
tail(torn)
##           OM YEAR MONTH DAY       DATE     TIME TIMEZONE STATE FIPS
## 58164 425349 2012    12  25 2012-12-25 20:09:00        3    AL    1
## 58165 425351 2012    12  25 2012-12-25 20:26:00        3    AL    1
## 58166 425352 2012    12  25 2012-12-25 20:32:00        3    AL    1
## 58167 425358 2012    12  25 2012-12-25 22:24:00        3    AL    1
## 58168 425359 2012    12  25 2012-12-25 22:29:00        3    AL    1
## 58169 421430 2012    12  26 2012-12-26 13:58:00        3    NC   37
##       STATENUMBER FSCALE INJURIES FATALITIES LOSS CROPLOSS  SLAT   SLON
## 58164           0      1        0          0 0.00        0 32.05 -86.85
## 58165           0      0        0          0 0.00        0 32.15 -86.68
## 58166           0      2        0          0 0.00        0 32.20 -86.60
## 58167           0      2        2          0 0.00        0 31.73 -86.15
## 58168           0      1        0          0 0.00        0 32.35 -86.05
## 58169           0      1        0          0 0.01        0 34.79 -76.67
##        ELAT   ELON LENGTH WIDTH NS SN SG  F1 F2 F3 F4               id
## 58164 32.07 -86.82   2.03   300  1  1  1  85  0  0  0 2012-12-425349-1
## 58165 32.16 -86.67   0.62    75  1  1  1  85  0  0  0 2012-12-425351-1
## 58166 32.31 -86.51   9.55   900  1  1  1  85  0  0  0 2012-12-425352-1
## 58167 31.88 -85.96  15.70   600  1  1  1 109  0  0  0 2012-12-425358-1
## 58168 32.36 -86.04   0.72   100  1  1  1 101  0  0  0 2012-12-425359-1
## 58169 34.80 -76.67   0.91   100  1  1  1  31  0  0  0 2012-12-421430-1

Function to summarize counts of unique number of tornadoes by year and month.

count_unique_tornadoes <- function(in_df, monthly_stats = TRUE) {

    require(plyr)

    if (monthly_stats) {
        # some months dont have data; assign NAs to those months
        mon_totals <- expand.grid(MONTH = seq(1, 12), stringsAsFactors = FALSE)

        # number of unique tornadoes per month
        mon_torn <- ddply(.data = in_df, .variables = .(MONTH), .fun = function(x_df) length(unique(x_df$id)), 
            .drop = FALSE)

        mon_totals <- merge(mon_totals, mon_torn, by = "MONTH", all = TRUE)

        # output matrix
        out_mat <- c(nrow(in_df), length(unique(in_df$id)), mon_totals$V1)
        out_mat <- matrix(out_mat, nrow = 1)
        colnames(out_mat) <- c("N_total", "N_unique", month.abb)
    } else {
        # output matrix
        out_mat <- c(nrow(in_df), length(unique(in_df$id)))
        out_mat <- matrix(out_mat, nrow = 1)
        colnames(out_mat) <- c("N_total", "N_unique")
    }

    return(out_mat)
}

Figure 1

Counts of annual average tornadoes by decade per FSCALE category, produced by Brooks and Doswell in Figure 1 of their paper.

Identify decades. Ignore FSCALE of -9 (unknown). Data prior to 1950 is not available. Ignore decade of 2010 because of fewer data points.

time_breaks <- c(1950, 1960, 1970, 1980, 1990, 2000, 2010, 2020)
time_labels <- c("1950s", "1960s", "1970s", "1980s", "1990s", "2000s", "2010s")

torn$time_cat <- cut(torn$YEAR, breaks = time_breaks, labels = time_labels, 
    include.lowest = TRUE, right = FALSE)

events_by_scale <- ddply(.data = subset(torn, YEAR %in% c(1950:2009) & FSCALE %in% 
    c(0:5)), .variables = .(FSCALE, time_cat), .fun = count_unique_tornadoes, 
    monthly_stats = FALSE)

events_by_scale
##    FSCALE time_cat N_total N_unique
## 1       0    1950s     771      767
## 2       0    1960s    1369     1367
## 3       0    1970s    2180     2178
## 4       0    1980s    3280     3278
## 5       0    1990s    7387     7373
## 6       0    2000s    8201     8152
## 7       1    1950s    1666     1660
## 8       1    1960s    2270     2257
## 9       1    1970s    3434     3416
## 10      1    1980s    3339     3307
## 11      1    1990s    3311     3273
## 12      1    2000s    3455     3383
## 13      2    1950s    1416     1383
## 14      2    1960s    1892     1859
## 15      2    1970s    1878     1832
## 16      2    1980s    1241     1211
## 17      2    1990s    1093     1064
## 18      2    2000s     971      932
## 19      3    1950s     393      364
## 20      3    1960s     489      441
## 21      3    1970s     502      474
## 22      3    1980s     332      308
## 23      3    1990s     383      338
## 24      3    2000s     290      265
## 25      4    1950s     138      108
## 26      4    1960s     123       99
## 27      4    1970s     135      109
## 28      4    1980s      91       64
## 29      4    1990s      95       82
## 30      4    2000s      53       45
## 31      5    1950s      20       12
## 32      5    1960s      16       11
## 33      5    1970s      25       14
## 34      5    1980s       5        3
## 35      5    1990s      10       10
## 36      5    2000s       2        2

To get annual average per decade divide unique tornadoes in the decade by 10.

gfx_line <- ggplot(data = events_by_scale, aes(x = FSCALE, y = N_unique/10, 
    group = time_cat))
gfx_line <- gfx_line + geom_line(aes(colour = time_cat))
gfx_line <- gfx_line + geom_point(aes(colour = time_cat))
gfx_line <- gfx_line + ylab("Number") + scale_y_log10(breaks = 10^seq(-1, 3))
gfx_line <- gfx_line + theme(legend.title = element_blank())
gfx_line <- gfx_line + ggtitle("Average Annual Number of Tornadoes by decade, 1950-2009")
png("gfx_brooks2001_fig1.png")
plot(gfx_line)
garbage <- dev.off()

Figure 1

Figure 3

Counts of tornadoes by FSCALE and region. Consistent with Brooks and Doswell, split mainland US into four regions. Discard non-mainland USA regions.

levels(as.factor(torn$STATE))
##  [1] "AK" "AL" "AR" "AZ" "CA" "CO" "CT" "DC" "DE" "FL" "GA" "HI" "IA" "ID"
## [15] "IL" "IN" "KS" "KY" "LA" "MA" "MD" "ME" "MI" "MN" "MO" "MS" "MT" "NC"
## [29] "ND" "NE" "NH" "NJ" "NM" "NV" "NY" "OH" "OK" "OR" "PA" "PR" "RI" "SC"
## [43] "SD" "TN" "TX" "UT" "VA" "VT" "WA" "WI" "WV" "WY"
torn <- subset(torn, !(STATE %in% c("AK", "HI", "PR")))
Fn_Assign_Region <- function(state_name) {
    if (state_name %in% c("OK", "KS", "NE")) {
        return("Central US")
    } else if (state_name %in% c("FL")) {
        return("Florida")
    } else if (state_name %in% c("WA", "OR", "CA", "MT", "ID", "NV", "UT", "CO", 
        "WY", "NM", "AZ", "TX", "ND", "SD")) {
        return("Western US")
    } else {
        return("Eastern US")
    }
}

torn$Region <- sapply(torn$STATE, FUN = Fn_Assign_Region, USE.NAMES = FALSE)

Tornadoes by FSCALE and Region. Set F2 counts to 100, consitent with Brooks and Doswell, and scale the counts in the other categories accordingly.

events_by_region <- ddply(.data = subset(torn, YEAR %in% c(1950:1995) & FSCALE %in% 
    c(0:5)), .variables = .(FSCALE, Region), .fun = count_unique_tornadoes, 
    monthly_stats = FALSE)

events_by_region
##    FSCALE     Region N_total N_unique
## 1       0 Central US    2175     2175
## 2       0 Eastern US    3816     3806
## 3       0    Florida    1007     1007
## 4       0 Western US    4773     4768
## 5       1 Central US    1928     1914
## 6       1 Eastern US    6914     6865
## 7       1    Florida     649      647
## 8       1 Western US    3207     3195
## 9       2 Central US    1166     1150
## 10      2 Eastern US    4086     3965
## 11      2    Florida     276      275
## 12      2 Western US    1558     1550
## 13      3 Central US     370      341
## 14      3 Eastern US    1156     1059
## 15      3    Florida      29       29
## 16      3 Western US     398      379
## 17      4 Central US     111       92
## 18      4 Eastern US     375      295
## 19      4    Florida       3        2
## 20      4 Western US      61       53
## 21      5 Central US      17       11
## 22      5 Eastern US      45       28
## 23      5 Western US       9        8
events_by_region <- dcast(events_by_region, Region ~ FSCALE, value.var = "N_unique")
region_names <- events_by_region$Region
events_by_region <- events_by_region[, c(-1)] * (100/events_by_region[, "2"])
events_by_region$Region <- region_names
events_by_region <- melt(events_by_region, id.vars = c("Region"))

events_by_region
##        Region variable    value
## 1  Central US        0 189.1304
## 2  Eastern US        0  95.9899
## 3     Florida        0 366.1818
## 4  Western US        0 307.6129
## 5  Central US        1 166.4348
## 6  Eastern US        1 173.1400
## 7     Florida        1 235.2727
## 8  Western US        1 206.1290
## 9  Central US        2 100.0000
## 10 Eastern US        2 100.0000
## 11    Florida        2 100.0000
## 12 Western US        2 100.0000
## 13 Central US        3  29.6522
## 14 Eastern US        3  26.7087
## 15    Florida        3  10.5455
## 16 Western US        3  24.4516
## 17 Central US        4   8.0000
## 18 Eastern US        4   7.4401
## 19    Florida        4   0.7273
## 20 Western US        4   3.4194
## 21 Central US        5   0.9565
## 22 Eastern US        5   0.7062
## 23    Florida        5       NA
## 24 Western US        5   0.5161

Graphic similar to Figure 3 of Brooks and Doswell.

gfx_line <- ggplot(data = events_by_region, aes(x = variable, y = value, group = Region))
gfx_line <- gfx_line + geom_line(aes(colour = Region))
gfx_line <- gfx_line + geom_point(aes(colour = Region))
gfx_line <- gfx_line + ylab("Number") + scale_y_log10(breaks = 10^seq(-1, 3))
gfx_line <- gfx_line + xlab("FSCALE")
gfx_line <- gfx_line + theme(legend.title = element_blank())
gfx_line <- gfx_line + ggtitle("Number of Tornadoes, per 100 F2s, by region, 1950-1995")
png("gfx_brooks2001_fig3.png")
plot(gfx_line)
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
garbage <- dev.off()

Figure 3

Brooks and Doswell considered only data from 1950-1995. Here is the graphic with data from 1950-2012.

events_by_region <- ddply(.data = subset(torn, YEAR %in% c(1950:2012) & FSCALE %in% 
    c(0:5)), .variables = .(FSCALE, Region), .fun = count_unique_tornadoes, 
    monthly_stats = FALSE)

events_by_region <- dcast(events_by_region, Region ~ FSCALE, value.var = "N_unique")
region_names <- events_by_region$Region
events_by_region <- events_by_region[, c(-1)] * (100/events_by_region[, "2"])
events_by_region$Region <- region_names
events_by_region <- melt(events_by_region, id.vars = c("Region"))

gfx_line <- ggplot(data = events_by_region, aes(x = variable, y = value, group = Region))
gfx_line <- gfx_line + geom_line(aes(colour = Region))
gfx_line <- gfx_line + geom_point(aes(colour = Region))
gfx_line <- gfx_line + ylab("Number") + scale_y_log10(breaks = 10^seq(-1, 3))
gfx_line <- gfx_line + xlab("FSCALE")
gfx_line <- gfx_line + theme(legend.title = element_blank())
gfx_line <- gfx_line + ggtitle("Number of Tornadoes, per 100 F2s, by region, 1950-2012")
png("gfx_brooks2001_fig3_new.png")
plot(gfx_line)
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
garbage <- dev.off()

Figure 3 New