In [1]:
import geopandas as gp
import geoplot as gplt
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as sp

In [2]:
# Hail report initial points (file name indicates 1955-2018); the path is
# relative to the notebook's working directory.
hail = gp.read_file('hailshape/1955-2018-hail-initpoint.shp')
# NOTE(review): absolute, user-specific path - this cell only runs on a machine
# with this directory layout.
# NOTE(review): the file name suggests county boundaries even though the
# variable is called `states` - confirm which is intended.
states = gp.read_file('/home/robert/basemaps/counties/cb_2018_us_county_500k.shp')

### To perform this calculation we need to split the data into arrays for each month, and we'll need to calculate the mean and standard deviation for each month. In addition to sorting the data by month, we will only keep the storms that produced hailstones larger than 1 inch.

In [3]:
# Build `months`: for each calendar month (1-12), a list holding one count per
# year in `years` (in order) of Colorado hail reports with magnitude above 1.
years = range(1959, 2019, 1)

# The state and magnitude filters do not depend on the year or month, so apply
# them once up front instead of re-scanning the full dataset on every iteration.
co_hail = hail[(hail['st'] == 'CO') & (hail['mag'] > 1.)]

months = {m: [] for m in range(1, 13)}
for yr in years:
    # Narrow to a single year first so the inner loop only scans that year's rows.
    in_year = co_hail[co_hail['yr'] == yr]
    for m in range(1, 13):
        months[m].append(len(in_year[in_year['mo'] == m]))

### We'll define some helper functions for the Gaussian calculations we'll need.

In [4]:
def params(array):
    """Return ``(mu, omega)`` for ``array``.

    ``mu`` is ``np.mean(array)`` and ``omega`` is ``np.std(array)`` with
    NumPy's default settings.
    """
    return np.mean(array), np.std(array)

def gaussian(mu, omega, x):
    """Evaluate the Gaussian density with mean ``mu`` and spread ``omega`` at ``x``.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    # Normalisation constant 1 / (omega * sqrt(2*pi)).
    norm = 1/(omega*(2*np.pi)**(0.5))
    # Exponent -(x - mu)^2 / (2 * omega^2).
    exponent = -1*(x-mu)**2/(2*omega**2)
    return norm*np.exp(exponent)

def gaussian_int(mu, omega, ran):
    """Approximate the integral of the Gaussian density over the grid ``ran``.

    Uses a left Riemann sum: for each pair of consecutive grid points the
    density at the left point is multiplied by the spacing to the next point,
    and the products are summed.

    Parameters
    ----------
    mu, omega : float
        Mean and spread passed straight through to ``gaussian``.
    ran : sequence or numpy.ndarray
        Grid points in the order they should be traversed. The spacing does
        not have to be uniform.

    Returns
    -------
    The accumulated sum, or ``0`` when ``ran`` has fewer than two points
    (there is no interval to integrate over).
    """
    pts = np.asarray(ran)
    if len(pts) < 2:
        return 0
    # Evaluate every left endpoint in one vectorised call rather than looping
    # in Python; the last grid point is only ever a right edge, so it is dropped.
    heights = gaussian(mu, omega, pts[:-1])
    widths = np.diff(pts)
    return np.sum(heights * widths)

### Now let's test the functions to make sure they are correct. First we'll check that the probability of getting a value less than the mean comes out to approximately 50%. It might be slightly different due to the crude integration technique being used.

In [5]:
# Spot-check the density one unit above the mean (mu=20, omega=2, x=21).
# NOTE(review): this is not the last expression in the cell, so its value is
# never displayed.
gaussian(20, 2, 21)
# Integration grid from 0 up to (but not including) 20 in steps of 0.0001.
ran = np.arange(0, 20, 0.0001)

In [6]:
# Integrate the mu=20, omega=2 Gaussian over the grid from the previous cell
# (0 up to just under the mean).
gaussian_int(20, 2, ran)

0.49997007932899457

### Now let's check the integration problem we were doing in the example. What is the probability that our phone will have a battery life of greater than 21 hours? Our crude integration technique won't allow us to push out to infinity. Instead we'll push the value out to 40 (10 standard deviations).

In [7]:
# Grid from 21 up to (but not including) 40 in steps of 0.1.
# NOTE(review): this step is far coarser than the 0.0001 used for the 50% check,
# so the left Riemann sum overshoots a little (exact normal tail is about 0.3085).
ran = np.arange(21, 40, 0.1)
gaussian_int(20, 2, ran)

0.317375849569944

### Okay, now we can look at the hail data. Here we'll check the probability of getting 20 hail storms across Colorado in the month of July. 

In [8]:
# Mean and standard deviation of the per-year counts stored for month 7 (July).
mu, omega = params(months[7])

In [9]:
# Evaluate the July Gaussian at x = 20.
# NOTE(review): this is the density at 20, not a probability - a probability
# needs an integral over an interval.
gaussian(mu, omega, 20)

0.016396373059243283

In [10]:
# Integrate the July Gaussian from 0 up to (but not including) 40 in steps of 0.01.
ran = np.arange(0, 40, 0.01)
gaussian_int(mu, omega, ran)

0.5888254980044053

In [11]:
# Show the July parameters used above.
# NOTE(review): the recorded omega is larger than mu, so this Gaussian places
# noticeable weight on negative counts - confirm the model is appropriate.
print(mu, omega)

17.433333333333334 24.1946045408659
