In [1]:
import numpy as np

In [2]:
import pandas as pd

df = pd.read_csv("lamps.csv", index_col=0)
df.head()

Unnamed: 0_level_0,h,f,K
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,50
1,840,2,48
2,852,1,47
3,936,1,46
4,960,1,45


In [3]:
from empiricaldist import Pmf

In [4]:
pmf = Pmf(df.f.values, index=df.h)
pmf.normalize()
pmf.head()

h
0      0.00
840    0.04
852    0.02
936    0.02
960    0.02
dtype: float64

In [5]:
cdf = pmf.make_cdf()
cdf.tail()

h
1812    0.92
1836    0.94
1860    0.96
1980    0.98
2568    1.00
dtype: float64

In [6]:
cdf[2568]

1.0

In [7]:
surv = pmf.make_surv()
surv.head()

h
0      1.00
840    0.96
852    0.94
936    0.92
960    0.90
dtype: float64

In [8]:
surv[0]

1.0

In [9]:
t = [1,2,2,4,5]
pmf = Pmf.from_seq(t, normalize=False)
cdf = pmf.make_cdf()
cdf

Unnamed: 0,probs
1,1
2,3
4,4
5,5


In [10]:
surv = pmf.make_surv()
surv

Unnamed: 0,probs
1,4
2,2
4,1
5,0


In [11]:
cdf2 = surv.make_cdf()
cdf2

Unnamed: 0,probs
1,1
2,3
4,4
5,5


In [12]:
haz = pmf.make_hazard()
list(haz)

[0.2, 0.5, 0.5, 1.0]

In [13]:
pmf2 = haz.make_pmf()
pmf2

Unnamed: 0,probs
1,1.0
2,2.0
4,1.0
5,1.0


In [14]:
complete = [1,3,6]
ongoing = [2,3,5,7]

pmf_complete = Pmf.from_seq(complete, normalize=False)
pmf_ongoing = Pmf.from_seq(ongoing, normalize=False)

done = pmf_complete + pmf_ongoing
done

Unnamed: 0,probs
1,1.0
2,1.0
3,2.0
5,1.0
6,1.0
7,1.0


In [15]:
list(pmf_complete - pmf_ongoing)

[1.0, -1.0, 0.0, -1.0, 1.0, -1.0]

In [16]:
list(pmf_complete * pmf_ongoing)

[0.0, 0.0, 1.0, 0.0, 0.0, 0.0]

In [17]:
list(pmf_complete / pmf_ongoing)

[inf, 0.0, 1.0, 0.0, inf, 0.0]

In [18]:
surv_complete = pmf_complete.make_surv()
surv_ongoing = pmf_ongoing.make_surv()

at_risk = pmf_complete + pmf_ongoing
s1 = surv_complete(at_risk.index)
s1

array([2., 2., 1., 1., 0., 0.])

In [19]:
s2 = surv_ongoing(at_risk.index)
s2

array([4., 3., 2., 1., 1., 0.])

In [20]:
at_risk = done + s1 + s2
list(at_risk)

[7.0, 6.0, 5.0, 3.0, 2.0, 1.0]

In [21]:
haz = pmf_complete / at_risk
list(haz)

[0.14285714285714285, 0.0, 0.2, 0.0, 0.5, 0.0]

In [22]:
df = pd.DataFrame(index=haz.index)
df['pmf_complete'] = pmf_complete
df['pmf_ongoing'] = pmf_ongoing
df['surv_complete'] = s1
df['surv_ongoing'] = s2
df['at risk'] = at_risk+s1+s2
df['hazard'] = haz
df

Unnamed: 0,pmf_complete,pmf_ongoing,surv_complete,surv_ongoing,at risk,hazard
1,1.0,,2.0,4.0,13.0,0.142857
2,,1.0,2.0,3.0,11.0,0.0
3,1.0,1.0,1.0,2.0,8.0,0.2
5,,1.0,1.0,1.0,5.0,0.0
6,1.0,,0.0,1.0,3.0,0.5
7,,1.0,0.0,0.0,1.0,0.0


In [23]:
from empiricaldist import Surv

t = [1,2,2,3,5]
surv = Surv.from_seq(t, normalize=False)
surv

Unnamed: 0,probs
1,4
2,2
3,1
5,0


In [24]:
surv([0, 1, 2, 3, 4, 5, 6])

array([5., 4., 2., 1., 1., 0., 0.])

In [25]:
surv.ps

array([4, 2, 1, 0])

In [26]:
surv.qs

array([1, 2, 3, 5])

In [27]:
surv.sort_index()

Unnamed: 0,probs
1,4
2,2
3,1
5,0


In [28]:
surv.inverse([-1, 0, 1, 2, 3, 4, 5, 6])

array([ nan,   5.,   3.,   2.,   2.,   1., -inf,  nan])

In [29]:
surv = Surv.from_seq(t)
surv

Unnamed: 0,probs
1,0.8
2,0.4
3,0.2
5,0.0


In [30]:
surv([0, 1, 2, 3, 4, 5, 6])

array([1. , 0.8, 0.4, 0.2, 0.2, 0. , 0. ])

In [31]:
surv.inverse([-0.2, 0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2])

array([ nan,   5.,   3.,   2.,   2.,   1., -inf,  nan])

In [32]:
surv

Unnamed: 0,probs
1,0.8
2,0.4
3,0.2
5,0.0


In [33]:
t = [1, 2, 2, 3, 5]
pmf = Pmf.from_seq(t, normalize=False)
pmf[0] = 0
pmf.sort_index(inplace=True)
pmf

Unnamed: 0,probs
0,0
1,1
2,2
3,1
5,1


In [34]:
pmf(4)

0

In [35]:
pmf('a')

0

In [36]:
xs = [0,1,2,3,4,5,6]

In [37]:
pmf(xs)

array([0, 1, 2, 1, 0, 1, 0])

In [38]:
pmf(tuple(xs))

array([0, 1, 2, 1, 0, 1, 0])

In [39]:
pmf(np.array(xs))

array([0, 1, 2, 1, 0, 1, 0])

In [40]:
pmf(pd.Series(xs))

array([0, 1, 2, 1, 0, 1, 0])

In [41]:
cdf = pmf.make_cdf()
cdf

Unnamed: 0,probs
0,0
1,1
2,3
3,4
5,5


In [42]:
surv = pmf.make_surv()
surv

Unnamed: 0,probs
0,5
1,4
2,2
3,1
5,0


In [43]:
haz = surv.make_hazard()
haz

Unnamed: 0,probs
0,0.0
1,0.2
2,0.5
3,0.5
5,1.0


In [44]:
haz(xs)

array([0. , 0.2, 0.5, 0.5, 0. , 1. , 0. ])