Skip to content

Commit

Permalink
Major update to Non parametric to accept data in the same way as fitters
Browse files Browse the repository at this point in the history
  • Loading branch information
MatthewReid854 committed Jul 27, 2019
1 parent 392f120 commit faa79a7
Showing 1 changed file with 37 additions and 80 deletions.
117 changes: 37 additions & 80 deletions reliability/Nonparametric.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,77 +2,61 @@
Non-parametric reliability analysis of failure data
Uses the Kaplan-Meier estimation method to calculate the reliability from failure data.
Censoring is supported and confidence bounds are provided.
The confidence bounds are calculated using the Greenwood formula with Normal approxmation, which is the same as
Right censoring is supported and confidence bounds are provided. Left censoring is not supported.
The confidence bounds are calculated using the Greenwood formula with Normal approximation, which is the same as
featured in Minitab.
inputs:
data - list or numpy array of failure data. Automatic sorting is implemented only if censoring is not specified.
If censoring is specified, the data must be presorted in ascending order so as to avoid any complications that
may arise from pairing censoring codes with data.
censoring - list or numpy array of censoring codes (0=censored, 1=failure). Only right censoring is supported.
This is an optional input, if unspecified, all data will be treated as failures.
failure - an array or list of failure times. Sorting is automatic so times do not need to be provided in any order.
right_censored - an array or list of right censored failure times. Defaults to None.
show_plot - True/False. Default is True
print_results - True/False. Default is True. Will display a pandas dataframe in the console.
plot_CI - plots the upper and lower confidence interval
CI - confidence interval between 0 and 1. Default is 0.95 for 95% CI.
shade_CI - 'gradient' (default) will plot a gradient fill, 'solid' will plot a solid fill, None = CI unshaded.
color_gradient - colormap to use if gradient shading the CI. Specify as string. Default is 'Blues'. Full list of
supported colormaps is available at https://matplotlib.org/users/colormaps.html
shade_CI - True/False. True (default) will plot a solid fill, False will leave the CI areas unshaded.
outputs:
results - dataframe of results
KM - list of Kaplan Meier column from results dataframe. This column is the non parametric estimate of the Survival Function (reliability function).
Example Usage:
d = [3961, 4007, 4734, 5248, 6054, 7298, 7454, 10190, 16890, 17200, 23060, 27160, 28690, 37100, 38700, 40060, 45000, 45670, 49390, 53000, 67000, 69040, 69630, 72280, 77350, 78470, 91680, 105700, 106300, 131900, 150400]
c = [0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0] # censoring codes, where censored = 0
KaplanMeier(data=d,censoring=c)
f = [5248,7454,16890,17200,38700,45000,49390,69040,72280,131900]
rc = [3961,4007,4734,6054,7298,10190,23060,27160,28690,37100,40060,45670,53000,67000,69630,77350,78470,91680,105700,106300,150400]
KaplanMeier(failures = f, right_censored = rc)
'''

#produce a kaplan meier plot of the data
class KaplanMeier:
def __init__(self,data,censoring=None,show_plot=True,print_results=True,plot_CI=True,CI=0.95,shade_CI='gradient',color_gradient='Blues',**kwargs):
def __init__(self,failures=None,right_censored = None,show_plot=True,print_results=True,plot_CI=True,CI=0.95,shade_CI=True,**kwargs):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as ss
np.seterr(divide='ignore') #divide by zero occurs if last detapoint is a failure so risk set is zero

if type(data)==np.ndarray:
d = list(data)
elif type(data)==list:
d = data
else:
raise TypeError('Incorrect type for data. Data must be numpy array or list.')

if d != sorted(d):
if censoring is None: #automatic sorting is done only if there is no censoring
d = sorted(d)
else:
raise ValueError('Data input must be sorted in ascending order. Due to complications that may arise from pairing censoring codes with data, automatic sorting is not implemented if censoring is specified.')

if censoring is None:
censoring = list(np.ones(len(d)))

if type(censoring)== np.ndarray:
c = list(censoring)
elif type(censoring)==list:
c = censoring
else:
raise TypeError('Incorrect type for censoring. Censoring must be numpy array or list of same length as data.')

if len(c)!= len(d):
raise ValueError('Length of censoring array does not match length of data array.')
if min(c)<0 or max(c)>1:
raise ValueError('Censoring array must contain only the numbers 0 and 1. Use 0 for right censored failures and 1 for actual failures. If all data is actual failures then do not specify the censoring array.')
if failures is None:
raise ValueError('failures must be provided to calculate non-parametric estimates.')
if right_censored is None:
right_censored = [] #create empty array so it can be added in hstack

if shade_CI not in [True, False]:
raise ValueError('shade_CI must be either True or False. Default is True.')

#turn the failures and right censored times into a two lists of times and censoring codes
times = np.hstack([failures, right_censored])
F = np.ones_like(failures)
RC = np.zeros_like(right_censored) #censored values are given the code of 0
cens_code = np.hstack([F, RC])
Data = {'times': times, 'cens_code': cens_code}
df = pd.DataFrame(Data, columns=['times', 'cens_code'])
df2 = df.sort_values(by='times')
d = df2['times'].values
c = df2['cens_code'].values

if CI<0 or CI>1:
raise ValueError('CI must be between 0 and 1. Default is 0.95 for 95% confidence intervals.')

colormap= plt.get_cmap(color_gradient)

n = len(d) #number of items
failures_array = np.arange(1, n + 1) #array of number of items (1 to n)
remaining_array = failures_array[::-1] #items remaining (n to 1)
Expand Down Expand Up @@ -104,31 +88,29 @@ def __init__(self,data,censoring=None,show_plot=True,print_results=True,plot_CI=
KM_upper[KM_upper>1]=1
KM_lower[KM_lower<0]=0

#assemble the pandas dataframe
DATA = {'Failure times': data,
#assemble the pandas dataframe for the output
DATA = {'Failure times': d,
'Censoring code (censored=0)': c,
'Items remaining': remaining_array,
'Kaplan Meier Estimate': KM,
'Lower CI bound': KM_lower,
'Upper CI bound': KM_upper}
df = pd.DataFrame(DATA,columns=['Failure times','Censoring code (censored=0)','Items remaining','Kaplan Meier Estimate','Lower CI bound','Upper CI bound'])
df2 = df.set_index('Failure times')
dfx = pd.DataFrame(DATA,columns=['Failure times','Censoring code (censored=0)','Items remaining','Kaplan Meier Estimate','Lower CI bound','Upper CI bound'])
dfy = dfx.set_index('Failure times')
pd.set_option('display.width', 200) # prevents wrapping after default 80 characters
pd.set_option('display.max_columns', 9) # shows the dataframe without ... truncation
self.results = df2
self.results = dfy
self.KM = KM

if print_results==True:
print(df2) #this will print the pandas dataframe
print(dfy) #this will print the pandas dataframe
#plotting section
if show_plot==True:
KM_x = [0]
KM_y = [1] #adds a start point for 100% reliability at 0 time
KM_y_upper = []
KM_y_lower = []



for i in failures_array:
if i==1:
if c[i-1]==0: #if the first item is censored
Expand Down Expand Up @@ -181,38 +163,13 @@ def __init__(self,data,censoring=None,show_plot=True,print_results=True,plot_CI=
KM_y_upper.append(KM_y_upper[-1])
plt.plot(KM_x,KM_y_lower,alpha=CI_alpha,color=CI_color,linestyle=CI_linestyle)
plt.plot(KM_x, KM_y_upper,alpha=CI_alpha,color=CI_color,linestyle=CI_linestyle)

if shade_CI in ['gradient',True]: # provides the gradient fill
idx = []
for i in range(len(KM_x)):
if i<len(KM_x)-1:
if KM_x[i]==KM_x[i+1]:
idx.append(i) # list of indexes for failures. Used in the gradient fill
for i,x in enumerate(idx):
try:
box_upper = [KM_x[x],KM_x[idx[i+1]],KM_y[x+1],KM_y_upper[x+1]]
box_lower = [KM_x[x],KM_x[idx[i+1]],KM_y[x+1],KM_y_lower[x+1]]
plt.imshow([[0, 0], [1, 1]], extent=box_upper, interpolation='bicubic', cmap=colormap,alpha=CI_alpha, vmin=-0.3, aspect='auto')
plt.imshow([[0, 0], [1, 1]], extent=box_lower, interpolation='bicubic', cmap=colormap,alpha=CI_alpha, vmin=-0.3, aspect='auto')
except: #the last box is treated differently due to index out of bounds
box_upper = [KM_x[x], KM_x[-1], KM_y[x+1], KM_y_upper[x+1]] #used for last shade rectangle
box_lower = [KM_x[x], KM_x[-1], KM_y[x+1], KM_y_lower[x+1]]
if KM_x[x] != KM_x[-1]: #prevents a warning. If the last datapoint is a failure then the box has no height and matplotlib gives a warning
plt.imshow([[0, 0], [1, 1]], extent=box_upper, interpolation='bicubic', cmap=colormap, alpha=CI_alpha,vmin=-0.3, aspect='auto')
plt.imshow([[0, 0], [1, 1]], extent=box_lower, interpolation='bicubic', cmap=colormap,alpha=CI_alpha,vmin=-0.3,aspect='auto')
elif shade_CI=='solid':
title_text = str('Kaplan-Meier reliability estimate\n with ' + str(int(CI * 100)) + '% confidence bounds')
if shade_CI is True:
plt.fill_between(KM_x,KM_y_lower,KM_y_upper,alpha=CI_alpha*0.6,color=CI_color)
elif shade_CI in ['none','None',None,False]:
pass
else:
raise ValueError('Invalid value for shade_CI. Must be either gradient, solid, or None.')

else:
title_text = 'Kaplan-Meier reliability estimate'
plt.xlabel('Failure units')
plt.ylabel('Reliability')
if plot_CI==False:
title_text = 'Kaplan-Meier reliability estimate'
else:
title_text = str('Kaplan-Meier reliability estimate\n with '+str(int(CI*100))+'% confidence bounds')
plt.title(title_text)
plt.xlim([0,max(KM_x)])
plt.ylim([0,1.1])

0 comments on commit faa79a7

Please sign in to comment.