Plot censoring rate, distribution, KDE, regression and correlation of simulated data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import fatiguelife

import os, glob

In [None]:
def plot_corr(data, figsize=(15,15)):
    '''
    Plot the lower triangle of the correlation matrix as an annotated heatmap.

    Args:
    - data: pd dataframe; only its numeric/boolean columns are correlated
    - figsize: (width, height) of the new figure, passed to plt.subplots

    Note: calls sns.set(font_scale=1.2), which also affects later plots.
    '''
    # DataFrame.corr() raises on non-numeric columns with pandas >= 2.0, so
    # keep only the columns a correlation is defined for.
    numeric = data.select_dtypes(include=['number', 'bool'])
    corr = numeric.corr()
    sns.set(font_scale=1.2)
    # The matrix is symmetric: mask the upper triangle (diagonal included).
    mask = np.triu(np.ones_like(corr, dtype=bool))
    with sns.axes_style("white"):
        _, ax = plt.subplots(figsize=figsize)
        # Draw on the axes created above rather than on the implicit
        # "current axes" of pyplot.
        sns.heatmap(corr, mask=mask, square=True,
                    vmin=-1, vmax=1,
                    cmap='RdBu_r', center=0, annot=True,
                    annot_kws={'fontsize': 8},
                    ax=ax)

In [None]:
# Locate the simulated survival dataset. The path starts from the parent of
# the current working directory, so it depends on where the notebook runs.
parent_dir = os.path.dirname(os.getcwd())
csv_path = os.path.join(parent_dir, '../Data/Merged_data/CopulaGAN_simulated_data_survival_2.csv')
data = pd.read_csv(csv_path)

In [None]:
# Copy of the table without the 'Observed' column.
data_f = data.drop(columns=['Observed'])

### Plotting

In [None]:
# Horizontal bars counting the rows for each value of 'Observed'.
sns.countplot(data=data, y='Observed')

In [None]:
# Correlation heatmap of data_f on a 10x10 figure.
plot_corr(data=data_f, figsize=(10, 10))

In [None]:
sns.set(font_scale=1.3)
sns.set_style("whitegrid", {'axes.grid' : False})

# Columns to plot against F_Time: everything except 'Observed', skipping the
# first remaining column.
kde_cols = data.drop('Observed', axis=1).columns[1:]

fig, axes = plt.subplots(4, 4,  sharex=False, sharey=False, figsize=(15,10))
panels = axes.flatten()

# One filled bivariate KDE (column vs. F_Time) per panel; zip stops at the
# shorter of the two, so extra columns beyond the 16 panels are not drawn.
for ax, col in zip(panels, kde_cols):
    sns.kdeplot(x=col, y='F_Time', data=data, fill=True, ax=ax)

# Remove whichever panels received no plot instead of hard-coding the last
# three cells of the grid.
for ax in panels[len(kde_cols):]:
    fig.delaxes(ax)

# Lay out only after drawing: tight_layout can only make room for the axis
# labels and tick labels that exist when it is called.
fig.tight_layout(h_pad=3, w_pad=3)

In [None]:
# Long format: F_Time stays as an identifier column, every other column of
# data_f is unpivoted into 'variable' / 'value' pairs.
test = data_f.melt(id_vars=['F_Time'])

In [None]:
# Regression of each melted variable on F_Time: one facet per variable,
# four facets per row, with independent x and y axes.
sns.lmplot(
    data=test,
    x='F_Time',
    y='value',
    col='variable',
    col_wrap=4,
    sharex=False,
    sharey=False,
)

In [None]:
# Fit a fatigue-life distribution to the F_Time column; fit() returns the
# shape parameter followed by loc and scale. The tuple is echoed as output.
c, loc, scale = fatiguelife.fit(data['F_Time'])
(c, loc, scale)

In [None]:
# Sample from the fatigue-life distribution with the fitted parameters.
fatiguelife.rvs(c, loc=loc, scale=scale)

In [None]:
# Evaluate the fitted density on an evenly spaced grid (np.linspace default:
# 50 points) and plot it against that grid, so the x axis shows the evaluated
# values instead of the array index.
# NOTE(review): the end points are hard-coded; presumably they come from the
# F_Time data - confirm.
time_grid = np.linspace(200007, 361633)
plt.plot(time_grid, fatiguelife.pdf(time_grid, c, loc, scale))

In [None]:

# Two panels side by side: fitted fatigue-life density (left) and the
# histogram of F_Time (right).
fig, axes = plt.subplots(1, 2, figsize=(15,5))

# Evaluate the fitted pdf, including the fitted loc and scale, on a 100-point
# grid and draw it on the left axes.
# NOTE(review): the grid end points are hard-coded; presumably the observed
# F_Time range - confirm.
time_grid = np.linspace(200007, 3245267, 100)
axes[0].plot(time_grid, fatiguelife.pdf(time_grid, c, loc, scale))

# histplot is the axes-level histogram function and accepts ax=; displot is
# figure-level and would open a separate figure, leaving this panel empty.
sns.histplot(x=data['F_Time'], ax=axes[1])

In [None]:
# Cumulative kernel density estimate of F_Time with the area under the curve
# filled. `fill` is the current name of the deprecated `shade` argument and
# matches the other kdeplot call in this notebook.
sns.kdeplot(x=data['F_Time'], fill=True, cumulative=True)

In [None]:
np.linspace(fatiguelife.ppf(0.01, c)