In [None]:
def plot_row(axes,row,datacol,colorby,xlabel,vbins=None):
    """Fill one row of a four-column axes grid with fit-parameter scatter plots.

    The four panels of ``axes[row, :]`` show, against ``datacol``:
    log10(k), log10(c), x0 and chisq.

    NOTE: ``k``, ``c``, ``x0`` and ``chisq`` are not arguments; they are read
    from notebook-level globals and must be defined (with the same length as
    ``datacol``) before this function is called.

    Parameters
    ----------
    axes : 2-D array of matplotlib Axes
        Indexed as ``axes[row, col]``; columns 0-3 of ``row`` are drawn on,
        and every axes in the row gets an x/y label.
    row : int
        Row of ``axes`` to draw on.
    datacol : array-like
        x values shared by all four panels.
    colorby : array-like
        Values mapped through the ``coolwarm`` colour map for each point.
    xlabel : str
        x-axis label applied to every panel in the row.
    vbins : array-like or None, optional
        Bin label per point. When given, the points of each bin are joined by
        a faint black line in the first two panels.

    Returns
    -------
    None
    """
    scatter_kw = dict(c=colorby, cmap=plt.cm.coolwarm, marker='o', lw=0, s=30)

    log_k = np.log10(k)

    axes[row,0].scatter(datacol, log_k, **scatter_kw)
    axes[row,1].scatter(datacol, np.log10(c), **scatter_kw)

    # `vbins != None` is an element-wise comparison for arrays, which makes
    # the `if` ambiguous; identity comparison is what is meant.
    if vbins is not None:
        vbins = np.asarray(vbins)
        # NOTE(review): the scatter above plots log10(c) while these lines
        # (and the 'log(-c)' axis label) use log10(-c). One of the two is NaN
        # for any given sign of c -- confirm the sign convention of c.
        log_neg_c = np.log10(-c)
        # Iterate over the bins that were passed in (previously this read the
        # unrelated global `vb`).
        for v in np.unique(vbins):
            in_bin = vbins == v
            axes[row,0].plot(datacol[in_bin], log_k[in_bin], color='k', alpha=0.5)
            axes[row,1].plot(datacol[in_bin], log_neg_c[in_bin], color='k', alpha=0.5)

    axes[row,2].scatter(datacol, x0, **scatter_kw)
    axes[row,3].scatter(datacol, chisq, **scatter_kw)

    ylabels = ['logk','log(-c)','$log(x_0))$',r'$\chi^2$']

    for col,ax in enumerate(axes[row,:]):
        ax.set_ylabel(ylabels[col])
        ax.set_xlabel(xlabel)

    return None

In [None]:
# 4x4 diagnostic grid: the top row summarises the fitted parameters, the
# remaining three rows are filled by plot_row (one row per x-axis quantity).
fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(30, 30))
plt.subplots_adjust(hspace=0.3, wspace=0.3)

# Columns of the per-bin fit table. plot_row reads k, c, x0 and chisq as
# globals, so these names must stay as they are.
k = fit_vbin_results['k']
c = fit_vbin_results['c']
l = fit_vbin_results['l']
x0 = fit_vbin_results['l']
chisq = fit_vbin_results['chi2nu']
vb = fit_vbin_results['vbin']
Mr = fit_vbin_results['Mr']
# Redshift and R50 are used in log10 form throughout this cell.
z = np.log10(fit_vbin_results['redshift'])
R50 = np.log10(fit_vbin_results['R50'])
y0 = f_logistic(0, k, c, l)

# Top row: histograms of k and c, k-c scatter coloured by log10(z), histogram of x0.
axrow = 0
for _col, (_values, _label) in enumerate([(k, 'k'), (c, 'c')]):
    axes[axrow, _col].hist(_values)
    axes[axrow, _col].set_xlabel(_label)

axes[axrow, 2].scatter(k, c, c=z, cmap=plt.cm.coolwarm, marker='o', lw=0, s=30)
axes[axrow, 2].set_xlabel('k')
axes[axrow, 2].set_ylabel('c')

axes[axrow, 3].hist(x0)
axes[axrow, 3].set_xlabel('$x_0$')

# Rows 1-3: (x values, colour-by values, x label) for each plot_row call.
_row_specs = [(z, Mr, 'z'), (Mr, z, 'Mr'), (R50, z, 'R50')]
for _row, (_x, _colour, _name) in enumerate(_row_specs, start=1):
    plot_row(axes, _row, _x, _colour, _name)

In [None]:
def make_axes(n_morph,xlabel=r'$\log(f_v)$',ylabel='cumulative fraction'):
    """Create a near-square grid of shared-axis panels, one per item.

    The grid has ``ceil(sqrt(n_morph))`` columns and as many rows as needed;
    panels beyond ``n_morph`` are removed from the figure. The bottom row gets
    ``xlabel`` and the left column gets ``ylabel``.

    Parameters
    ----------
    n_morph : int
        Number of panels required (must be >= 1).
    xlabel, ylabel : str
        Axis labels for the bottom row / left column.

    Returns
    -------
    fig : matplotlib Figure
    axes : 1-D numpy array of Axes
        The flattened (row-major) grid. Its length is rows*columns, so when
        spare panels exist the trailing entries refer to axes that have been
        deleted from the figure; only the first ``n_morph`` are usable.

    Raises
    ------
    ValueError
        If ``n_morph`` is smaller than 1.
    """
    if n_morph < 1:
        raise ValueError('n_morph must be at least 1, got {}'.format(n_morph))

    # int()/float() keep the arithmetic correct on Python 2 as well, where
    # math.ceil returns a float and `/` on ints truncates.
    x_dimension = int(math.ceil(math.sqrt(n_morph)))
    y_dimension = int(math.ceil(n_morph / float(x_dimension)))
    n_spare = x_dimension*y_dimension - n_morph

    # squeeze=False always yields a 2-D array, so the labelling below works
    # for a single panel or a single row too (the previous special case
    # indexed a bare Axes object when n_morph == 1 and failed).
    fig,grid = plt.subplots(y_dimension,x_dimension,sharex=True,sharey=True,
                            squeeze=False,
                            figsize=(5*x_dimension,5*y_dimension))
    for ax in grid[-1,:]:
        ax.set_xlabel(xlabel)
    for ax in grid[:,0]:
        ax.set_ylabel(ylabel)

    plt.subplots_adjust(hspace=0,wspace=0)
    axes = grid.ravel()
    # Negative indices pick the spare panels at the end of the last row.
    for m in range(-n_spare,0):
        fig.delaxes(axes[m])
    return fig,axes

In [None]:
# One panel per Voronoi bin: the cumulative distribution of log10(f_v) in the
# lowest (blue) and highest (red) redshift bin, overlaid with the curves
# f_exp_pow evaluates for the per-bin parameters (k, c, l: dotted) and for the
# clipped (kf, cf, lf: dashed) parameters stored in `dout`.
n_morph = len(np.unique(vbins))
fig,axes = make_axes(n_morph)
colors = ['b','r']

fv_column = data[question + '_' + answer + '_weighted_fraction']
Mr = vbins_table['Mr']
R50 = vbins_table['R50']

# Grid on which the fitted curves are evaluated (loop-invariant).
xg = np.linspace(-1,0,100)

# Panels are addressed by position, not by bin label: the labels need not be
# 0..n_morph-1, in which case axes[v] would skip a panel or run off the end.
for panel,v in enumerate(np.unique(vbins)):
    ax = axes[panel]
    zbins_v = zbins[vbins == v]
    z_min = np.min(zbins_v)
    z_max = np.max(zbins_v)

    for m,z in enumerate([z_min,z_max]):
        fv_z = fv_column[(vbins == v) & (zbins == z)]
        fv_z = np.sort(fv_z)
        logfv = np.log10(fv_z)
        n = len(fv_z)

        cumfrac = np.linspace(0,1,n)
        ax.plot(logfv,cumfrac,color=colors[m],linewidth=2)

        row_select = dout[(dout['vbin'] == v) & (dout['zbin'] == z)]
        if len(row_select) == 0:
            # No fit stored for this bin: keep the data curve, skip the models
            # (indexing kf[0] below would otherwise raise).
            print(v,z,'no fit parameters found')
            continue

        k = row_select['k']
        c = row_select['c']
        l = row_select['l']
        # Restrict the second parameter set to the allowed ranges.
        kf = np.clip(row_select['kf'],kmin,kmax)
        cf = np.clip(row_select['cf'],cmin,cmax)
        lf = np.clip(row_select['lf'],lmin,lmax)

        ax.plot(xg,f_exp_pow(xg,k,c,l),linestyle='dotted',color=colors[m],linewidth=2)
        ax.plot(xg,f_exp_pow(xg,kf,cf,lf),linestyle='dashed',color=colors[m],linewidth=2)

        print(v,kf[0],cf[0],lf[0])

    # Set once per panel instead of once per redshift bin, so the label is
    # not drawn twice on top of itself.
    ax.set_ylim(0,2)
    ax.text(-2.5,1.5,'{}'.format(v))
        

In [None]:
# Histogram of the largest redshift found in redshift bin 1 of every
# Voronoi bin.
z_maxs = []

for v in np.unique(vbins):
    # Galaxies that are in Voronoi bin v and in redshift bin 1.
    zbins_0 = np.logical_and(vbins == v, zbins == 1)
    redshift_0_bin = data['REDSHIFT_1'][zbins_0]
    z_max = np.max(redshift_0_bin)
    z_maxs.append(z_max)

plt.hist(z_maxs)

In [None]:
def plot_fit_vbin(question,questions,parameter='k', predictor=None):
    """Plot a fitted parameter against redshift, R50 and Mr.

    Rows of the global ``fit_vbin_results`` table are selected by answer
    index; for each answer one row of three panels is drawn (x = 'redshift',
    'R50', 'Mr'). Each panel shows the points, the binned median of
    ``parameter`` (solid line) and, when ``predictor`` is given, the binned
    median of the predicted values (dashed line).

    Parameters
    ----------
    question : str
        Key into ``questions``.
    questions : mapping
        ``questions[question]['answerlabels']`` supplies the text used in the
        y-axis label of each row.
    parameter : str, optional
        Column of ``fit_vbin_results`` to plot.
    predictor : tuple (f, p) or None, optional
        When given, the plotted points are ``f(var, *p[answer])`` (or
        ``f(var)`` if ``p`` is None), where ``var`` stacks the 'Mr', 'R50'
        and 'redshift' columns, instead of the fitted ``parameter`` values.

    Notes
    -----
    Reads the notebook-level names ``fit_vbin_results``, ``binned_statistic``
    and ``plot_limits``. Only one answer row is drawn (``n_morph = 1``).
    """
    answerlabels = questions[question]['answerlabels']

    n_morph = 1
    # squeeze=False keeps axarr two-dimensional even for a single row, so the
    # loop below receives a row of three axes rather than individual Axes
    # objects (which cannot be indexed with axrow[0]).
    fig, axarr = plt.subplots(n_morph, 3, sharey='row', sharex='col',
                              squeeze=False, figsize=(15,3*n_morph))

    def plot(ax, x, answer, colourby):
        r = fit_vbin_results[fit_vbin_results['answer'] == answer]
        if predictor is None:
            param = r[parameter]
        else:
            f, p = predictor
            var = np.array([r[c] for c in ['Mr', 'R50', 'redshift']], np.float64)
            if p is not None:
                param = f(var, *p[answer])
            else:
                param = f(var)
        # 'none' is the portable spelling of "no marker edge"; an empty
        # string is not accepted as a colour by current matplotlib.
        ax.scatter(r[x], param, c=r[colourby], marker='.', edgecolor='none')
        # Solid line: binned median of the fitted parameter.
        y, bin_edges, binnumber = binned_statistic(r[x], r[parameter], statistic='median')
        xc = 0.5 * (bin_edges[:-1] + bin_edges[1:])
        ax.plot(xc, y, 'k-')
        if predictor is not None:
            # Dashed line: binned median of the predicted values.
            y, bin_edges, binnumber = binned_statistic(r[x], param, statistic='median')
            xc = 0.5 * (bin_edges[:-1] + bin_edges[1:])
            ax.plot(xc, y, 'k--')
        # y limits always follow the fitted values, not the predictions.
        low, high = plot_limits(r[parameter])
        ax.axis(ymin=low, ymax=high)

    for a, axrow in enumerate(axarr):
        plot(axrow[0], 'redshift', a, 'vbin')
        plot(axrow[1], 'R50', a, 'redshift')
        plot(axrow[2], 'Mr', a, 'redshift')
        axrow[0].set_ylabel("${}$ ({})".format(parameter,answerlabels[a]))
        if a == len(axarr) - 1:
            # Only the bottom row carries x labels (x axes are shared per column).
            axrow[0].set_xlabel('$z$')
            axrow[1].set_xlabel(r'$\log_{10}{R_{50}}$')
            axrow[2].set_xlabel('$M_r$')
    fig.subplots_adjust(hspace=0, wspace=0)

In [None]:
def chisq_fun(p, f, x, y):
    """Return the sum of squared residuals of the model ``f(x, *p)`` about ``y``.

    ``p`` is unpacked into the positional parameters of ``f``.
    """
    residuals = f(x, *p) - y
    squared = residuals ** 2
    return squared.sum()


def fit_vbin_function(data, vbins, zbins, fit_setup,
                      question,answer,kc_fit_results=None,
                      min_log_fv=min_log_fv,
                      even_sampling=True):
    """Fit the cumulative vote-fraction curve in every (Voronoi, redshift) bin.

    For each combination of ``vbins`` / ``zbins`` labels the
    ``<question>_<answer>_weighted_fraction`` column is sorted, paired with a
    0..1 cumulative fraction, and ``fit_setup['func']`` is fitted to
    (log10 fraction, cumulative fraction) by minimising ``chisq_fun`` with
    SLSQP.

    Parameters
    ----------
    data : table
        Must provide the vote-fraction column plus 'PETROMAG_MR',
        'PETROR50_R_KPC' and 'REDSHIFT_1'.
    vbins, zbins : array-like
        Bin label per row of ``data``.
    fit_setup : dict
        Uses the keys 'func', 'p0' and 'bounds'.
    question, answer : str
        Combined into the vote-fraction column name.
    kc_fit_results : optional
        Accepted for backward compatibility; not used by the fit.
    min_log_fv : float
        Lower cut on log10 vote fraction. The default is taken from the
        global of the same name when this function is defined.
    even_sampling : bool
        If True, the curve is resampled at 1000 points evenly spaced in
        (linear) vote fraction between ``10**min_log_fv`` and 1, after also
        cutting at the global ``max_log_fv``. If False, every point above
        ``min_log_fv`` is fitted.

    Returns
    -------
    Table with columns 'vbin', 'zbin', 'Mr', 'R50', 'redshift', 'k', 'c',
    'chi2nu' -- one row per fitted bin. Bins with no usable points are
    reported on stdout and omitted.
    """
    start_time = time.time()

    fv = question + '_' + answer +'_weighted_fraction'

    bounds = fit_setup['bounds']
    p0 = fit_setup['p0']
    func = fit_setup['func']

    # Rows of the output table.
    param_data = []

    # Loop over Voronoi magnitude-size bins
    for v in np.unique(vbins):
        vselect = vbins == v
        data_v = data[vselect]
        zbins_v = zbins[vselect]

        for z in np.unique(zbins_v):
            data_z = data_v[zbins_v == z]
            n = len(data_z)

            D = data_z[[fv]]
            D.sort(fv)
            D['cumfrac'] = np.linspace(0, 1, n)
            D['log10fv'] = np.log10(D[fv])
            if even_sampling:
                D_fit_log10fv = np.log10(np.linspace(10**(min_log_fv), 1, 1000))
                # NOTE: max_log_fv is read from the notebook globals.
                D = D[(D['log10fv'] > min_log_fv) & (D['log10fv'] < max_log_fv)]
                if len(D) == 0:
                    # Nothing inside the allowed range: the index clip below
                    # would produce -1 and index an empty table.
                    print('vbin {}, zbin {}: no points to fit, skipped'.format(v, z))
                    continue
                indices = np.searchsorted(D['log10fv'], D_fit_log10fv)
                D_fit = D[indices.clip(0, len(D)-1)]
            else:
                D_fit = D[D['log10fv'] > min_log_fv]
                if len(D_fit) == 0:
                    print('vbin {}, zbin {}: no points to fit, skipped'.format(v, z))
                    continue

            res = minimize(chisq_fun, p0,
                           args=(func,
                                 D_fit['log10fv'].astype(np.float64),
                                 D_fit['cumfrac'].astype(np.float64)),
                           bounds=bounds, method='SLSQP')
            p = res.x
            # NOTE(review): the degrees of freedom use the number of galaxies
            # in the bin (n), not the number of fitted points -- confirm this
            # is the intended normalisation.
            dof = n - len(p)
            chi2nu = res.fun / dof if dof > 0 else np.nan

            means = [data_z['PETROMAG_MR'].mean(),
                     np.log10(data_z['PETROR50_R_KPC']).mean(),
                     data_z['REDSHIFT_1'].mean()]

            # One-parameter models: pad with a fixed second value so every
            # output row has both a 'k' and a 'c' entry.
            if len(p) < 2:
                p = np.array([p[0], 10])

            param_data.append([v,z] + means + p[:2].tolist() + # Maybe change output table here
                              [chi2nu])

    fit_vbin_results = Table(rows=param_data,
                             names=('vbin','zbin', 'Mr',
                                    'R50', 'redshift', 'k', 'c', 'chi2nu'))

    print('All bins fitted! {}s in total'.format(time.time()-start_time))

    return fit_vbin_results

In [None]:
def make_fit_setup(function_dictionary,key):
    """Collect the entries for one function ``key`` into a flat dict.

    ``function_dictionary`` maps 'func', 'bounds', 'p0' and 'i_func' to
    per-key dictionaries; the result has the keys 'func', 'bounds', 'p0' and
    'inverse' (the latter taken from 'i_func').
    """
    # (name in the returned dict, name in function_dictionary)
    fields = (('func', 'func'),
              ('bounds', 'bounds'),
              ('p0', 'p0'),
              ('inverse', 'i_func'))
    return {target: function_dictionary[source][key] for target, source in fields}


def get_best_function(data,vbins,zbins,functions,question,answer):
    """Fit every candidate function and return the setup of the best one.

    Each key of ``functions['func']`` is fitted with ``fit_vbin_function``;
    the candidate with the smallest summed 'chi2nu' wins.

    Parameters
    ----------
    data, vbins, zbins, question, answer
        Passed straight through to ``fit_vbin_function``.
    functions : dict
        Dictionary of per-key dictionaries; 'func', 'bounds', 'p0' and
        'i_func' are read via ``make_fit_setup`` and 'label' is used for the
        progress printout.

    Returns
    -------
    dict
        The ``make_fit_setup`` result for the winning key, with 'p0' replaced
        by the mean fitted [k, c] of that candidate.
    """
    # Use the dictionary that was passed in; the body previously read a
    # global named `function_dictionary` and ignored this argument.
    keys = list(functions['func'].keys())

    chisq_tot = np.zeros(len(keys))
    k_tot = np.zeros(len(keys))
    c_tot = np.zeros(len(keys))

    for n,key in enumerate(keys):

        fit_setup = make_fit_setup(functions,key)
        fit_vbin_results = fit_vbin_function(data,vbins,zbins,fit_setup,question,answer)

        chisq = np.sum(fit_vbin_results['chi2nu'])

        chisq_tot[n] = chisq
        k_tot[n] = np.mean(fit_vbin_results['k'])
        c_tot[n] = np.mean(fit_vbin_results['c'])
        print('chisq({}) = {}'.format(functions['label'][key],chisq))

    best = np.argmin(chisq_tot)
    fit_setup = make_fit_setup(functions,keys[best])
    fit_setup['p0'] = [k_tot[best],c_tot[best]] # mean values to start the fitting from.

    return fit_setup


def get_fit_setup(fit_setup):
    """Unpack a fit-setup dict into the tuple ``(func, p0, bounds)``."""
    return tuple(fit_setup[name] for name in ('func', 'p0', 'bounds'))

In [None]:
from scipy.stats import binned_statistic

# Single-letter matplotlib colour codes, indexed by position.
colours = 'rbgmy'

# Split the volume-limited redshifts into 50 bins. Passing the redshifts as
# both sample and values makes `x` the statistic (default: mean) of the
# redshifts in each bin; `assignments` is the bin number of every galaxy.
redshift_volume_ok = data[volume_ok]['REDSHIFT_1']
x, edges, assignments = binned_statistic(redshift_volume_ok,
                                         redshift_volume_ok,
                                         bins=50)

def get_fraction(data,assignments,threshold):
    """Fraction of values above ``threshold`` within each bin.

    Parameters
    ----------
    data : array-like
        Values to test; supports boolean-mask indexing and ``>``.
    assignments : array-like
        Bin label of every element of ``data``.
    threshold : float
        A value counts when it is strictly greater than this.

    Returns
    -------
    numpy.ndarray
        One fraction per distinct label in ``assignments``, in the sorted
        order returned by ``np.unique``.
    """
    bin_ids = np.unique(assignments)
    fracs = np.zeros(len(bin_ids))

    for i,b in enumerate(bin_ids):
        data_b = data[assignments == b]
        # Compare against the argument (the body used to read a global
        # `p_th`). The mean of a boolean array is the fraction of True
        # entries and avoids integer division on Python 2.
        fracs[i] = np.mean(data_b > threshold)

    return fracs
        

# get_fraction returns one value per *occupied* bin, whereas `x` has one entry
# per bin. binned_statistic numbers bins from 1, so subtracting 1 gives the
# matching positions in `x`; with no empty bins this is simply all of `x`.
occupied_bins = np.unique(assignments) - 1
x_occupied = x[occupied_bins]

for j,p_th in enumerate(np.linspace(0.5,0.8,2)):

    raw_fracs = get_fraction(raw,assignments,p_th)
    bin_fracs = get_fraction(bin_method,assignments,p_th)
    fit_fracs = get_fraction(fit_method,assignments,p_th)

    plt.plot(x_occupied,raw_fracs,linestyle='solid',color=colours[j],linewidth=2)
    plt.plot(x_occupied,bin_fracs,linestyle='dashed',color=colours[j],linewidth=2)
    plt.plot(x_occupied,fit_fracs,linestyle='dotted',color=colours[j],linewidth=2)

plt.xlabel('redshift')
plt.ylabel('fraction')

# Red: p > 0.5
# Blue: p > 0.8
# Solid: raw
# Dashed: Binning method
# Dotted: Function fitting method

In [None]:
# Normalised histograms of the vote fraction: the raw values in the low
# redshift slice against the three corrected versions in the high redshift
# slice, all restricted to the volume-limited sample.
low_z = (data['REDSHIFT_1'] > 0.03) & (data['REDSHIFT_1'] < 0.04)
high_z = (data['REDSHIFT_1'] > 0.07) & (data['REDSHIFT_1'] < 0.08)
bins = np.linspace(0,1,20)
lb = ['raw','Willett+ 13','bin method','fit method']

# `density=True` replaces the `normed=True` keyword, which current matplotlib
# no longer accepts. Masks are combined with `&` (logical and) rather than `*`.
_ = plt.hist(raw_data[volume_ok & low_z]
             ,histtype='stepfilled',color='k',alpha=0.3
             ,density=True,bins=bins,label=lb[0])

_ = plt.hist(w13_data[volume_ok & high_z]
             ,histtype='step',color='b',linewidth=3,linestyle='dotted'
             ,density=True,bins=bins,label=lb[1])

_ = plt.hist(debiased_bin[volume_ok & high_z]
             ,histtype='step',color='g',linewidth=3,linestyle='dashed'
             ,density=True,bins=bins,label=lb[2])

_ = plt.hist(debiased_fit[volume_ok & high_z]
             ,histtype='step',color='r',linewidth=2,linestyle='solid'
             ,density=True,bins=bins,label=lb[3])

plt.legend()
plt.xlabel('$p_{smooth}$')
plt.ylabel('fraction')

In [None]:
# Compare the two k columns of `dout`: 'kf' against 'k' (top left), then the
# difference kf - k against redshift, Mr and R50.
colours = 'r'

# edgecolor='none' switches the marker outline off; the empty string used
# previously is not a valid colour in current matplotlib.
_scatter_kw = dict(c=colours, marker='.', edgecolor='none', alpha=1)

plt.subplot(2,2,1)
plt.scatter(dout['kf'], dout['k'], **_scatter_kw)
plt.xlabel('$k$ fit')
plt.ylabel('$k$')

_k_difference = dout['kf']-dout['k']
_panels = [('redshift', '$z$'), ('Mr', '$M_r$'), ('R50', '$R_{50}$')]

# Panels 2-4 differ only in the x column and its label.
for _panel, (_column, _xlabel) in enumerate(_panels, start=2):
    plt.subplot(2,2,_panel)
    plt.scatter(dout[_column], _k_difference, **_scatter_kw)
    plt.xlabel(_xlabel)
    plt.ylabel('$k$ fit - $k$')

In [None]:
# Compare the two c columns of `dout`: 'cf' against 'c' with a one-to-one
# line (top left), then the difference cf - c against redshift, Mr and R50.
colours = 'b'

# edgecolor='none' switches the marker outline off; the empty string used
# previously is not a valid colour in current matplotlib.
_scatter_kw = dict(c=colours, marker='.', edgecolor='none', alpha=1)

plt.subplot(2,2,1)
plt.scatter(dout['cf'], dout['c'], **_scatter_kw)
plt.plot([0, 6], [0, 6], 'k-')
plt.xlabel('$c$ fit')
plt.ylabel('$c$')
_ = plt.axis((0, 2, 0, 2))

_c_difference = dout['cf']-dout['c']
_panels = [('redshift', '$z$'), ('Mr', '$M_r$'), ('R50', '$R_{50}$')]

# Panels 2-4 differ only in the x column and its label.
for _panel, (_column, _xlabel) in enumerate(_panels, start=2):
    plt.subplot(2,2,_panel)
    plt.scatter(dout[_column], _c_difference, **_scatter_kw)
    plt.xlabel(_xlabel)
    plt.ylabel('$c$ fit - $c$')

In [None]:
# 2x2 grid of pairwise comparisons between the vote-fraction estimates for the
# volume-limited sample, coloured by redshift. Axes are shared, so fixing the
# limits on the first panel fixes them on all four.
fig, axes = plt.subplots(2, 2, figsize=(15, 15), sharex=True, sharey=True)
axes = axes.ravel()
axes[0].set_xlim(0, 1)
axes[0].set_ylim(0, 1)

# Panel m plots the estimate named ylabels[m] against the one named xlabels[m].
xlabels = ['raw','raw','raw','bin']
ylabels = ['W13','bin','fit','fit']

_estimates = {'raw': raw_data,
              'W13': w13_data,
              'bin': debiased_bin,
              'fit': debiased_fit}

for m, ax in enumerate(axes):
    ax.scatter(_estimates[xlabels[m]][volume_ok],
               _estimates[ylabels[m]][volume_ok],
               c=redshifts[volume_ok], cmap=plt.cm.coolwarm, lw=0, alpha=0.2)
    ax.set_xlabel('$p_{{{}}}$'.format(xlabels[m]))
    ax.set_ylabel('$p_{{{}}}$'.format(ylabels[m]))

In [None]:
# NOTE(review): make_axes is also defined in an earlier cell of this notebook;
# whichever cell runs last wins. Keep a single definition if possible.
def make_axes(n_morph,xlabel=r'$\log(f_v)$',ylabel='cumulative fraction'):
    """Create a near-square grid of shared-axis panels, one per item.

    The grid has ``ceil(sqrt(n_morph))`` columns and as many rows as needed;
    panels beyond ``n_morph`` are removed from the figure. The bottom row gets
    ``xlabel`` and the left column gets ``ylabel``.

    Parameters
    ----------
    n_morph : int
        Number of panels required (must be >= 1).
    xlabel, ylabel : str
        Axis labels for the bottom row / left column.

    Returns
    -------
    fig : matplotlib Figure
    axes : 1-D numpy array of Axes
        The flattened (row-major) grid. Its length is rows*columns, so when
        spare panels exist the trailing entries refer to axes that have been
        deleted from the figure; only the first ``n_morph`` are usable.

    Raises
    ------
    ValueError
        If ``n_morph`` is smaller than 1.
    """
    if n_morph < 1:
        raise ValueError('n_morph must be at least 1, got {}'.format(n_morph))

    # int()/float() keep the arithmetic correct on Python 2 as well, where
    # math.ceil returns a float and `/` on ints truncates.
    x_dimension = int(math.ceil(math.sqrt(n_morph)))
    y_dimension = int(math.ceil(n_morph / float(x_dimension)))
    n_spare = x_dimension*y_dimension - n_morph

    # squeeze=False always yields a 2-D array, so the labelling below works
    # for a single panel or a single row too (the previous special case
    # indexed a bare Axes object when n_morph == 1 and failed).
    fig,grid = plt.subplots(y_dimension,x_dimension,sharex=True,sharey=True,
                            squeeze=False,
                            figsize=(5*x_dimension,5*y_dimension))
    for ax in grid[-1,:]:
        ax.set_xlabel(xlabel)
    for ax in grid[:,0]:
        ax.set_ylabel(ylabel)

    plt.subplots_adjust(hspace=0,wspace=0)
    axes = grid.ravel()
    # Negative indices pick the spare panels at the end of the last row.
    for m in range(-n_spare,0):
        fig.delaxes(axes[m])
    return fig,axes

In [None]:
# Check how well the fitting worked: for every Voronoi bin, plot the
# cumulative log10(f_v) curve in its lowest (blue) and highest (red) redshift
# bin, the per-bin fit stored in fit_vbin_results (dashed) and the curve for
# the k, c values given by k_func / c_func (dotted).

f = fit_setup['func']

n = len(np.unique(vbins))
colors = 'br'

fig,axarr = make_axes(n_morph=n)

xg = np.linspace(-2,0,100)

for m,v in enumerate(np.unique(vbins)):

    z_bins_v = zbins[(vbins == v)]
    # Take the lowest redshift bin that is actually present in this Voronoi
    # bin rather than assuming it is labelled 1.
    lowest_z = np.min(z_bins_v)
    high_z = np.max(z_bins_v)
    low_z_data = data[(vbins == v) & (zbins == lowest_z)]
    high_z_data = data[(vbins == v) & (zbins == high_z)]

    low_z_params = fit_vbin_results[(fit_vbin_results['vbin'] == v)
                                    & (fit_vbin_results['zbin'] == lowest_z)]
    high_z_params = fit_vbin_results[(fit_vbin_results['vbin'] == v)
                                     & (fit_vbin_results['zbin'] == high_z)]
    param_list = [low_z_params,high_z_params]

    for m2,z_data in enumerate([low_z_data,high_z_data]):

        params = param_list[m2]

        fv = z_data[question + '_' + answer + '_weighted_fraction']
        fv = np.sort(fv)
        cf = np.linspace(0,1,len(fv))

        # Zero vote fractions have no finite log10; drop them from the curve.
        select = fv != 0
        log10fv = np.log10(fv[select])
        cf_s = cf[select]

        axarr[m].plot(log10fv,cf_s,color=colors[m2],linewidth=2)

        if len(params) == 0:
            # No fit row for this bin: the data curve is still shown, but the
            # model curves cannot be evaluated. (This check used to come
            # after the parameters had already been used.)
            continue

        axarr[m].plot(xg,f(xg,params['k'],params['c']),
                      linestyle='dashed',color=colors[m2],linewidth=1.5)

        x = [params['Mr'],params['R50'],params['redshift']]
        # Clip the predicted parameters to their allowed ranges.
        k = np.clip(k_func(x, *kparams[0]),kmin,kmax)
        c = np.clip(c_func(x, *cparams[0]),cmin,cmax)

        axarr[m].plot(xg,f(xg,k,c),linestyle='dotted',color=colors[m2],linewidth=2)
        axarr[m].text(0.1,0.9,'v bin = {}'.format(v),transform=axarr[m].transAxes)

# Highest redshift bin = red
# Lowest redshift bin = blue

In [None]:
# Questions to debias. The remaining questions are currently switched off:
#   't02_edgeon', 't03_bar', 't04_spiral', 't10_arms_winding', 't11_arms_number'
question_order = ['t01_smooth_or_features']

for question in question_order:

    # NOTE(review): this iterates over the FIRST element of 'answers'. If
    # 'answers' is a flat list of strings this loops over the characters of
    # the first answer -- confirm the structure of `questions`.
    for answer in questions[question]['answers'][0]:

        debiased_column = question + '_' + answer + '_debiased_rh'

        # Nothing to do when the output column already exists.
        if debiased_column in full_data.colnames:
            print(question + '_' + answer + ' already debiased!')
            continue

        print('----------------------------------')
        print('Question to be debiased:',question)
        print('Answer to be debiased:',answer)
        debiased = bin_and_debias(full_data,question,questions,answer)
        print('----------------------------------')

In [None]:
# Stellar-mass-limited view: PETROMAG_MG - PETROMAG_MR against
# LOGMSTAR_BALDRY06 for the rows of `vl` with a finite stellar mass.

vl_m = vl[np.isfinite(vl['LOGMSTAR_BALDRY06'])]
# Both axes must come from the filtered table; taking the magnitudes from the
# unfiltered `vl` gives arrays of different length whenever any row was
# dropped.
plt.scatter(vl_m['LOGMSTAR_BALDRY06'],vl_m['PETROMAG_MG']-vl_m['PETROMAG_MR'])

plt.xlabel('LOGMSTAR_BALDRY06')
plt.ylabel('PETROMAG_MG - PETROMAG_MR')
plt.ylim(0,1)
plt.xlim(10.3,10.7)