In [None]:
import os
import pandas as pd
import numpy as np
from astropy import units as u
from astropy.time import Time
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy import stats
%matplotlib widget

#data = pd.read_csv('/Users/87steven/Documents/ASIAA/ACC variability/ACC Xmatch pushkarevkovalev_source_size.csv')
data = pd.read_csv('/Users/87steven/Documents/ASIAA/ACC variability new/BZCAT_Xmatch_ACC_csv.csv')

# Column shortcuts used by the cells below.
Name = data.name
cla = data.Class
RA = data.RA
DEC = data.DEC
data_flux = data.flux_ALMA
data_flux_err = data.errflux
data_band = data.band
data_freq = data.freq
obs_date = data.date
# Columns read from the alternative input file (commented out above):
#data_b = data.gla_b
#data_l = data.gla_l
#data_2 = data.theta_2
#data_5 = data.theta_5
#obs_8 = data.theta_8
#data_15 = data.theta_15
#data_24 = data.theta_24
#obs_43 = data.theta_43

#######################################################################################################
### put name into a list
# drop_duplicates keeps first-appearance order, so source number i refers to the
# same source on every run. (The iteration order of a set of strings changes from
# one kernel session to the next, which made loops such as range(25, 26) pick a
# different source each time.)
namelist = Name.drop_duplicates().tolist()

### put index of each source into an array
# name_index[i] holds the positional row numbers of every observation of namelist[i].
name_index = [np.flatnonzero((Name == source_name).to_numpy()) for source_name in namelist]
    

In [None]:
def ClearData():
    """Return a new, empty result table.

    The table is a dict mapping each output column ('Name' plus one
    modulation-index column for bands 3, 6 and 7) to its own empty list, so
    rows can be appended column by column.
    """
    columns = ('Name', 'modu_index_3', 'modu_index_6', 'modu_index_7')
    return {column: [] for column in columns}

#######################################################################################################
### Plot light curve and calculate modulation index
#######################################################################################################
color = ['red', 'green', 'blue', 'orange', 'black', 'pink', 'gray']
# Sorted so the panel order and the band labels never depend on set iteration order.
bandlist = sorted(set(data_band))

# Band number -> CSV column that receives that band's modulation index.
MODULATION_COLUMNS = {3: 'modu_index_3', 6: 'modu_index_6', 7: 'modu_index_7'}
# A band needs at least this many flux points for its modulation index to be kept.
MIN_POINTS = 20
# One figure is created per source; closing each one when it is finished keeps
# memory bounded. Set to False to keep every figure open for inspection.
CLOSE_FIGURES = True

# Figure styling does not change between sources, so it is set once.
plt.rcParams['figure.figsize'] = [10, 15]
plt.rcParams['axes.linewidth'] = 3
plt.rc('font', size = 16)
mpl.rcParams['xtick.major.size'] = 6
mpl.rcParams['xtick.major.width'] = 2
mpl.rcParams['ytick.major.size'] = 6
mpl.rcParams['ytick.major.width'] = 2

df = ClearData()

# Every source is processed and gets exactly one row in the output table.
for i in range(0, len(namelist)):
    print('i = ', i ,', Source name: '+namelist[i])
    source_rows = name_index[i]

    # Time span of all observations of this source, shared by every panel.
    date = obs_date[source_rows]
    t = Time(list(date.values), format = 'isot', scale = 'utc')
    mjd_start = min(t.mjd)
    MJD_end = max(t.mjd) - mjd_start

    source_bands = data_band[source_rows]

    #######################################################################################################
    fig = plt.figure()
    gs = fig.add_gridspec(len(bandlist), 1, hspace = 0)
    # squeeze=False keeps a 2-D array even for a single band; take the only column.
    axs = gs.subplots(squeeze = False)[:, 0] # , sharey='row'

    # Modulation index of each band that passed the quality cuts for this source.
    band_modulation = {}

    for k, band_num in enumerate(bandlist):
        ax = axs[k]
        # Row labels of this source's observations in the current band.
        band_rows = source_bands.index[(source_bands == band_num).to_numpy()].to_numpy()

        if len(band_rows) > 0:
            # calculate flux, mean flux, flux error
            flux = np.array(data_flux[band_rows])
            flux_err = np.array(data_flux_err[band_rows])
            mean_flux = np.nanmean(flux)
            # modulation index = NaN-ignoring population standard deviation / mean flux
            modulation_index = np.nanstd(flux)/mean_flux

            # A zero index or too few points is reported as NaN in the CSV.
            if modulation_index != 0 and len(flux) >= MIN_POINTS:
                band_modulation[band_num] = modulation_index

            # observation date formatting
            band_date = obs_date[band_rows]
            band_t = Time(list(band_date.values), format='isot', scale='utc')
            elapsed = band_t.mjd - mjd_start

            ax.plot(elapsed, flux, '.', c = 'black', markersize = 6, label = 'band '+str(band_num))
            ax.errorbar(elapsed, flux, color = 'black', fmt = '.', yerr = flux_err, ecolor = "black", capsize = 3)
            ax.hlines(y = mean_flux, xmin = -1000, xmax = MJD_end*10, linestyles = '--', linewidth = 3, color = 'red', label = 'Mean flux')

            text = 'Modulation Index = %4.3f' %(modulation_index)
            ax.text(0.1, max(flux)*0.95, text, verticalalignment = 'top', fontsize = 14)

            ax.set_ylim(min(flux)*0.8, max(flux)*1.1)
            ax.legend(loc = 'upper right', bbox_to_anchor = (1.15, 0.9), shadow=True, fontsize = 12, markerscale = 2)
        else:
            # No data in this band: draw an empty, labelled panel. The y-limits are
            # left on autoscale because there is no flux of this band to derive them from.
            ax.plot(np.nan, np.nan, '.', c = 'black', markersize = 6, label = 'band '+str(band_num))
            ax.legend(loc = 'upper right', bbox_to_anchor = (1.10, 0.9), shadow=True, fontsize = 12, markerscale = 2)

        ax.grid(True)
        ax.set_xlim(-100, MJD_end*1.1)

    # One row per source; bands without a usable modulation index are stored as NaN.
    df['Name'].append(namelist[i])
    for band_num, column in MODULATION_COLUMNS.items():
        df[column].append(band_modulation.get(band_num, np.nan))

    for ax in axs:
        ax.set(xlabel = "Sidereal Days since MJD "+str("%.f" % mjd_start), ylabel = 'S (Jy)')
        ax.label_outer()

    axs[0].set_title(namelist[i] + ' Light Curve', fontsize = 20)

    figurename = namelist[i] + ' Light Curve'
    #plotname = '/Users/87steven/Documents/ASIAA/ACC variability/light curve figure gt 30/'+ figurename
    #fig.savefig (plotname, dpi = 300, bbox_inches='tight')

    print('================================================ Figure saved succfully ================================================')

    if CLOSE_FIGURES:
        plt.close(fig)

### save-file
CSVfile = '/Users/87steven/Documents/ASIAA/ACC variability new/modulation index_ge20_0817.csv'
dff = pd.DataFrame(df)
dff.to_csv(CSVfile, index = False)

print('================================================ modulation index.csv saved succfully ================================================')


In [None]:
#######################################################################################################
### Calculate SF and plot light curve, SF, and SF fitting
### Not yet complete (2022/5/9)
### Weighting of the fit differs from the Matlab version
### Confidence interval estimation has not been done yet
###
### NOTE: this cell calls strfnerr() and plot_fig(), which are defined in the cells
### further down; run those cells first.
#######################################################################################################

# Sorted so that position k always refers to the same band, whatever the set iteration order.
bandlist = sorted(set(data_band))

# Bands that are plotted, with their (light-curve row, SF row) in the 6-row figure.
SF_PANEL_ROWS = {3: (0, 1), 6: (2, 3), 7: (4, 5)}

### Set parameters
dtlag = 4        # width of one lag bin, in the unit of `days`
makeplots = 1    # 1 = draw the panels, anything else = compute only
####(ADJUSTABLE PARAMETER)####
nprth = 30       # only lag bins with more than this many pairs are used
SFerrorbar = 3   # selects one of the three SF error-bar formulas below

plt.rcParams['figure.figsize'] = [10, 15]
plt.rcParams['axes.linewidth'] = 3
plt.rc('font', size = 16)
mpl.rcParams['xtick.major.size'] = 6
mpl.rcParams['xtick.major.width'] = 2
mpl.rcParams['ytick.major.size'] = 6
mpl.rcParams['ytick.major.width'] = 2

for i in range(25, 26): # len(namelist)
    source_rows = name_index[i]
    datalen = len(np.array(data_flux[source_rows]))
    print('i = ', i ,', Source name: ' +namelist[i], ', data length = ', datalen)

    Source_name = str(namelist[i])

    if datalen > 100:
        source_bands = data_band[source_rows]

        fig = plt.figure()
        gs = fig.add_gridspec(6, 1)
        axs = gs.subplots() # , sharey='row'

        # NOTE: plot_fig() reads the notebook-level names `k`, `axs` and `date`, and
        # strfnerr() reads `mean_flux`, so these variables must keep these names.
        for k, band_num in enumerate(bandlist):
            # Row labels of this source's observations in the current band.
            band_rows = source_bands.index[(source_bands == band_num).to_numpy()].to_numpy()
            if len(band_rows) == 0:
                continue

            # observation date format
            band_date = obs_date[band_rows]
            band_t = Time(list(band_date.values), format='isot', scale='utc')
            date = band_t.mjd

            # calculate flux, flux error, mean flux
            flux = np.array(data_flux[band_rows])
            flux_err = np.array(data_flux_err[band_rows])
            mean_flux = np.nanmean(flux)
            ### modulation index calculation
            modulation_index = np.nanstd(flux)/mean_flux

            soldays = date - date[0]
            days = soldays*0.99726957
            nlag = round( (max(date)-min(date))/dtlag )

            #######################################################################################################
            ### extract SF
            #######################################################################################################
            sf, stdsf, tlag, nsf = strfnerr(days, flux, flux_err, dtlag, nlag)

            tau = tlag

            # only select bins in which the number of pairs is above the threshold
            ipl = np.where( (nsf > nprth) & (tau != 0))[0]

            # calculate SF error bars and errors of bin size
            # Bins with 0 or 1 pairs give inf/NaN here; they are never selected by ipl.
            with np.errstate(divide = 'ignore', invalid = 'ignore'):
                if SFerrorbar == 1:
                    errsf = stdsf/np.sqrt(nsf-1)
                elif SFerrorbar == 2:
                    errsf = np.mean( sf[ipl] )*np.sqrt( tau/max(days) )
                elif SFerrorbar == 3:
                    errsf = stdsf/np.sqrt(nsf-1)*np.sqrt( tau/max(days) )

            errsfbin = np.ones(len(sf))*(dtlag/2)

            #######################################################################################################
            ### return variables
            #######################################################################################################

            Serrmean = np.median(flux_err)/mean_flux

            Dnoise = 2*Serrmean**2

            x = tau[ipl]
            y = sf[ipl]

            errsfuse = errsf[ipl]
            meansf = np.mean(sf[ipl])
            weight = meansf/errsfuse

            def func(x, a, b):
                return a*( 1+b/1000 )/( 1+b/x )+Dnoise
            ## a = D(1000)
            ## b = tau_char
            ## x = tau
            # x and y always have the same length, so one length check is enough.
            if len(x) > 10:
                popt, pcov = curve_fit(func, x, y, sigma = 1/weight, absolute_sigma = True)
                a, b = popt
                #print(popt[0], popt[1])
                yfit = func(x, a, b)

                sigma = np.sqrt(np.diagonal(pcov))
                bound_upper = func(x, *(popt + sigma))
                bound_lower = func(x, *(popt - sigma))
            else:
                # Too few usable lag bins to fit: hand NaN placeholders to plot_fig.
                x = np.array( [np.nan, np.nan] )
                y = np.array( [np.nan, np.nan] )
                yfit = np.array( [np.nan, np.nan] )
                flux_err = np.nan
                errsf = np.array( [np.nan, np.nan] )
                ipl = [0, 1]

            if makeplots == 1 and band_num in SF_PANEL_ROWS:
                j1, j2 = SF_PANEL_ROWS[band_num]
                plot_fig(days, flux, mean_flux, flux_err, x, y, yfit, errsf, ipl, Dnoise, Source_name, bandlist, j1, j2)

        #figurename = namelist[i] + ' Light Curve and Structure Function'
        #plotname = '/Users/87steven/Documents/ASIAA/ACC variability/light curve and SF gt 100/'+ figurename
        #fig.savefig (plotname, dpi = 300, bbox_inches='tight')

        print('================================================ Figure saved succfully ================================================')
            
                

In [None]:
#######################################################################################################
### extract SF
#######################################################################################################
### start of function "strfnerrOVRO.m"
def strfnerr(days, flux, flux_err, dtlag, nlag):
    """Compute the binned structure function (SF) of a light curve.

    The flux is first divided by its NaN-ignoring mean. Every pair (i, j) with
    j >= i (including each point paired with itself) is then assigned to the
    lag bin round((t_j - t_i) / dtlag), and the squared difference of the two
    normalised fluxes is accumulated per bin.

    Parameters
    ----------
    days : array-like
        Observation times; only differences between them are used.
    flux : array-like
        Flux values, one per entry of `days`.
    flux_err : array-like
        Flux uncertainties. Kept for interface compatibility; none of the
        returned quantities depends on it.
    dtlag : number
        Width of one lag bin, in the same unit as `days`.
    nlag : int
        Number of lag bins that are filled (bins 0 .. nlag-1).

    Returns
    -------
    sf : ndarray, shape (nlag + 1,)
        Mean squared difference per bin (0 where the bin holds no pair).
    stdsf : ndarray, shape (nlag + 1,)
        Population standard deviation of the squared differences per bin
        (0 where the bin holds no pair).
    tlag : ndarray, shape (nlag + 1,)
        Lag of each bin, i.e. bin number times `dtlag`.
    nsf : ndarray, shape (nlag + 1,)
        Number of pairs in each bin.

    The last entry (bin `nlag`) of sf, stdsf and nsf is always 0: only bins
    0 .. nlag-1 are filled.
    """
    days = np.asarray(days, dtype = float)
    flux = np.asarray(flux, dtype = float)

    time = days - days[0]
    # Normalise inside the function so it does not depend on a notebook-level mean_flux.
    flux_var = flux/np.nanmean(flux)

    # Collect bin numbers and squared differences row by row and join them once,
    # instead of growing arrays with np.append on every iteration.
    bin_chunks = []
    diffsq_chunks = []
    for i in range(len(time)):
        lag_bin = np.round( (time[i:] - time[i])/dtlag )
        keep = (lag_bin >= 0) & (lag_bin < nlag)
        bin_chunks.append(lag_bin[keep].astype(int))
        diffsq_chunks.append( (flux_var[i] - flux_var[i:][keep])**2 )

    bin_all = np.concatenate(bin_chunks)
    diffsq_all = np.concatenate(diffsq_chunks)

    # Three independent output arrays (no shared buffer between them).
    sf = np.zeros(nlag+1)
    stdsf = np.zeros(nlag+1)
    nsf = np.zeros(nlag+1)

    for k in range(nlag):
        diffsq_k = diffsq_all[bin_all == k]
        npairs = len(diffsq_k)
        nsf[k] = npairs
        if npairs > 0:
            # summation divided by the total number of pairs in the bin
            sf[k] = np.sum(diffsq_k)/npairs
            stdsf[k] = np.std(diffsq_k)

    tlag = np.linspace(0, nlag, num = nlag+1)*dtlag

    return sf, stdsf, tlag, nsf

### end of function "strfnerrOVRO.m"
#######################################################################################################

In [None]:
#######################################################################################################
### plot light curve, SF, and SF fitting
#######################################################################################################
def _plot_fig_global(name):
    """Fetch a notebook-level variable that plot_fig falls back to when it is not passed in."""
    try:
        return globals()[name]
    except KeyError:
        raise NameError("plot_fig needs '%s': pass it as a keyword argument or define it in the notebook" % name) from None


def plot_fig(days, flux, mean_flux, flux_err, x, y, yfit, errsf, ipl, Dnoise, Source_name, bandlist, j1, j2,
             axs = None, band_index = None, start_mjd = None):
    """Draw the light curve of one band on panel j1 and its SF with the fit on panel j2.

    Parameters
    ----------
    days, flux, flux_err : light-curve times, fluxes and flux error bars.
    mean_flux : flux level drawn as a dashed red horizontal line.
    x, y, yfit : lags, SF values and fitted SF values of the plotted bins.
    errsf, ipl : SF error array and the indices into it that give the error
        bars of the plotted points (`errsf[ipl]`).
    Dnoise : level drawn as a dashed blue horizontal line on the SF panel.
    Source_name : source name used in the figure title.
    bandlist : list of band numbers, indexed with `band_index` for the legend.
    j1, j2 : row numbers of the light-curve panel and of the SF panel.
    axs : optional sequence of axes to draw on. Defaults to the notebook-level
        variable `axs`.
    band_index : optional position in `bandlist` of the band being drawn.
        Defaults to the notebook-level loop variable `k`.
    start_mjd : optional MJD quoted in the x-axis label. Defaults to the first
        element of the notebook-level variable `date`.

    Returns None; the function only draws on the given axes.
    """
    if axs is None:
        axs = _plot_fig_global('axs')
    if band_index is None:
        band_index = _plot_fig_global('k')
    if start_mjd is None:
        start_mjd = _plot_fig_global('date')[0]

    lc_ax = axs[j1]
    sf_ax = axs[j2]

    ### light curve
    lc_ax.plot(days, flux, '.', c = 'black', markersize = 6, label = 'band '+str(bandlist[band_index]))
    lc_ax.errorbar(days, flux, color = 'black', fmt = '.', yerr = flux_err, ecolor = "black", capsize = 3)
    lc_ax.hlines(y = mean_flux, xmin = -1000, xmax = 10000, linestyles = '--', linewidth = 3, color = 'red', label = 'Mean flux')

    lc_ax.grid(True)
    lc_ax.set_xlim(0, max(days))
    lc_ax.set_ylim(min(flux)*0.8, max(flux)*1.1)
    lc_ax.legend(loc = 'upper left', bbox_to_anchor = (0.01, 0.98), shadow=True, fontsize = 12, markerscale = 2)
    lc_ax.set(ylabel = 'S (Jy)')

    ### SF and SF fitting
    sf_ax.plot(x, y, '.', c = 'black', markersize = 6, label = 'SF')
    sf_ax.errorbar(x, y, color = 'black', fmt = '.', yerr = errsf[ipl], ecolor = "black", capsize = 3)
    sf_ax.plot(x, yfit, '-', c = 'red', markersize = 3, label = 'SF fitting')
    #sf_ax.plot(x, bound_upper, '--', c = 'pink')
    #sf_ax.plot(x, bound_lower, '--', c = 'pink')
    sf_ax.hlines(y = Dnoise, xmin = -1000, xmax = 10000, linestyles = '--', linewidth = 3, color = 'blue', label = 'Dnoise')

    sf_ax.grid(True)
    # Only fix the axis ranges when there are real SF points; with the NaN
    # placeholders the limits would collapse to (0, 0), so autoscale is kept instead.
    if len(x) > 0 and not np.isnan(x[0]):
        sf_ax.set_xlim(.0, max(x)*1.)
        sf_ax.set_ylim(0, max(y)*1.1)
    sf_ax.legend(loc = 'upper left', bbox_to_anchor = (0.01, 0.98), shadow=True, fontsize = 12, markerscale = 2)
    sf_ax.set(xlabel = "Sidereal Days since MJD "+str("%.f" % start_mjd), ylabel = 'D($\\tau$)')

    #for ax in axs.flat:
    #    ax.label_outer()

    axs[0].set_title(Source_name+' Light Curve', fontsize = 20)

    return

In [None]:
#######################################################################################################
### An attempt to estimate the confidence interval of the SF fitting
### Not yet complete (5/9)
###
### NOTE: this cell uses x, y and Dnoise as they were left by the last band processed
### in the SF cell above, so that cell has to be run first.
#######################################################################################################

from lmfit import Model
import lmfit

def func(x, a, b):
    return a*( 1+b/1000 )/( 1+b/x )+Dnoise
## a = D(1000)
## b = tau_char
## x = tau

# NOTE(review): the initial guess uses min(x) (a lag) for the amplitude a and min(y)
# (an SF value) for the time scale b; this looks swapped -- confirm before relying on it.
popt, pcov = curve_fit(func, x, y, p0 = [min(x), min(y)], method = None)
a, b = popt
# '%.2E' formats a float directly; no Decimal conversion (and no extra import) is needed.
print('a = %5.4f, b = %.2E, Dnoise = %6.5f' % (popt[0], popt[1], Dnoise) )
yfit = func(x, a, b)

sigma = np.sqrt(np.diagonal(pcov))

### Model
#rams = gmodel.make_params(a = 0, b = 0)
gmodel = Model(func)
result = gmodel.fit(y, x=x, a = min(x), b = min(y))

print('fit reslut report ver 1: \n', result.fit_report())

#######################################################################################################
### lmfit
pars = lmfit.Parameters()

pars.add_many( ('a', min(x)), ('b', min(y)) ) # initial values of a and b

def residuals(p):
    return func(x, p['a'], p['b'] )-y

mini = lmfit.Minimizer( residuals, pars)
result = mini.minimize()

print('\nfit reslut report ver 2: \n',lmfit.fit_report( result.params))

ci = lmfit.conf_interval(mini, result)
lmfit.printfuncs.report_ci(ci)

### confidence bound index
#ci['a'][3][1] # lower limit of best fit of a
#ci['b'][3][1] # lower limit of best fit of b

#ci['a'][1][1] # lower limit of 95% confidence bound of a
#ci['b'][1][1] # lower limit of 95% confidence bound of b

#ci['a'][5][1] # upper limit of 95% confidence bound of a
#ci['b'][5][1] # upper limit of 95% confidence bound of b
