In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scipy
import scipy.stats

In [None]:
# Input/output locations, relative to the directory the notebook is run from.
ResultsPath = '../../Data/SummaryResults/'
FiguresPath = '../../Data/Figures/'
# makedirs also creates any missing parent directories, and exist_ok=True makes
# the cell safe to re-run when the folder is already there.
os.makedirs(FiguresPath, exist_ok=True)
# Row counts the loaded summary tables are checked against.
ExpectedRegionCount=28
ExpectedImageCount=221


In [None]:
#step 1: load all the summary results (per region and per image) for both methods
ML_df = pd.read_csv(ResultsPath+'LORO.csv',index_col=0)
ML_image_df = pd.read_csv(ResultsPath+'LORO_by_image.csv',index_col=0)
Clustering_df = pd.read_csv(ResultsPath+'Clustering_LORO.csv',index_col=0)
Clustering_image_df = pd.read_csv(ResultsPath+'Clustering_LORO_by_image.csv',index_col=0)

# Validate the tables as loaded. These checks must run BEFORE the reindex below:
# reindex always returns one row per label of ML_df.index (filling missing labels
# with NaN), so a row-count check made afterwards could never fail.
assert ML_image_df.shape[0]==ExpectedImageCount
assert Clustering_image_df.shape[0]==ExpectedImageCount
assert ML_df.shape[0]==ExpectedRegionCount
assert Clustering_df.shape[0]==ExpectedRegionCount
assert ML_df.index.isin(Clustering_df.index).all(), 'Clustering results are missing regions present in the CNN results'

# Scatter-plot marker area: 3000 * Dice^2, truncated to an integer.
for _frame, _dice_column in ((ML_df,'Region Dice'),
                             (Clustering_df,'Region Dice'),
                             (ML_image_df,'Dice'),
                             (Clustering_image_df,'Dice')):
    _frame['Bubblesize']=(np.square(_frame[_dice_column])*3000).astype('int')

# Order regions by ascending CNN Dice and put the clustering table in the same row order.
ML_df=ML_df.sort_values('Region Dice')
Clustering_df=Clustering_df.reindex(ML_df.index)

In [None]:
# Display the per-region CNN summary table as the cell output.
ML_df

In [None]:
### summary info: confusion-matrix totals pooled over all regions, and the metrics derived from them

# CNN: sum each per-region total once
cnn_tp = ML_df['Total TP'].sum()
cnn_fp = ML_df['Total FP'].sum()
cnn_fn = ML_df['Total FN'].sum()
cnn_tn = ML_df['Total TN'].sum()

TotalPixels = cnn_tp+cnn_fp+cnn_fn+cnn_tn
HumanArea = cnn_tp+cnn_fn      # pixels positive in the reference: TP + FN
CNNArea = cnn_tp+cnn_fp        # pixels positive in the prediction: TP + FP
CNNDice = 2*cnn_tp/(2*cnn_tp+cnn_fp+cnn_fn)
CNN_Recall = cnn_tp/(cnn_tp+cnn_fn)
CNN_Precision = cnn_tp/(cnn_tp+cnn_fp)
CNN_Specificity = cnn_tn/(cnn_tn+cnn_fp)

CNN_TotalCentresCorrect = ML_df['Total Centre Correct Count'].sum()/ML_df['Total Centre P4 Count'].sum()

# Clustering: same quantities from the clustering table
clustering_tp = Clustering_df['Total TP'].sum()
clustering_fp = Clustering_df['Total FP'].sum()
clustering_fn = Clustering_df['Total FN'].sum()
clustering_tn = Clustering_df['Total TN'].sum()

TotalPixelsClustering = clustering_tp+clustering_fp+clustering_fn+clustering_tn
ClusteringArea = clustering_tp+clustering_fp
ClusteringDice = 2*clustering_tp/(2*clustering_tp+clustering_fp+clustering_fn)
Clustering_Recall = clustering_tp/(clustering_tp+clustering_fn)
Clustering_Precision = clustering_tp/(clustering_tp+clustering_fp)
Clustering_Specificity = clustering_tn/(clustering_tn+clustering_fp)

Clustering_TotalCentresCorrect = Clustering_df['Total Centre Correct Count'].sum()/Clustering_df['Total Centre P4 Count'].sum()

# Fraction of all pixels covered by the reference, the CNN and clustering respectively
print(HumanArea/TotalPixels,CNNArea/TotalPixels,ClusteringArea/TotalPixelsClustering)



In [None]:
## summary figure with subplots (Figure 6): area coverage on top, pooled metrics below

# Set the tick-label sizes BEFORE creating the axes: rc settings are only
# guaranteed to apply to artists created after they are set.
matplotlib.rc('xtick', labelsize=30)
matplotlib.rc('ytick', labelsize=30)

fig, [ax1,ax2] = plt.subplots(nrows=2, ncols=1,figsize=(20,25),gridspec_kw={'height_ratios': [1, 5]})

# (legend label, colour, clustering value, CNN value), in drawing order (bottom to top).
metric_rows = [
    ('Specificity','deepskyblue',Clustering_Specificity,CNN_Specificity),
    ('Centre Overlap','indigo',Clustering_TotalCentresCorrect,CNN_TotalCentresCorrect),
    ('Precision','purple',Clustering_Precision,CNN_Precision),
    ('Recall','green',Clustering_Recall,CNN_Recall),
    ('Dice Coefficient','blue',ClusteringDice,CNNDice),
]
for row_number,(metric_label,colour,clustering_value,cnn_value) in enumerate(metric_rows):
    # barh merges bars that share a category name, so each metric gets a distinct
    # amount of trailing whitespace to keep its 'Clustering'/'CNN' rows separate.
    pad = ' '*(len(metric_rows)-row_number)
    ax2.barh(['Clustering'+pad],100*clustering_value,label=metric_label,color=colour)
    ax2.barh(['CNN'+pad],100*cnn_value,color=colour)
    if len(pad)>1:
        # zero-width bar with a whitespace-only name: an empty spacer row between metric groups
        ax2.barh([pad[1:]],[0])

ax1.barh(['Clustering'],100*ClusteringArea/TotalPixelsClustering,label='Area coverage',color='blue')
ax1.barh(['CNN'],100*CNNArea/TotalPixels,color='blue')
ax1.barh(['P4 Catalog'],100*HumanArea/TotalPixels,color='blue')

print(HumanArea/TotalPixels,CNNArea/TotalPixels,ClusteringArea/TotalPixelsClustering)

ax1.set_xlabel('Percent',fontsize=30)
ax2.set_xlabel('Percent',fontsize=30)

# Reverse the legend entries so they read top-to-bottom in the same order as the bars.
for axis in (ax1,ax2):
    handles, labels = axis.get_legend_handles_labels()
    axis.legend(handles[::-1], labels[::-1],fontsize=30,loc='upper right')

plt.tight_layout(pad=10.0)
plt.savefig(FiguresPath+'Figure6.pdf')
plt.show()

In [None]:
#regional summary (Figure 7): one panel per metric, regions along the x axis
fig, [ax1,ax2,ax3] = plt.subplots(nrows=3, ncols=1,figsize=(20,25))

def _plot_region_metric(ax, column, transform=None):
    """Plot one per-region metric for both methods, plus each method's median as a flat line.

    ax        -- axes to draw on
    column    -- column name present in both ML_df and Clustering_df
    transform -- optional function applied to the plotted values and to the median
                 (np.log for the area-ratio panel); None plots the raw values
    """
    if transform is None:
        transform = lambda values: values
    cnn_values = ML_df[column]
    clustering_values = Clustering_df[column]
    ax.plot(transform(cnn_values),'-o',markersize=20,label='CNN',color='b')
    ax.plot(transform(clustering_values),'s',markersize=20,label='Clustering',color='g')
    ax.plot(transform(cnn_values.median())*np.ones(cnn_values.shape[0]),'--b',label='Median over regions, CNN',linewidth=5)
    ax.plot(transform(clustering_values.median())*np.ones(clustering_values.shape[0]),'-.g',label='Median over regions, Clustering',linewidth=5)

_plot_region_metric(ax1,'Region Dice')                       #plot Dice coefficient
_plot_region_metric(ax2,'Region Centre Correct Fraction')    #plot centre overlap
_plot_region_metric(ax3,'Region Area Ratio',np.log)          #plot log area ratio

# Only the bottom panel shows x tick labels.
ax1.set_xticklabels([])
ax1.set_xticks([])
ax2.set_xticklabels([])
ax2.set_xticks([])
# Rotate and enlarge the labels the axis already produces from the plotted index,
# rather than overwriting them with set_xticklabels (which needs fixed tick positions).
ax3.tick_params(axis='x',labelrotation=90,labelsize=30)
ax1.set_ylabel('Dice Coefficient',fontsize=30)
ax2.set_ylabel('Centre Overlap',fontsize=30)
ax3.set_ylabel('Log area ratio',fontsize=30)
ax1.legend(fontsize=30)
ax1.set_ylim(0,1)
ax2.set_ylim(0,1)

# Global tick-label defaults; kept at this point in the cell as before.
# NOTE(review): whether this affects the figure above or only later ones depends on
# when matplotlib instantiates the ticks -- confirm the intended size for Figure 7.
matplotlib.rc('xtick', labelsize=20)
matplotlib.rc('ytick', labelsize=20)

fig.tight_layout()
plt.savefig(FiguresPath+'Figure7.pdf')
plt.show()

In [None]:
#more regional summary: precision/recall bubble plots per region (Figure 8) and per image (Figure 10)

def _precision_recall_bubbles(cnn_df, clustering_df, recall_column, precision_column, filename):
    """Scatter precision against recall for both methods and save the figure.

    Recall is plotted on the x axis and precision on the y axis, matching the axis
    labels. (Previously the columns were passed the other way round, so the labels
    did not describe the plotted data.) Marker area comes from the 'Bubblesize' column.

    cnn_df, clustering_df -- frames holding the two columns and 'Bubblesize'
    recall_column, precision_column -- column names to plot on x and y
    filename -- file name written inside FiguresPath
    Returns the created figure.
    """
    figure=plt.figure(figsize=(15,15))

    plt.scatter(recall_column,precision_column,s='Bubblesize',data=cnn_df,alpha=0.7,label='CNN',color='blue')
    plt.scatter(recall_column,precision_column,s='Bubblesize',data=clustering_df,alpha=0.7,label='Clustering',color='green')

    plt.xlabel('Recall',fontsize=40)
    plt.ylabel('Precision',fontsize=40)
    axes=plt.gca()
    axes.set_xlim(0,1)
    axes.set_ylim(0,1)
    axes.legend(fontsize=40,loc='upper left')
    plt.tight_layout()
    plt.savefig(FiguresPath+filename)
    plt.show()
    return figure

fig=_precision_recall_bubbles(ML_df,Clustering_df,'Region Recall','Region Precision','Figure8.pdf')
fig=_precision_recall_bubbles(ML_image_df,Clustering_image_df,'Recall','Precision','Figure10.pdf')

In [None]:
#Per image histograms (Figure 9)
fig, [ax1,ax2,ax3] = plt.subplots(nrows=3, ncols=1,figsize=(20,25))

def _paired_hist(ax, cnn_values, clustering_values, edges, cnn_shift):
    """Draw CNN and Clustering histograms side by side using the same bin edges.

    Each series is shifted together with its bin edges (CNN by +cnn_shift, Clustering
    by -cnn_shift), so the counts per bin are those of the unshifted data while the
    one-third-width bars are drawn next to each other instead of overlapping.
    """
    ax.hist(cnn_values+cnn_shift,bins=edges+cnn_shift,alpha=1,label='CNN',color='b',rwidth=0.333)
    ax.hist(clustering_values-cnn_shift,bins=edges-cnn_shift,alpha=1,label='Clustering',color='g',rwidth=0.333)

# Clustering rows containing any NaN are left out of the histograms.
aa=Clustering_image_df.dropna()

# Bin edges 0.0, 0.1, ..., 1.0. (np.arange(0,1,0.1) stops at 0.9 and would silently
# drop every image with Dice above 0.9.)
unit_edges=np.arange(0,1.1,0.1)
unit_centres=np.arange(0.05,1.05,0.1)

#plot Dice coefficient
_paired_hist(ax1,ML_image_df['Dice'],aa['Dice'],unit_edges,-0.05/3)
ax1.set_xlim(0,1)
ax1.set_xticks(unit_centres)
ax1.set_xlabel('Dice Coefficient',fontsize=30)
ax1.set_ylabel('Image Count',fontsize=30)
ax1.legend(fontsize=40,loc='upper left')

#plot centre overlap
_paired_hist(ax2,ML_image_df['Centre Correct Fraction'],aa['Centre Correct Fraction'],unit_edges,-0.05/3)
ax2.set_xlim(0,1)
ax2.set_xticks(unit_centres)
ax2.set_xlabel('Centre Overlap',fontsize=30)
ax2.set_ylabel('Image Count',fontsize=30)

#area ratio (natural log), bins of width 0.8 from -4 to 4
_paired_hist(ax3,np.log(ML_image_df['Area Ratio']),np.log(aa['Area Ratio']),np.arange(-4,4+8/30,8/10),8/30)
ax3.set_xlim(-4,4)
ax3.set_xticks(np.arange(-4+8/10,4,8/10))
ax3.set_xlabel('Log Area Ratio',fontsize=30)
ax3.set_ylabel('Image Count',fontsize=30)

# Global tick-label defaults; kept at this point in the cell as before.
matplotlib.rc('xtick', labelsize=20)
matplotlib.rc('ytick', labelsize=20)

plt.tight_layout()
plt.savefig(FiguresPath+'Figure9.pdf')
# plt.show() as in the other cells; Figure.show() warns on non-GUI (inline) backends
plt.show()

In [None]:
#solar longitude
# Correlation between solar longitude and per-image Dice: first the CNN, then Clustering.
# After the loop r1..r4 hold the Clustering results.
for image_df in (ML_image_df, Clustering_image_df):
    longitude = image_df['SOLAR_LONGITUDE']
    dice = image_df['Dice']

    r1 = np.corrcoef(longitude, dice)            # correlation matrix
    r2 = scipy.stats.pearsonr(longitude, dice)   # Pearson's r
    r3 = scipy.stats.spearmanr(longitude, dice)  # Spearman's rho
    r4 = scipy.stats.kendalltau(longitude, dice) # Kendall's tau

    for result in (r1, r2, r3, r4):
        print(result)

In [None]:
#solar longitude plot (Figure 11a): per-image Dice coefficient against solar longitude
fig, ax = plt.subplots(figsize=(15,15))
ax.scatter('SOLAR_LONGITUDE','Dice',data=ML_image_df,s=100,alpha=0.7,label='CNN',color='blue')
ax.scatter('SOLAR_LONGITUDE','Dice',data=Clustering_image_df,s=100,alpha=0.7,label='Clustering',color='green',marker='s')

ax.set_xlabel('Solar Longitude (degrees)',fontsize=40)
ax.set_ylabel('Dice Coefficient',fontsize=40)
# dashed vertical marker at a solar longitude of 280 degrees
ax.plot([280,280],[0,1],color='k',linestyle='--',linewidth=10)
ax.set_ylim(0.01,1)
ax.legend(fontsize=40,loc='upper left')
fig.tight_layout()
fig.savefig(FiguresPath+'Figure11a.pdf')
plt.show()

In [None]:
# Display the per-image CNN summary table as the cell output.
ML_image_df

In [None]:
# Per-image area fractions: positive pixels divided by all pixels of the image.
ml_pixels_per_image = ML_image_df['TP']+ML_image_df['FN']+ML_image_df['FP']+ML_image_df['TN']
clustering_pixels_per_image = Clustering_image_df['TP']+Clustering_image_df['FN']+Clustering_image_df['FP']+Clustering_image_df['TN']

ML_image_df['Area_P4'] = (ML_image_df['TP']+ML_image_df['FN'])/ml_pixels_per_image     # reference: TP + FN
ML_image_df['Area_CNN'] = (ML_image_df['TP']+ML_image_df['FP'])/ml_pixels_per_image    # CNN prediction: TP + FP
Clustering_image_df['Area_cl'] = (Clustering_image_df['TP']+Clustering_image_df['FP'])/clustering_pixels_per_image    # clustering prediction: TP + FP

# Figure 11b: area fraction against solar longitude for the two methods and the reference
fig, ax = plt.subplots(figsize=(15,15))
ax.scatter('SOLAR_LONGITUDE','Area_CNN',data=ML_image_df,s=100,alpha=0.7,label='CNN',color='blue')
ax.scatter('SOLAR_LONGITUDE','Area_cl',data=Clustering_image_df,s=100,alpha=0.7,label='Clustering',color='green',marker='s')
ax.scatter('SOLAR_LONGITUDE','Area_P4',data=ML_image_df,s=100,alpha=0.7,label='Planet Four',color='red',marker='^')

ax.set_xlabel('Solar Longitude (degrees)',fontsize=40)
ax.set_ylabel('Area (fraction of total pixels)',fontsize=40)
# dashed vertical marker at a solar longitude of 280 degrees
ax.plot([280,280],[0,1],color='k',linestyle='--',linewidth=10)
ax.set_ylim(0,0.8)
ax.legend(fontsize=40,loc='upper left')
fig.tight_layout()
fig.savefig(FiguresPath+'Figure11b.pdf')
plt.show()

In [None]:
def _add_tile_metrics(tiles_df, eps=1e-10):
    """Add per-tile 'Dice' and 'area ratio' columns to tiles_df in place and return it.

    tiles_df -- frame with 'TP', 'FP' and 'FN' columns
    eps      -- small constant added to numerators and denominators so neither ratio
                divides by zero; a tile with TP = FP = FN = 0 gets Dice eps/eps = 1

    'area ratio' is (TP+FN)/(TP+FP), each side offset by eps, plus eps.
    """
    tp, fp, fn = tiles_df['TP'], tiles_df['FP'], tiles_df['FN']
    tiles_df['Dice']=(2*tp+eps)/(eps+2*tp+fp+fn)
    tiles_df['area ratio']=(eps+(eps+tp+fn)/(eps+tp+fp))
    return tiles_df

# Per-tile confusion-matrix counts for the CNN and for clustering
ML_Tiles_df = _add_tile_metrics(pd.read_csv(ResultsPath+'Stats_by_tiles.csv'))
Clustering_Tiles_df = _add_tile_metrics(pd.read_csv(ResultsPath+'Clustering_Stats_by_tiles.csv'))

# Only the CNN tile table is required to be free of missing values here.
assert not ML_Tiles_df.isnull().values.any()

In [None]:
#Per tile histograms (Figure 12)
fig, [ax1,ax2] = plt.subplots(nrows=2, ncols=1,figsize=(20,25))

# Clustering rows containing any NaN are left out of the histograms.
aa=Clustering_Tiles_df.dropna()

# In both panels each series is shifted together with its bin edges, so the counts are
# those of the unshifted data while the one-third-width bars are drawn side by side.

#plot Dice coefficient
dice_edges=np.arange(0,1.1,0.1)
ax1.hist(ML_Tiles_df['Dice']-0.05/3,bins=dice_edges-0.05/3,alpha=1,label='CNN',color='b',rwidth=0.333)
ax1.hist(aa['Dice']+0.05/3,bins=dice_edges+0.05/3,alpha=1,label='Clustering',color='g',rwidth=0.333)
ax1.set_xlim(0,1)
ax1.set_xticks(np.arange(0.05,1.05,0.1))
ax1.set_xlabel('Dice Coefficient',fontsize=30)
ax1.set_ylabel('Tile Count',fontsize=30)
ax1.legend(fontsize=40,loc='upper right')
ax1.set_ylim(0,17000)

#plot log area ratio
# Both methods share one set of edges (-4.8 ... 4.0 in steps of 0.8). Previously the CNN
# used edges from -4 to 3.2 only, so the two series were counted over different ranges.
log_ratio_edges=np.arange(-4.8,4.8,8/10)
ax2.hist(np.log(ML_Tiles_df['area ratio'])+8/30,bins=log_ratio_edges+8/30,alpha=1,label='CNN',color='b',rwidth=0.333)
ax2.hist(np.log(aa['area ratio'])-8/30,bins=log_ratio_edges-8/30,alpha=1,label='Clustering',color='g',rwidth=0.333)
ax2.set_xlim(-5,4)
ax2.set_xticks(log_ratio_edges)
ax2.set_xlabel('Log Area Ratio',fontsize=30)
ax2.set_ylabel('Tile Count',fontsize=30)

# Global tick-label defaults; kept at this point in the cell as before.
matplotlib.rc('xtick', labelsize=20)
matplotlib.rc('ytick', labelsize=20)

fig.tight_layout()
plt.savefig(FiguresPath+'Figure12.pdf')
plt.show()