In [1]:
## numpy is used for the numerical work (rounding, mean and standard deviation)
import numpy as np 
import matplotlib as mpl 
import pandas as pd
import os

## Select the non-interactive Agg backend before pyplot is imported, so that figures are only rendered to files
mpl.use('agg')

import matplotlib.pyplot as plt

In [2]:
# Folder in which the figures are saved.
# Raw string: the backslash of the Windows path must never be read as an escape sequence.
output_folder=r'F:\example_output'

In [3]:
# Create the output folder (including missing parent folders) the first time the notebook is run
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
    # Function-call form prints the same text under Python 2 and Python 3
    print("Output folder created")

In [4]:
## Path to the CSV file holding the statistics of the training set.
## Raw string: none of the backslashes of the Windows path may be read as an escape sequence.
training_stats_csv=r'F:\data\classification\i.segment.stats\stats_training_set.csv'

In [5]:
## Load the training-set statistics CSV into a dataframe
## (comma-separated values, column names taken from the first row)
df = pd.read_csv(
    training_stats_csv,
    header=0,
    sep=',',
)

In [6]:
# Preview the first 10 rows of the dataframe
df.head(n=10)

Unnamed: 0,cat,Class,area,perimeter,compact_circle,compact_square,fd,opt_blue_min,opt_blue_max,opt_blue_range,...,Brightness_max,Brightness_range,Brightness_mean,Brightness_stddev,Brightness_sum,Brightness_coeff_var,Brightness_first_quart,Brightness_median,Brightness_third_quart,Brightness_perc_90
0,12572,1,76.0,58.0,1.876793,0.601227,1.87517,411,498,87,...,2098,331,1926.815789,72.304129,146438,3.752519,1869,1932.5,1990,2012
1,343693,6,340.0,164.0,2.508993,0.449734,1.749841,374,451,77,...,1630,122,1571.955882,24.242545,534465,1.54219,1555,1572.0,1590,1603
2,90580,1,41.0,36.0,1.586009,0.711458,1.929945,509,617,108,...,2534,346,2328.804878,89.970786,95481,3.863389,2249,2325.0,2375,2474
3,530237,1,49.0,38.0,1.531372,0.736842,1.86934,515,629,114,...,2772,525,2548.244898,128.092844,124864,5.026709,2443,2535.0,2672,2723
4,672972,6,83.0,60.0,1.857836,0.607362,1.853126,406,451,45,...,1875,111,1817.180723,23.22501,150826,1.278079,1801,1816.0,1832,1845
5,125928,6,43.0,40.0,1.720762,0.655744,1.961532,274,360,86,...,1528,325,1350.767442,72.167711,58083,5.342719,1301,1356.0,1385,1433
6,660935,5,86.0,76.0,2.311849,0.488085,1.944492,358,458,100,...,1634,312,1502.906977,46.159387,129250,3.07134,1478,1502.0,1531,1552
7,520150,3,47.0,46.0,1.892797,0.596144,1.988817,325,409,84,...,1562,247,1466.0,54.05041,68902,3.686931,1441,1470.0,1502,1527
8,282251,3,32.0,32.0,1.595769,0.707107,1.999982,466,560,94,...,2334,450,2126.25,111.878841,68040,5.261791,2033,2124.5,2201,2292
9,788451,2,32.0,38.0,1.894976,0.595458,2.099152,542,1488,946,...,4847,2503,3229.21875,572.839623,103335,17.739263,2836,3115.5,3633,4061


In [7]:
# Name of the dataframe column that holds the class of each object
class_column='Class'

# Sorted list of the distinct values found in that column
class_label=sorted(df[class_column].unique())
class_label

[1, 2, 3, 4, 5, 6]

In [12]:
# Select the classes to be compared (here: every class found in the data)
class_to_use=class_label
# Function-call form prints the same text under Python 2 and Python 3
print(class_to_use)

[1, 2, 3, 4, 5, 6]


In [13]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

def get_cmap(n, name='brg'):
    '''Return the matplotlib colormap `name` resampled to n entries.

    The returned colormap is callable: calling it with an integer index in
    0, 1, ..., n-1 gives the RGBA color of that entry, so every index can be
    drawn with its own color.

    n    -- number of entries (colors) wanted
    name -- name of a standard matplotlib colormap (default 'brg')
    '''
    # plt.get_cmap is the pyplot-level accessor; plt.cm.get_cmap was deprecated
    # in matplotlib 3.7 and removed in 3.9, while plt.get_cmap(name, lut) is
    # available in both old and recent releases.
    return plt.get_cmap(name, n)

In [25]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Colormap with one entry per class; entry i is used for the i-th class of class_to_use
cmap = get_cmap(len(class_to_use))

# Object features to plot (one figure is saved per feature)
feature_list=('NDWI_first_quart','opt_nir_first_quart','NDVI_first_quart')

for feature in feature_list:
    # Fresh figure and axes for this feature (explicit interface, so nothing
    # depends on whatever figure pyplot currently considers active)
    fig, ax = plt.subplots(figsize=(15, 15))

    for i, class_name in enumerate(class_to_use):
        # Values of the feature for the objects of this class, rounded to
        # 3 decimals and sorted so that the curve is drawn from left to right
        class_values = df.loc[df[class_column] == class_name, feature].values
        data = np.sort(np.around(class_values, decimals=3))
        if data.size == 0:
            # No object of this class: there is nothing to fit or draw
            continue

        # Normal density using the sample mean and the standard deviation
        # (np.std default, i.e. ddof=0) of the class as parameters
        hmean = np.mean(data)
        hstd = np.std(data)
        pdf = stats.norm.pdf(data, hmean, hstd)
        ax.plot(data, pdf, linestyle='solid', c=cmap(i), label="Class " + str(class_name))

    # Label the figure so that it can be read on its own
    ax.set_xlabel(str(feature))
    ax.set_ylabel("Probability density")
    ax.set_title("Fitted normal distribution of " + str(feature) + " per class")

    # Build the legend once, after every class has been drawn
    ax.legend()

    # Save the figure
    output=os.path.join(output_folder,"Example_B_feature_"+str(feature)+".svg")
    fig.savefig(output, format='svg', dpi=800)

    # Release the figure so that figures do not accumulate in memory
    plt.close(fig)