Code by Dr. Martin Ross(1), modified from previous versions written with the assistance of Cindy Liu(2) <br>
ChatGPT (GPT-4) was also used to fix some issues and improve a few things <br>
(1) Associate Professor, Earth and Environmental Sciences <br>
(2) Coop student, Physics & Astronomy <br>
University of Waterloo, Canada

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd

In [None]:
# Expected layout of the input file: one sample per row and one weight-frequency
# column per size fraction, ordered from coarse (left) to fine (right).
# The grain size headers are in millimetres.
# Rename the file and adjust skiprows and the column range as needed.

# NOTE:
# In this example, the wt. freq. of the pan fraction in the input file has been
# redistributed across the full range of silt and clay in a decreasing fashion.
# This can be improved by analysing the fine fraction with another method,
# such as laser diffractometry.

# Weight frequencies as a 2D numpy array. The transpose puts the size fractions
# on axis 0 and the samples on axis 1, so sample i is allFreq[:, i].
allFreq = np.loadtxt(
    "modified_2021data.txt", skiprows=2, usecols=np.arange(1, 18)
).transpose()

# Grain size fractions: the first row returned after skipping one line, read as text
grainSizesData = np.loadtxt(
    'modified_2021data.txt', skiprows=1, usecols=np.arange(0, 17), dtype='str'
)[0]

# Desired number of decimal places for rounding
decimal_places = 4

# Grain size labels in mm, converted from text to rounded floats
grainSizes = [round(float(size), decimal_places) for size in grainSizesData]

# Sample IDs: column 0 of every data row
allLabels = np.loadtxt("modified_2021data.txt", skiprows=2, usecols=0, dtype='str')

In [None]:
# Show the parsed grain size labels (mm) to check the header row was read as expected
print(grainSizes)

In [None]:
# function for cumulative curve values
def cumulative(allSamples, start, end):
    """Return the running totals of the selected sample columns.

    inputs: (2D numpy array, int, int)

    allSamples is indexed as allSamples[:, i] to pull out sample i.
    start and end give the half-open column range [start, end) to process.

    Returns a list with one 1D array of cumulative sums per selected column,
    in column order (an empty list when the range is empty).
    """
    return [np.cumsum(allSamples[:, column]) for column in range(start, end)]

# function for cumulative curves
def cumulativeCurve(allSamples, start, end, xSize, ySize):
    """Draw one cumulative wt. frequency figure per sample in columns [start, end).

    inputs: (2D numpy array, int, int, int, int)

    xSize and ySize are passed to matplotlib as the figure size.
    The module-level grainSizes, phiVals and allLabels supply the x positions,
    the x tick labels and the sample names used in the titles.
    Each figure is displayed with plt.show(); nothing is returned.
    """
    curves = cumulative(allSamples, start, end)

    # Alternate between two line colours from one figure to the next
    palette = ('#465775', '#ef6f6c')

    for offset, curve in enumerate(curves):
        fig, ax = plt.subplots(1, 1, figsize=(xSize, ySize))
        plt.plot(range(len(grainSizes)), curve,
                 color=palette[offset % 2], linewidth=2.5, marker='o')

        # Points sit at integer positions; label those positions with phi values
        plt.xticks(np.arange(0, len(phiVals)), labels=phiVals, fontsize=14)
        plt.ylim((-0.5, 105))

        ax.set_xlabel('phi', fontsize=14)
        ax.set_ylabel('Cumulative wt. Frequency %', fontsize=14)
        ax.tick_params(axis='y', labelsize=14)
        ax.set_title('Cumulative wt. Frequency of Sample ' + str(allLabels[start + offset]),
                     pad=20, fontsize=18)
        plt.show()

In [None]:
def cumulativeCurve2(allSamples, start, end, xSize, ySize):
    """Overlay the cumulative wt. frequency curves of samples [start, end) on one figure.

    inputs: (2D numpy array, int, int, int, int)

    xSize and ySize are passed to matplotlib as the figure size.
    The module-level grainSizes, phiVals and allLabels supply the x positions,
    the x tick labels and the legend entries.

    Every sample in the range is drawn. The first five curves use the five
    colour / line style / marker combinations in order; further curves reuse
    the same lists with shifted pairings so they stay distinguishable.
    The figure is displayed with plt.show(); nothing is returned.
    """
    cumulativeVals = cumulative(allSamples, start, end)

    # Define a list of colors and line styles
    colors = ['#A0522D', '#708090', '#2E8B57', '#000000', '#FFA500']
    line_styles = ['-', '--', '-.', ':', (0, (3, 5, 1, 5))]
    markers = ['.', 'o', 'x', '+', '^']
    n_styles = len(colors)

    # Create a single figure and axis
    fig, ax = plt.subplots(1, 1, figsize=(xSize, ySize))

    for i, curve in enumerate(cumulativeVals):
        # 'lap' is 0 for the first five curves, so they keep the straight
        # pairing colors[i] / line_styles[i] / markers[i]. Each later lap
        # shifts the line style and marker relative to the colour.
        lap, slot = divmod(i, n_styles)

        # Plot the cumulative curve on the same axis with unique style and color
        ax.plot(range(len(grainSizes)), curve,
                color=colors[slot],
                linestyle=line_styles[(slot + lap) % n_styles],
                linewidth=2.5,
                marker=markers[(slot + 2 * lap) % n_styles],
                markersize=8,  # Adjust marker size here
                label=f'{allLabels[start+i]}')

    plt.xticks(np.arange(0, len(phiVals)), labels=phiVals, fontsize=14)
    plt.ylim((-0.5, 105))
    ax.set_xlabel('phi', fontsize=14)
    ax.set_ylabel('Cumulative wt. Frequency %', fontsize=14)
    ax.tick_params(axis='y', labelsize=14)
    ax.set_title('Cumulative wt. Frequency of Selected Samples',
                 pad=20, fontsize=18)
    ax.legend()  # Show legend with sample labels

    plt.show()

# Example usage:
# cumulativeCurve2(allSamples, start=0, end=5, xSize=10, ySize=6)




In [None]:
# calculating phi values
# grain size labels coerced to plain floats
grainSizesFloat = [float(size) for size in grainSizes]

# phi = -log2(grain size in mm), rounded to the nearest whole number
phiVals = []

for size in grainSizesFloat:
    whole_phi = -float(round(math.log(size, 2)))
    # negating a rounded 0 gives -0.0; 'or' swaps any zero for a plain 0.0
    phiVals.append(whole_phi or 0.0)

print(phiVals)

In [None]:
# functions for verbal representations of skew and standard deviation
def verbalSkew(skewVal):
    """Return the descriptive skewness class for a numeric skewness value.

    Classes are checked from the most positive downwards; the first bound
    the value satisfies decides the label:
        > 0.3   Strongly fine-skewed
        >= 0.1  Fine skewed
        >= -0.1 Near symmetrical
        >= -0.3 Coarse skewed
        else    Strongly coarse skewed
    """
    if skewVal > 0.3:
        return "Strongly fine-skewed"
    if skewVal >= 0.1:
        return "Fine skewed"
    if skewVal >= -0.1:
        return "Near symmetrical"
    if skewVal >= -0.3:
        return "Coarse skewed"
    return "Strongly coarse skewed"

def verbalStdev(stdev):
    """Return the descriptive sorting class for a numeric standard deviation.

    Values strictly above 4.00 are "Extremely poorly sorted". Otherwise the
    first lower bound (checked from largest to smallest) that the value
    reaches decides the label; anything below 0.35 is "Very well sorted".
    """
    if stdev > 4.00:
        return "Extremely poorly sorted"

    # (inclusive lower bound, label), ordered from poorest to best sorting
    sortingClasses = (
        (2.0, "Very poorly sorted"),
        (1.00, "Poorly sorted"),
        (0.71, "Moderately sorted"),
        (0.50, "Moderately well sorted"),
        (0.35, "Well sorted"),
    )
    for lowerBound, label in sortingClasses:
        if stdev >= lowerBound:
            return label

    return "Very well sorted"

In [None]:
# function for skewness, mean, and stdev
def skewMeanStdev(cumulativeVals):
    """Compute graphic skewness, mean and standard deviation for each curve.

    inputs: (list of cumulative wt. frequency curves)

    For every curve, the phi values at the 5, 16, 50, 84 and 95 % cumulative
    levels are read off by linear interpolation against the module-level
    phiVals, then combined into the three statistics.

    Returns [skews, means, stdevs], three lists with one entry per curve.
    """
    skews, means, stdevs = [], [], []

    for curve in cumulativeVals:
        # phi at each cumulative percentage (np.interp: x = curve, y = phiVals)
        phi5p, phi16p, phi50p, phi84p, phi95p = (
            np.interp(percent, curve, phiVals) for percent in (5, 16, 50, 84, 95)
        )

        innerSpread = phi84p - phi16p   # spread between the 16 % and 84 % levels
        outerSpread = phi95p - phi5p    # spread between the 5 % and 95 % levels

        skews.append(
            (phi84p + phi16p - (2 * phi50p)) / (2 * innerSpread)
            + (phi95p + phi5p - (2 * phi50p)) / (2 * outerSpread)
        )
        means.append((phi16p + phi50p + phi84p) / 3)
        stdevs.append((innerSpread / 4) + (outerSpread / 6.6))

    return [skews, means, stdevs]

In [None]:
# cumulative curves for the first 41 samples (columns 0-40 of allFreq)
my_cumulative_vals = cumulative(allFreq, 0, 41)

# skewness, mean and standard deviation, one value per sample
skewness, mean, stdev = skewMeanStdev(my_cumulative_vals)

# print each list of results after its heading
for heading, results in (("Skewness:", skewness),
                         ("Mean:", mean),
                         ("Standard Deviation:", stdev)):
    print(heading, results)

In [None]:
# define the input variables
allSamples = allFreq
start = 0
end = 41

# call the cumulative and skewMeanStdev functions to obtain the outputs
my_cumulative_vals = cumulative(allSamples, start, end)
skewness, mean, stdevs = skewMeanStdev(my_cumulative_vals)


# create a dataframe from the output
# - 'Stdev' uses the stdevs computed just above, so this cell does not depend
#   on a variable left behind by another cell
# - the labels are sliced to [start:end] so their length always matches the
#   statistics, even when the file holds more samples than the range covers
df = pd.DataFrame({'Sample ID': allLabels[start:end], 'Mean': mean,
                   'Skewness': skewness, 'Stdev': stdevs})

# print the dataframe
print(df)


In [None]:
def createDataFrame(allSamples, allLabels, start, end):
    """Build a summary table of grain-size statistics for samples [start, end).

    inputs: (2D numpy array, sequence of sample IDs, int, int)

    Returns a pandas DataFrame with the columns 'Sample ID', 'Mean',
    'Skewness', 'Standard Deviation', 'Verbal Skewness' and 'Verbal Stdev',
    one row per sample in the range.
    """
    skews, means, stdevs = skewMeanStdev(cumulative(allSamples, start, end))

    summary = pd.DataFrame({
        'Sample ID': allLabels[start:end],
        'Mean': means,
        'Skewness': skews,
        'Standard Deviation': stdevs,
    })

    # descriptive classes derived from the numeric columns
    summary['Verbal Skewness'] = summary['Skewness'].apply(verbalSkew)
    summary['Verbal Stdev'] = summary['Standard Deviation'].apply(verbalStdev)

    return summary

In [None]:
# Summary statistics table for the first 41 samples (columns 0-40 of allFreq)
createDataFrame(allFreq,allLabels, 0, 41)

In [None]:
# function to generate weight frequency distribution graphs
def wtHist(allSamples, start, end, xSize, ySize):

    '''
    Draw one weight frequency bar chart per sample in columns [start, end).

    inputs: (2D numpy array, int, int, int, int)

    allSamples is indexed as allSamples[:, i] to pull out sample i.
    xSize and ySize are passed to matplotlib as the figure size.
    The module-level grainSizes and allLabels supply the x tick labels and
    the sample names used in the titles.

    start < end (an empty range draws nothing)

    Each figure is displayed with plt.show(); nothing is returned.
    '''
    # Collect every requested column first, so an out-of-range index fails
    # before any figure is drawn
    specifiedSamples = [allSamples[:, i] for i in range(start, end)]

    # sampleCount is the absolute column index: it picks both the bar colour
    # (alternating on even / odd) and the label shown in the title
    for sampleCount, freqs in enumerate(specifiedSamples, start=start):
        if (sampleCount % 2) == 0:
            colour = '#4464ad'
        else:
            colour = '#a4b0f5'
        positions = np.arange(len(freqs))
        fig, ax = plt.subplots(1,1,figsize=(xSize,ySize))
        ax.bar(positions, freqs, color=colour, alpha=0.8)
        # markers only (linewidth=0) on top of each bar
        ax.plot(positions, freqs, color='#f58f29',
                marker='o', linewidth=0)
        # ticks are offset by 0.5 from the bar centres
        plt.xticks(np.arange(0,len(grainSizes))+0.5, labels=grainSizes)
        ax.set_xlabel('Grain Size (mm)', fontsize=14)
        ax.set_ylabel('Frequency (wt. %)', fontsize=14)
        ax.tick_params(axis='y', labelsize=14)
        ax.set_title('Weight Frequency Distribution of Sample '
                     + str(allLabels[sampleCount]), pad=20, fontsize=18)
        plt.show()

In [None]:
# weight frequency distribution of samples 1-5 (columns 0-4 of allFreq), figure size 14 x 8
wtHist(allFreq,0,5, 14,8)


In [None]:
# Draw one cumulative-curve figure per sample; cumulativeCurve only displays
# the figures with plt.show() and does not save them to a PDF file
cumulativeCurve(allFreq, 0, 5, 10, 7) #cumulative curves of grain sizes in samples 1-5

In [None]:
# Overlay the cumulative curves of the selected samples on a single figure;
# cumulativeCurve2 only displays it with plt.show() and does not save it to a PDF file
cumulativeCurve2(allFreq, 0, 5, 10, 7) #cumulative curves of grain sizes in samples 1-5