In [1]:
import pandas as pd
import math as mt
import numpy as np
import matplotlib.pyplot as plt

In [2]:
#copy dispersion function:
def dispersion(ClusterData, x = 'x_coord', y = 'y_coord'):
    '''
    Calculate the dispersion of a crater cluster from the coordinates of its craters.

    The dispersion is defined as the standard deviation (numpy default, ddof=0) of the
    separations between all possible crater pairs.
    This method gives meaningful results for clusters with more than 3 craters.

    Coordinates are expected in degrees. They are converted to metres with the radius
    of Mars using a local flat approximation: latitude differences are scaled by the
    arc length of one degree, and longitude differences additionally by the cosine of
    the mean latitude of the pair.

    :param ClusterData: Dataframe containing all craters of the cluster
    :type ClusterData: pandas dataframe
    :param x: column name giving the longitude, defaults to 'x_coord'
    :type x: str
    :param y: column name giving the latitude, defaults to 'y_coord'
    :type y: str
    :return: tuple ``(dispersion, sep_array)``; ``sep_array`` holds the separations in
        metres for the pairs (0,1), (0,2), ..., (1,2), ... in that order. With fewer
        than two craters there are no pairs, and ``(nan, empty array)`` is returned.
    :rtype: tuple(float, numpy.ndarray)
    '''
    Rmars = 3390000 #radius of Mars in metres
    metres_per_degree = Rmars * (np.pi / 180) #arc length of one degree on the surface

    #x and y are the names of the columns in ClusterData denoting longitude and latitude
    #Assumes that x and y are in degrees still!
    coords = ClusterData[[x, y]].to_numpy(dtype=float)
    lon, lat = coords[:, 0], coords[:, 1]

    #index pairs (n, m) with n < m, in the same order as a nested n/m loop
    first, second = np.triu_indices(len(coords), k=1)
    if first.size == 0:
        return np.nan, np.array([])

    #meridians converge towards the poles, so the east-west distance shrinks with
    #cos(latitude); the correction must use the mean LATITUDE of the pair
    mean_lat = (lat[first] + lat[second]) / 2
    dx = (lon[second] - lon[first]) * metres_per_degree * np.cos(np.radians(mean_lat))
    dy = (lat[second] - lat[first]) * metres_per_degree
    sep_array = np.hypot(dx, dy) #separation of every crater pair in metres

    return np.std(sep_array), sep_array #dispersion as standard deviation

In [52]:
# Load the input tables:
#   df            - crater table, row MultiIndex built from the first two columns
#   df_parameters - cluster parameter table, row index taken from the first column
df = pd.read_excel(
    'DataTables/AllCraters.xlsx',
    index_col=[0, 1],
)
df_parameters = pd.read_excel(
    'DataTables/ClusterParametersAll.xlsx',
    index_col=0,
)

In [56]:
#run the dispersion on all clusters and create a Multiindex of all separations for each cluster
#Only clusters with more than 3 craters are processed. The per-cluster frames are
#collected in a list and concatenated once, instead of re-copying a growing
#DataFrame on every iteration.
separation_frames = []
for ID, df_new in df.groupby(level = 0):
    if len(df_new.index)>3:
        sep_array = dispersion(df_new)[1] #only the pair separations are needed here
        #rows are indexed by (cluster ID, running pair number)
        pair_index = pd.MultiIndex.from_product(
            [[ID], range(len(sep_array))], names = ['HiRise_ID', 'pair'])
        separation_frames.append(
            pd.DataFrame({'Separation(m)': sep_array}, index = pair_index))
#pd.concat raises on an empty list, so fall back to an empty frame when no cluster qualified
df_separations = pd.concat(separation_frames) if separation_frames else pd.DataFrame()

In [60]:
#adding the mean, median, mode, lowest quartile and lowest decile of the pair
#separations of each cluster to the parameters list
mean_dict = {}
median_dict = {}
mode_dict = {}
quart_dict = {}
cent_dict = {}
for ID, df_new in df_separations.groupby(level = 0):
    separations = df_new['Separation(m)']
    mean_dict[ID] = separations.mean()
    median_dict[ID] = separations.median()
    #Series.mode() returns a Series with every tied value (sorted ascending), not a
    #scalar; store the smallest mode so the column holds one number per cluster.
    #NOTE: if all separations in a cluster are distinct, every value is a mode and
    #this is simply the smallest separation.
    modes = separations.mode()
    mode_dict[ID] = modes.iloc[0] if not modes.empty else np.nan
    #10 % and 25 % quantiles, unpacked in the order requested
    cent_dict[ID], quart_dict[ID] = separations.quantile([0.1, 0.25])

#clusters without an entry in the dictionaries (e.g. skipped clusters) get NaN
cluster_ids = df_parameters['HiRise_ID']
df_parameters['Separation mean (m)'] = cluster_ids.map(mean_dict)
df_parameters['Separation median (m)'] = cluster_ids.map(median_dict)
df_parameters['Separation mode (m)'] = cluster_ids.map(mode_dict)
df_parameters['Separation lowest decentile (m)'] = cluster_ids.map(cent_dict)
df_parameters['Separation lowest quartile (m)'] = cluster_ids.map(quart_dict)

In [63]:
# Save the parameter table, including the new separation columns, to the repository.
# NOTE(review): this writes to the working directory, whereas the table was read from
# 'DataTables/ClusterParametersAll.xlsx' - confirm that the differing path is intended.
output_path = 'ClusterParametersAll.xlsx'
df_parameters.to_excel(output_path)