In [4]:
import pandas as pd
from os import getcwd, listdir
from scipy.spatial.distance import euclidean

# Folder holding the joined input files, resolved against the current working directory
data_path = f"{getcwd()}/data/sas_to_pandas/joins"

# Load the occupation list
occ_list = pd.read_csv(f"{data_path}/OCC_List.csv")

# New column layout: 'SOC_Code' first, then every original column from
# position 2 onward, and finally 'Grow' as the last column
reordered_columns = ['SOC_Code', *occ_list.columns[2:], 'Grow']
occ_list = occ_list[reordered_columns]

# Titles of the rows flagged 'Y' in the 'Grow' column
grow_titles = occ_list.loc[occ_list['Grow'].eq('Y'), 'Title']

# Titles of the rows flagged 'N' in the 'Grow' column
decline_titles = occ_list.loc[occ_list['Grow'].eq('N'), 'Title']


def make_dict(titles, df):
    """Map each title to the feature values of its row in ``df``.

    The feature columns are ``df``'s columns from position 2 up to (but not
    including) the last one, with the ``'_NAME_'`` column removed.

    Parameters
    ----------
    titles : pandas.Series
        Titles to use as dictionary keys. Its index labels identify which
        rows of ``df`` the titles belong to.
    df : pandas.DataFrame
        Frame holding the feature columns; must contain a ``'_NAME_'`` column
        among the selected columns.

    Returns
    -------
    dict
        ``{title: 1-D numpy array of that row's feature values}``. If a title
        occurs more than once, the last occurrence wins.

    Raises
    ------
    KeyError
        If ``'_NAME_'`` is not among the selected columns, or if a label in
        ``titles.index`` is not present in ``df.index``.
    """
    # Get Specific Columns from the DataFrame
    cols = df.columns.tolist()
    sub_df = df[cols[2:-1]].drop('_NAME_', axis=1)

    # titles.index holds index *labels*, so rows must be looked up by label
    # (.loc). Positional lookup (.iloc) is only correct when df has a default
    # 0..n-1 index and silently returns the wrong rows otherwise.
    selected_rows = sub_df.loc[titles.index].to_numpy()

    return {title: row for title, row in zip(titles.values, selected_rows)}


# Feature vectors keyed by title, for growing and for declining occupations
grow_dict = make_dict(grow_titles, occ_list)
decline_dict = make_dict(decline_titles, occ_list)

# Euclidean Distance

# For every growing job, the list of Euclidean distances to each declining
# job, in decline_dict's iteration order
distance_dict = {
    grow_title: [
        euclidean(grow_vec, decline_vec) for decline_vec in decline_dict.values()
    ]
    for grow_title, grow_vec in grow_dict.items()
}

# Transpose so that rows are growing jobs and columns are declining jobs
new_df = pd.DataFrame(distance_dict).T
new_df.columns = list(decline_dict)

# Sort the Indexes and the Columns (both in a single label-based selection)
new_df = new_df.loc[sorted(new_df.index), sorted(new_df.columns)]

# Get the Title Column: move the row labels into the first column and name it
new_df = new_df.reset_index()
new_df.columns = ['Title', *new_df.columns[1:]]

# Save Final DataFrame
# NOTE(review): to_csv writes the row index by default, so the file gets an
# unnamed leading column -- confirm that is intended.
new_df.to_csv(f"{data_path}/Final_Result.csv")

In [2]:
# Show the distance table as this cell's output.
# NOTE(review): this cell's execution count (2) is lower than that of the cell
# defining new_df (4), so the output below may be stale -- re-run after it.
new_df

Unnamed: 0,Title,Bill and Account Collectors,"Bookkeeping, Accounting, and Auditing Clerks",Data Entry Keyers,"Dispatchers, Except Police, Fire, and Ambulance",Executive Secretaries and Executive Administrative Assistants,File Clerks,"Human Resources Assistants, Except Payroll and Timekeeping",Merchandise Displayers and Window Trimmers,"Office Clerks, General",Order Clerks,Payroll and Timekeeping Clerks,Procurement Clerks,"Secretaries and Administrative Assistants, Except Legal, Medical, and Execut","Title Examiners, Abstractors, and Searchers"
0,Dental Assistants,12.847474,12.589055,12.072199,10.987629,11.891758,11.388288,11.540715,10.711765,11.316855,10.409366,13.747603,11.780845,11.671547,13.244678
1,Nursing Assistants,12.05441,12.210278,11.711396,10.282208,11.986805,10.18999,11.410622,9.614567,10.61612,10.823036,13.180357,11.292613,11.047869,12.677693
2,Pharmacy Technicians,9.735379,9.466208,9.076574,8.764942,9.656806,8.660953,9.018198,9.848508,8.581212,8.317025,10.989886,9.102544,8.797386,10.001495
3,Physical Therapist Assistants,12.32566,12.268211,11.668423,11.365918,11.312957,11.280151,11.347079,10.75207,10.969289,11.17098,13.269827,11.668925,11.021583,12.391594
4,"Preschool Teachers, Except Special Education",9.958097,10.440948,10.817833,10.320073,10.117875,9.342398,10.532787,8.510523,9.139032,10.615197,11.521836,10.345762,9.469905,11.331275
5,Security Guards,10.041857,10.530043,10.297742,8.172802,10.13341,9.950216,9.731742,9.883532,9.521455,9.063333,12.007868,9.465728,9.913874,10.202392
