Importing the necessary libraries

In [1]:
import pandas as pd
import numpy as np

Reading the CSV file and removing the empty rows

In [2]:
# Load the results CSV into the working dataframe `df`.
# The path is relative to the current working directory.
df = pd.read_csv("Data/motivation_results_sum.csv")

In [3]:
# Index labels of the rows to discard from the freshly loaded dataframe.
rows_remove = [177, 325, 374, 499, 665, 1434, 1768, 2058, 2087]
# Drop every listed row in one call (instead of copying the whole frame once
# per label) and renumber the index so it is contiguous again.
df = df.drop(index=rows_remove).reset_index(drop=True)

Function for merging multiple subgroups into one:

In [4]:
def adding_df_vals(column_names, frame=None):
    '''
    Merge several subcategory columns into one "positive categorization" array.

    For every row the values of the given columns are added together. If the
    sum is >= 1 this is a positive categorization and the entry is set to 1,
    otherwise it is set to 0. A row whose sum is NaN (missing value in any of
    the columns) is set to 0.

    `column_names`: a list with the columns that are to be added together
                    (must contain at least one name)
    `frame`: the dataframe to read the columns from; when omitted, the
             notebook-level `df` is used

    Returns a float numpy array with one entry per row, in row order.
    Raises IndexError if `column_names` is empty.
    '''
    if len(column_names) == 0:
        raise IndexError("column_names must contain at least one column")
    if frame is None:
        frame = df  # fall back to the notebook-level dataframe

    # Vectorized row-wise sum; works by position, so it does not depend on the
    # index being 0..n-1. skipna=False keeps a NaN sum as NaN, and NaN >= 1 is
    # False, so such rows end up as 0.
    row_sums = frame[list(column_names)].sum(axis=1, skipna=False)
    return (row_sums >= 1).to_numpy().astype(float)

Combining Intrinsic Value-related categories and Cost-related categories into one each and inserting them into the dataframe:

In [5]:
# Merge the two intrinsic-value subcategory columns into one indicator array.
intr_val_gen = adding_df_vals(['Intrinsic Value', 'Intrinsic Value (Astronomy)'])
# Merge the four cost subcategory columns into one indicator array.
cost_gen = adding_df_vals(['Cost (Emotional)', 'Cost (Outside Effort)', 'Cost (Loss)', 'Cost (Task Effort)'])

In [6]:
# Insert the merged arrays as new columns at positions 3 and 4 (in place).
# The fourth argument of `insert` is `allow_duplicates`; it is spelled out by
# keyword here so its meaning is visible at the call site.
df.insert(loc=3, column="Cost", value=cost_gen, allow_duplicates=True)
df.insert(
    loc=4,
    column="Intrinsic Value (General)",
    value=intr_val_gen,
    allow_duplicates=True,
)

Function for removing the now obsolete columns:

In [7]:
def removing_columns(df, column_names):
    '''
    Return a new dataframe without the given columns and with a fresh
    0..n-1 row index. The dataframe passed in is not modified.

    `df`: the dataframe to remove columns from
    `column_names`: a list with the names of the columns to remove

    Raises KeyError if one of the names is not a column of `df`.
    '''
    # A single drop call removes all columns at once instead of creating one
    # intermediate copy of the frame per column.
    return df.drop(columns=list(column_names)).reset_index(drop=True)

In [8]:
# Subcategory columns that were merged into "Cost" and
# "Intrinsic Value (General)" above and can now be dropped.
col_remove = [
    'Intrinsic Value (Astronomy)',
    'Intrinsic Value',
    'Cost (Emotional)',
    'Cost (Outside Effort)',
    'Cost (Loss)',
    'Cost (Task Effort)',
]

df = removing_columns(df, col_remove)

The order of categories in the CSV file is different from the order given to Llama3, and thus has to be changed so that they are comparable.

In [9]:
def df_column_switch(df, column1, column2):
    '''
    Return `df` with the positions of `column1` and `column2` exchanged.
    All other columns keep their place.

    `df`: the dataframe whose columns are reordered
    `column1`, `column2`: names of the two columns to swap

    Raises ValueError if either name is not among the columns of `df`.
    '''
    order = list(df.columns)
    pos1 = order.index(column1)
    pos2 = order.index(column2)
    # Exchange the two names in the ordering, then select columns in that order.
    order[pos1], order[pos2] = order[pos2], order[pos1]
    return df[order]

In [10]:
path_columns = [
    'Attainment Value',
    'physiological/emotional',
    'Event Triggered',
    'Mastery',
    'Media Triggered',
    'Vicarious',
    'Intrinsic Social',
    'Social Persuasion',
]

# Swap each listed column with the one listed after it, working down the list.
# Net effect: the last listed column ends up where the first one was, and every
# other column moves to the slot of the column listed after it.
for i, name in enumerate(path_columns[:-1]):
    df = df_column_switch(df, name, path_columns[i + 1])

Code used for centroids

In [11]:
# Disabled snippet: the statements below sit inside a triple-quoted string, so
# they are not executed when this cell runs.
'''
df = df.drop('Unnamed: 17', axis=1)
df = df.rename(columns={"Unnamed: 18": "Num Categories"})
df = df.reset_index(drop=True)
'''

In [12]:
# Preview the first rows to check the merged columns and the new column order.
df.head()

Unnamed: 0,Response,Codes,Attainment Value,Cost,Intrinsic Value (General),Media Triggered,Event Triggered,Intrinsic Social,Utility Value,Mastery,physiological/emotional,Social Persuasion,Vicarious,Num Categories
0,From the very beginning I had a love for math ...,"Intrinsic Value, Physiological/Emotional, Cost...",0,1.0,1.0,0,0,0,0,0,1,0,0,3
1,I first took a physics class in my junior year...,"Vicarious Experience, Intrinsic Value, Cost (L...",0,1.0,1.0,0,0,0,0,0,0,0,1,3
2,I first considered physics as a major when I t...,"Physiological/Emotional, Social Persuasion, Ma...",0,0.0,0.0,0,0,0,0,1,1,1,0,3
3,"Ever since I was a freshman in high school"","" ...","Vicarious Experience, Social Persuasion, Intri...",0,0.0,1.0,0,0,0,0,0,0,1,1,3
4,I had always wanted to become an astronaut as ...,"Utility Value, Intrinsic Value (Astronomy)",0,0.0,1.0,0,0,0,1,0,0,0,0,2


In [13]:
# Save the processed dataframe as 'df.pkl' in the current working directory.
df.to_pickle('df.pkl')