In [1]:
import pandas as pd
import numpy as np
from nltk.corpus import wordnet

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Run the download line once (or whenever WordNet needs updating); afterwards the
# local copy in the notebook directory is reused.
import nltk
# nltk.download('wordnet', download_dir='./')
# Register the notebook directory as an NLTK data location. The membership check
# keeps the search path free of duplicate entries when this cell is re-run.
if './' not in nltk.data.path:
    nltk.data.path.append('./')

In [3]:
# inherit features from Gold Standard dataset
GS_all_agreed = pd.read_csv('./sampled_count/GS_All_Agreed.csv', index_col=0)
features_target = ['Synsets','domain_x',
                   'ngrams_last_mean',
                   'nrdirhypers_x',
                   'nrhypos_x',
                   'nrpartrels_normalised_x',
                   'depthfromtopsynset_normalised_x',
                   'glosslength_normalised_x',
                   'minwordlength_x',
                   'nroflemmas_x',
                   'polyscore_max_x',
                   'vote_x']
# Take an explicit copy: new columns are added to GS_adopt afterwards, and assigning
# into a bare column selection triggers pandas' SettingWithCopyWarning (which the
# global warnings filter in this notebook would otherwise hide).
GS_adopt = GS_all_agreed[features_target].copy()

GS_adopt

Unnamed: 0,Synsets,domain_x,ngrams_last_mean,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,polyscore_max_x,vote_x
0,Synset('adjustable_wrench.n.01'),tool,-18.854402,1,7,0.0,1.012903,0.563173,17,2,1,nb
1,Synset('allen_wrench.n.01'),tool,-18.389126,1,0,0.0,1.012903,0.391092,12,1,1,nb
2,Synset('alligator_wrench.n.01'),tool,-22.354690,1,0,0.0,1.012903,1.517437,16,1,1,nb
3,Synset('awl.n.01'),tool,-14.809997,1,2,15.7,0.911613,0.985552,3,1,1,b
4,Synset('backsaw.n.01'),tool,-18.327476,1,0,0.0,1.114194,1.110701,7,2,1,nb
...,...,...,...,...,...,...,...,...,...,...,...,...
834,Synset('ballet_skirt.n.01'),garm,-16.872435,1,0,0.0,0.947552,0.578283,4,2,2,nb
835,Synset('mess_jacket.n.01'),garm,-18.743772,1,0,0.0,1.158120,1.652238,11,3,1,nb
836,Synset('long_johns.n.01'),garm,-16.882419,1,0,0.0,1.052836,0.479149,10,1,1,nb
837,Synset('undies.n.01'),garm,-16.330160,1,0,0.0,1.158120,0.280880,6,1,1,nb


In [4]:
# extract norms from synsets
# The 'Synsets' column holds strings such as "Synset('adjustable_wrench.n.01')".
# The norm is everything between "Synset('" and the trailing ".<pos>.<number>')".
# A regex anchored on that suffix is used instead of splitting on quotes and dots,
# so that a name which itself contains an apostrophe or a dot
# (e.g. "carpenter's_hammer.n.01") is not cut short.
# NOTE(review): rows whose 'Synsets' value does not match this pattern get NaN as
# norm - confirm that every row in the gold standard follows the Synset('x.p.nn') form.
norm_values = GS_adopt['Synsets'].str.extract(r"Synset\('(.+)\.[a-z]\.\d+'\)", expand=False)
# assign() returns a new frame, so no column is written into a slice of GS_all_agreed.
GS_adopt = GS_adopt.assign(norm=norm_values)
# set_index + reset_index moves 'norm' to the first column and renumbers the rows.
GS_adopt = GS_adopt.set_index('norm').reset_index()

GS_adopt

Unnamed: 0,norm,Synsets,domain_x,ngrams_last_mean,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,polyscore_max_x,vote_x
0,adjustable_wrench,Synset('adjustable_wrench.n.01'),tool,-18.854402,1,7,0.0,1.012903,0.563173,17,2,1,nb
1,allen_wrench,Synset('allen_wrench.n.01'),tool,-18.389126,1,0,0.0,1.012903,0.391092,12,1,1,nb
2,alligator_wrench,Synset('alligator_wrench.n.01'),tool,-22.354690,1,0,0.0,1.012903,1.517437,16,1,1,nb
3,awl,Synset('awl.n.01'),tool,-14.809997,1,2,15.7,0.911613,0.985552,3,1,1,b
4,backsaw,Synset('backsaw.n.01'),tool,-18.327476,1,0,0.0,1.114194,1.110701,7,2,1,nb
...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,ballet_skirt,Synset('ballet_skirt.n.01'),garm,-16.872435,1,0,0.0,0.947552,0.578283,4,2,2,nb
835,mess_jacket,Synset('mess_jacket.n.01'),garm,-18.743772,1,0,0.0,1.158120,1.652238,11,3,1,nb
836,long_johns,Synset('long_johns.n.01'),garm,-16.882419,1,0,0.0,1.052836,0.479149,10,1,1,nb
837,undies,Synset('undies.n.01'),garm,-16.330160,1,0,0.0,1.158120,0.280880,6,1,1,nb


In [5]:
# matching norms with corpora
def sum_lemmas(norm, corpora):
    """Add up the corpus counts of every WordNet lemma reachable from ``norm``.

    All synsets returned by ``wordnet.synsets(norm)`` are visited and the names
    of all their lemmas are collected; a name that occurs in several synsets is
    kept once per occurrence and therefore contributes once per occurrence.
    For each collected name, the rows of ``corpora`` whose index label equals
    the name are selected and every value in those rows is added to the total.

    Parameters
    ----------
    norm : str
        Word to look up in WordNet.
    corpora : pandas object with ``.index`` / ``.loc`` / ``.to_numpy()``
        Count table indexed by word (this notebook passes DataFrames read from
        the count CSV files).

    Returns
    -------
    The accumulated total; plain ``0`` when WordNet yields no lemma for ``norm``.
    """
    # flatten: synsets -> lemmas -> lemma names
    lemma_names = [
        str(lemma.name())
        for synset in wordnet.synsets(norm)
        for lemma in synset.lemmas()
    ]
    # one boolean-mask lookup per lemma name, folded left-to-right starting from 0
    return sum(
        corpora.loc[name == corpora.index].to_numpy().sum()
        for name in lemma_names
    )

In [6]:
# aggregate features the same as Niamh's
# kbnc_sum: per norm, the total KBNC count of all lemmas found via sum_lemmas
kbnc_1m = pd.read_csv('./sampled_count/KBNC_1m_Count.csv', index_col='norm')
# the corpus table is handed to sum_lemmas as its second positional argument
GS_adopt['kbnc_1m_sum'] = GS_adopt['norm'].apply(sum_lemmas, args=(kbnc_1m,))

GS_adopt

Unnamed: 0,norm,Synsets,domain_x,ngrams_last_mean,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,polyscore_max_x,vote_x,kbnc_1m_sum
0,adjustable_wrench,Synset('adjustable_wrench.n.01'),tool,-18.854402,1,7,0.0,1.012903,0.563173,17,2,1,nb,0
1,allen_wrench,Synset('allen_wrench.n.01'),tool,-18.389126,1,0,0.0,1.012903,0.391092,12,1,1,nb,0
2,alligator_wrench,Synset('alligator_wrench.n.01'),tool,-22.354690,1,0,0.0,1.012903,1.517437,16,1,1,nb,0
3,awl,Synset('awl.n.01'),tool,-14.809997,1,2,15.7,0.911613,0.985552,3,1,1,b,0
4,backsaw,Synset('backsaw.n.01'),tool,-18.327476,1,0,0.0,1.114194,1.110701,7,2,1,nb,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,ballet_skirt,Synset('ballet_skirt.n.01'),garm,-16.872435,1,0,0.0,0.947552,0.578283,4,2,2,nb,0
835,mess_jacket,Synset('mess_jacket.n.01'),garm,-18.743772,1,0,0.0,1.158120,1.652238,11,3,1,nb,0
836,long_johns,Synset('long_johns.n.01'),garm,-16.882419,1,0,0.0,1.052836,0.479149,10,1,1,nb,0
837,undies,Synset('undies.n.01'),garm,-16.330160,1,0,0.0,1.158120,0.280880,6,1,1,nb,0


In [7]:
# cabnc_per_100k: per norm, the summed CABNC lemma count expressed per 100,000 words
# (count divided by the hard-coded sample size, then multiplied by 100,000)
cabnc_per_100k_2_4m = pd.read_csv('./sampled_count/CABNC_2_4m_Count.csv', index_col='index')
cabnc_per_100k_1m = pd.read_csv('./sampled_count/CABNC_1m_Count.csv', index_col='index')

# (output column, count table, sample size used as divisor)
for column, counts, sample_size in (
    ('cabnc_per_100k_2_4m', cabnc_per_100k_2_4m, 2400000),
    ('cabnc_per_100k_1m', cabnc_per_100k_1m, 1000000),
):
    GS_adopt[column] = GS_adopt['norm'].apply(
        lambda word: (sum_lemmas(word, counts) / sample_size) * 100000
    )

GS_adopt

Unnamed: 0,norm,Synsets,domain_x,ngrams_last_mean,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,polyscore_max_x,vote_x,kbnc_1m_sum,cabnc_per_100k_2_4m,cabnc_per_100k_1m
0,adjustable_wrench,Synset('adjustable_wrench.n.01'),tool,-18.854402,1,7,0.0,1.012903,0.563173,17,2,1,nb,0,0.0,0.0
1,allen_wrench,Synset('allen_wrench.n.01'),tool,-18.389126,1,0,0.0,1.012903,0.391092,12,1,1,nb,0,0.0,0.0
2,alligator_wrench,Synset('alligator_wrench.n.01'),tool,-22.354690,1,0,0.0,1.012903,1.517437,16,1,1,nb,0,0.0,0.0
3,awl,Synset('awl.n.01'),tool,-14.809997,1,2,15.7,0.911613,0.985552,3,1,1,b,0,0.0,0.0
4,backsaw,Synset('backsaw.n.01'),tool,-18.327476,1,0,0.0,1.114194,1.110701,7,2,1,nb,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,ballet_skirt,Synset('ballet_skirt.n.01'),garm,-16.872435,1,0,0.0,0.947552,0.578283,4,2,2,nb,0,0.0,0.0
835,mess_jacket,Synset('mess_jacket.n.01'),garm,-18.743772,1,0,0.0,1.158120,1.652238,11,3,1,nb,0,0.0,0.0
836,long_johns,Synset('long_johns.n.01'),garm,-16.882419,1,0,0.0,1.052836,0.479149,10,1,1,nb,0,0.0,0.0
837,undies,Synset('undies.n.01'),garm,-16.330160,1,0,0.0,1.158120,0.280880,6,1,1,nb,0,0.0,0.0


In [8]:
# childes_rel_sum: per norm, the summed CHILDES lemma count divided by the sum of the
# 'CHILDES_Count' column of the same sample
childes_1m = pd.read_csv('./sampled_count/CHILDES_1m_Count.csv', index_col='index')
childes_2_4m = pd.read_csv('./sampled_count/CHILDES_2_4m_Count.csv', index_col='index')
childes_5_7m = pd.read_csv('./sampled_count/CHILDES_5_7m_Count.csv', index_col='index')

# denominators: total of the count column in each sample
total_count_1m = childes_1m['CHILDES_Count'].sum()
total_count_2_4m = childes_2_4m['CHILDES_Count'].sum()
total_count_5_7m = childes_5_7m['CHILDES_Count'].sum()

# (output column, count table, denominator)
for column, counts, total in (
    ('childes_1m_rel_sum', childes_1m, total_count_1m),
    ('childes_2_4m_rel_sum', childes_2_4m, total_count_2_4m),
    ('childes_5_7m_rel_sum', childes_5_7m, total_count_5_7m),
):
    GS_adopt[column] = GS_adopt['norm'].apply(
        lambda word: sum_lemmas(word, counts) / total
    )

GS_adopt

Unnamed: 0,norm,Synsets,domain_x,ngrams_last_mean,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,polyscore_max_x,vote_x,kbnc_1m_sum,cabnc_per_100k_2_4m,cabnc_per_100k_1m,childes_1m_rel_sum,childes_2_4m_rel_sum,childes_5_7m_rel_sum
0,adjustable_wrench,Synset('adjustable_wrench.n.01'),tool,-18.854402,1,7,0.0,1.012903,0.563173,17,2,1,nb,0,0.0,0.0,0.0,0.0,0.000000e+00
1,allen_wrench,Synset('allen_wrench.n.01'),tool,-18.389126,1,0,0.0,1.012903,0.391092,12,1,1,nb,0,0.0,0.0,0.0,0.0,0.000000e+00
2,alligator_wrench,Synset('alligator_wrench.n.01'),tool,-22.354690,1,0,0.0,1.012903,1.517437,16,1,1,nb,0,0.0,0.0,0.0,0.0,0.000000e+00
3,awl,Synset('awl.n.01'),tool,-14.809997,1,2,15.7,0.911613,0.985552,3,1,1,b,0,0.0,0.0,0.0,0.0,0.000000e+00
4,backsaw,Synset('backsaw.n.01'),tool,-18.327476,1,0,0.0,1.114194,1.110701,7,2,1,nb,0,0.0,0.0,0.0,0.0,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,ballet_skirt,Synset('ballet_skirt.n.01'),garm,-16.872435,1,0,0.0,0.947552,0.578283,4,2,2,nb,0,0.0,0.0,0.0,0.0,1.754386e-07
835,mess_jacket,Synset('mess_jacket.n.01'),garm,-18.743772,1,0,0.0,1.158120,1.652238,11,3,1,nb,0,0.0,0.0,0.0,0.0,0.000000e+00
836,long_johns,Synset('long_johns.n.01'),garm,-16.882419,1,0,0.0,1.052836,0.479149,10,1,1,nb,0,0.0,0.0,0.0,0.0,0.000000e+00
837,undies,Synset('undies.n.01'),garm,-16.330160,1,0,0.0,1.158120,0.280880,6,1,1,nb,0,0.0,0.0,0.0,0.0,0.000000e+00


In [9]:
# bnc_sum: per norm, the summed lemma count in the full BNC and in each BNC sample
bnc_100m = pd.read_csv('./sampled_count/BNC_100m_Count.csv', index_col='norm')
bnc_5_7m = pd.read_csv('./sampled_count/BNC_5_7m_Count.csv', index_col='norm')
bnc_2_4m = pd.read_csv('./sampled_count/BNC_2_4m_Count.csv', index_col='norm')
bnc_1m = pd.read_csv('./sampled_count/BNC_1m_Count.csv', index_col='norm')

# (output column, count table) - columns are added in this order
for column, counts in (
    ('bnc_100m_sum', bnc_100m),
    ('bnc_5_7m_sum', bnc_5_7m),
    ('bnc_2_4m_sum', bnc_2_4m),
    ('bnc_1m_sum', bnc_1m),
):
    GS_adopt[column] = GS_adopt['norm'].apply(sum_lemmas, args=(counts,))

GS_adopt

Unnamed: 0,norm,Synsets,domain_x,ngrams_last_mean,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,...,kbnc_1m_sum,cabnc_per_100k_2_4m,cabnc_per_100k_1m,childes_1m_rel_sum,childes_2_4m_rel_sum,childes_5_7m_rel_sum,bnc_100m_sum,bnc_5_7m_sum,bnc_2_4m_sum,bnc_1m_sum
0,adjustable_wrench,Synset('adjustable_wrench.n.01'),tool,-18.854402,1,7,0.0,1.012903,0.563173,17,...,0,0.0,0.0,0.0,0.0,0.000000e+00,0,0,0,0
1,allen_wrench,Synset('allen_wrench.n.01'),tool,-18.389126,1,0,0.0,1.012903,0.391092,12,...,0,0.0,0.0,0.0,0.0,0.000000e+00,0,0,0,0
2,alligator_wrench,Synset('alligator_wrench.n.01'),tool,-22.354690,1,0,0.0,1.012903,1.517437,16,...,0,0.0,0.0,0.0,0.0,0.000000e+00,0,0,0,0
3,awl,Synset('awl.n.01'),tool,-14.809997,1,2,15.7,0.911613,0.985552,3,...,0,0.0,0.0,0.0,0.0,0.000000e+00,39,3,1,1
4,backsaw,Synset('backsaw.n.01'),tool,-18.327476,1,0,0.0,1.114194,1.110701,7,...,0,0.0,0.0,0.0,0.0,0.000000e+00,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,ballet_skirt,Synset('ballet_skirt.n.01'),garm,-16.872435,1,0,0.0,0.947552,0.578283,4,...,0,0.0,0.0,0.0,0.0,1.754386e-07,33,0,0,0
835,mess_jacket,Synset('mess_jacket.n.01'),garm,-18.743772,1,0,0.0,1.158120,1.652238,11,...,0,0.0,0.0,0.0,0.0,0.000000e+00,0,0,0,0
836,long_johns,Synset('long_johns.n.01'),garm,-16.882419,1,0,0.0,1.052836,0.479149,10,...,0,0.0,0.0,0.0,0.0,0.000000e+00,0,0,0,0
837,undies,Synset('undies.n.01'),garm,-16.330160,1,0,0.0,1.158120,0.280880,6,...,0,0.0,0.0,0.0,0.0,0.000000e+00,33,1,1,1


In [10]:
# persist the feature table built so far (row index is not written)
GS_adopt.to_csv(path_or_buf='./size_differential_features.csv', index=False)

## Extend features

In [29]:
# reload the previously saved feature table as the base for the extra columns
origin_data = pd.read_csv(filepath_or_buffer='./size_differential_features.csv')
origin_data

Unnamed: 0,norm,Synsets,domain_x,ngrams_last_mean,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,...,kbnc_1m_sum,childes_1m_rel_sum,childes_2_4m_rel_sum,childes_5_7m_rel_sum,bnc_1m_sum,bnc_5_7m_sum,bnc_2_4m_sum,bnc_100m_sum,cabnc_per_100k_2_4m,cabnc_per_100k_1m
0,adjustable_wrench,Synset('adjustable_wrench.n.01'),tool,-18.854402,1,7,0.0,1.012903,0.563173,17,...,0,0.0,0.0,0.000000e+00,0,0,0,0,0,0
1,allen_wrench,Synset('allen_wrench.n.01'),tool,-18.389126,1,0,0.0,1.012903,0.391092,12,...,0,0.0,0.0,0.000000e+00,0,0,0,0,0,0
2,alligator_wrench,Synset('alligator_wrench.n.01'),tool,-22.354690,1,0,0.0,1.012903,1.517437,16,...,0,0.0,0.0,0.000000e+00,0,0,0,0,0,0
3,awl,Synset('awl.n.01'),tool,-14.809997,1,2,15.7,0.911613,0.985552,3,...,0,0.0,0.0,0.000000e+00,1,3,1,39,0,0
4,backsaw,Synset('backsaw.n.01'),tool,-18.327476,1,0,0.0,1.114194,1.110701,7,...,0,0.0,0.0,0.000000e+00,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,ballet_skirt,Synset('ballet_skirt.n.01'),garm,-16.872435,1,0,0.0,0.947552,0.578283,4,...,0,0.0,0.0,1.754386e-07,0,0,0,33,0,0
835,mess_jacket,Synset('mess_jacket.n.01'),garm,-18.743772,1,0,0.0,1.158120,1.652238,11,...,0,0.0,0.0,0.000000e+00,0,0,0,0,0,0
836,long_johns,Synset('long_johns.n.01'),garm,-16.882419,1,0,0.0,1.052836,0.479149,10,...,0,0.0,0.0,0.000000e+00,0,0,0,0,0,0
837,undies,Synset('undies.n.01'),garm,-16.330160,1,0,0.0,1.158120,0.280880,6,...,0,0.0,0.0,0.000000e+00,1,1,1,33,0,0


In [32]:
# cabnc_sum: per norm, the raw summed lemma count in each CABNC sample
cabnc_sum_2_4m = pd.read_csv('./sampled_count/CABNC_2_4m_Count.csv', index_col='index')
cabnc_sum_1m = pd.read_csv('./sampled_count/CABNC_1m_Count.csv', index_col='index')

# (output column, count table) - columns are added in this order
for column, counts in (
    ('cabnc_2_4m_sum', cabnc_sum_2_4m),
    ('cabnc_1m_sum', cabnc_sum_1m),
):
    origin_data[column] = origin_data['norm'].apply(sum_lemmas, args=(counts,))

origin_data

Unnamed: 0,norm,Synsets,domain_x,ngrams_last_mean,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,...,bnc_5_7m_sum,bnc_2_4m_sum,bnc_100m_sum,cabnc_per_100k_2_4m,cabnc_per_100k_1m,childes_1m_sum,childes_2_4m_sum,childes_5_7m_sum,cabnc_2_4m_sum,cabnc_1m_sum
0,adjustable_wrench,Synset('adjustable_wrench.n.01'),tool,-18.854402,1,7,0.0,1.012903,0.563173,17,...,0,0,0,0,0,0,0,0,0,0
1,allen_wrench,Synset('allen_wrench.n.01'),tool,-18.389126,1,0,0.0,1.012903,0.391092,12,...,0,0,0,0,0,0,0,0,0,0
2,alligator_wrench,Synset('alligator_wrench.n.01'),tool,-22.354690,1,0,0.0,1.012903,1.517437,16,...,0,0,0,0,0,0,0,0,0,0
3,awl,Synset('awl.n.01'),tool,-14.809997,1,2,15.7,0.911613,0.985552,3,...,3,1,39,0,0,0,0,0,0,0
4,backsaw,Synset('backsaw.n.01'),tool,-18.327476,1,0,0.0,1.114194,1.110701,7,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,ballet_skirt,Synset('ballet_skirt.n.01'),garm,-16.872435,1,0,0.0,0.947552,0.578283,4,...,0,0,33,0,0,0,0,1,0,0
835,mess_jacket,Synset('mess_jacket.n.01'),garm,-18.743772,1,0,0.0,1.158120,1.652238,11,...,0,0,0,0,0,0,0,0,0,0
836,long_johns,Synset('long_johns.n.01'),garm,-16.882419,1,0,0.0,1.052836,0.479149,10,...,0,0,0,0,0,0,0,0,0,0
837,undies,Synset('undies.n.01'),garm,-16.330160,1,0,0.0,1.158120,0.280880,6,...,1,1,33,0,0,0,0,0,0,0


In [30]:
# childes_sum: per norm, the raw summed lemma count in each CHILDES sample
# (no division by the corpus total is applied in this cell)
childes_1m = pd.read_csv('./sampled_count/CHILDES_1m_Count.csv', index_col='index')
childes_2_4m = pd.read_csv('./sampled_count/CHILDES_2_4m_Count.csv', index_col='index')
childes_5_7m = pd.read_csv('./sampled_count/CHILDES_5_7m_Count.csv', index_col='index')

# (output column, count table) - columns are added in this order
for column, counts in (
    ('childes_1m_sum', childes_1m),
    ('childes_2_4m_sum', childes_2_4m),
    ('childes_5_7m_sum', childes_5_7m),
):
    origin_data[column] = origin_data['norm'].apply(sum_lemmas, args=(counts,))

origin_data

Unnamed: 0,norm,Synsets,domain_x,ngrams_last_mean,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,...,childes_5_7m_rel_sum,bnc_1m_sum,bnc_5_7m_sum,bnc_2_4m_sum,bnc_100m_sum,cabnc_per_100k_2_4m,cabnc_per_100k_1m,childes_1m_sum,childes_2_4m_sum,childes_5_7m_sum
0,adjustable_wrench,Synset('adjustable_wrench.n.01'),tool,-18.854402,1,7,0.0,1.012903,0.563173,17,...,0.000000e+00,0,0,0,0,0,0,0,0,0
1,allen_wrench,Synset('allen_wrench.n.01'),tool,-18.389126,1,0,0.0,1.012903,0.391092,12,...,0.000000e+00,0,0,0,0,0,0,0,0,0
2,alligator_wrench,Synset('alligator_wrench.n.01'),tool,-22.354690,1,0,0.0,1.012903,1.517437,16,...,0.000000e+00,0,0,0,0,0,0,0,0,0
3,awl,Synset('awl.n.01'),tool,-14.809997,1,2,15.7,0.911613,0.985552,3,...,0.000000e+00,1,3,1,39,0,0,0,0,0
4,backsaw,Synset('backsaw.n.01'),tool,-18.327476,1,0,0.0,1.114194,1.110701,7,...,0.000000e+00,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,ballet_skirt,Synset('ballet_skirt.n.01'),garm,-16.872435,1,0,0.0,0.947552,0.578283,4,...,1.754386e-07,0,0,0,33,0,0,0,0,1
835,mess_jacket,Synset('mess_jacket.n.01'),garm,-18.743772,1,0,0.0,1.158120,1.652238,11,...,0.000000e+00,0,0,0,0,0,0,0,0,0
836,long_johns,Synset('long_johns.n.01'),garm,-16.882419,1,0,0.0,1.052836,0.479149,10,...,0.000000e+00,0,0,0,0,0,0,0,0,0
837,undies,Synset('undies.n.01'),garm,-16.330160,1,0,0.0,1.158120,0.280880,6,...,0.000000e+00,1,1,1,33,0,0,0,0,0


In [33]:
# persist the extended feature table (row index is not written)
origin_data.to_csv(path_or_buf='./all_occurrence_size_differential_features.csv', index=False)