## Libraries

In [1]:
# Libraries
import sys
import pandas as pd
import os
import numpy as np
import pickle
import networkx as nx

## Directory

In [2]:
code_dir = os.getcwd()

def parent_dir(dir, n: int):
    if n > 0: return parent_dir(os.path.abspath(os.path.join(dir, os.pardir)), n-1)
    else: return dir

main_dir = parent_dir(code_dir,1)
input_dir = main_dir + r'\Input'
interim_dir = main_dir + r'\Interim'
output_dir = main_dir + r'\Output'
util_dir = main_dir + r'\Utilities'
log_dir = main_dir + r'\Log'

print('Paths:')
print('Main: {}'.format(main_dir))
print('Input: {}'.format(input_dir))
print('Interim: {}'.format(interim_dir))
print('Output: {}'.format(output_dir))
print('Utilities: {}'.format(util_dir))
print('Log: {}'.format(log_dir))

Paths:
Main: D:\Projects\GRE_WORD_TOOL\Project
Input: D:\Projects\GRE_WORD_TOOL\Project\Input
Interim: D:\Projects\GRE_WORD_TOOL\Project\Interim
Output: D:\Projects\GRE_WORD_TOOL\Project\Output
Utilities: D:\Projects\GRE_WORD_TOOL\Project\Utilities
Log: D:\Projects\GRE_WORD_TOOL\Project\Log


## Modules LIB

In [3]:
# Modules
sys.path.insert(0,main_dir)
sys.path.insert(0,util_dir)

from Utilities import *
os.chdir(input_dir)

df_tools imported.


## IMPORT

In [4]:
# Importing Data
os.chdir(input_dir)

gre_words_fn = 'Gre_Words.xlsx'

df_gre_word_cat = pd.read_excel(gre_words_fn, sheet_name = 'word_seg', usecol='A:B')

df_tools.preview_df(df_gre_word_cat)

------------------
Top 5 rows:


Unnamed: 0,Category,Word
0,PREDICTION,AUGUR/AUGURY
1,PREDICTION,FORECAST
2,PREDICTION,FORESIGHT/ FORESEE
3,PREDICTION,FORETELL
4,PREDICTION,PRESAGE


Columns: 2
Category, Word

#Rows: 119
------------------ 



# PROCESS

### BASE MAPPING

In [5]:
base_map_df = df_gre_word_cat.drop_duplicates()

node_a_cat_map_df = base_map_df.rename(columns = {'Category':'link_category', 'Word': 'Node_A_word'})
node_b_cat_map_df = base_map_df.rename(columns = {'Category':'link_category', 'Word': 'Node_B_word'})

df_tools.preview_df(base_map_df)

------------------
Top 5 rows:


Unnamed: 0,Category,Word
0,PREDICTION,AUGUR/AUGURY
1,PREDICTION,FORECAST
2,PREDICTION,FORESIGHT/ FORESEE
3,PREDICTION,FORETELL
4,PREDICTION,PRESAGE


Columns: 2
Category, Word

#Rows: 119
------------------ 



#### BASE MAPPING

In [6]:
base_map_df = df_gre_word_cat.drop_duplicates()

node_a_cat_map_df = base_map_df.rename(columns = {'Category':'link_category', 'Word': 'Node_A_word'})
node_b_cat_map_df = base_map_df.rename(columns = {'Category':'link_category', 'Word': 'Node_B_word'})

df_tools.preview_df(base_map_df)

------------------
Top 5 rows:


Unnamed: 0,Category,Word
0,PREDICTION,AUGUR/AUGURY
1,PREDICTION,FORECAST
2,PREDICTION,FORESIGHT/ FORESEE
3,PREDICTION,FORETELL
4,PREDICTION,PRESAGE


Columns: 2
Category, Word

#Rows: 119
------------------ 



In [7]:
dummy_network_df = base_map_df.rename(columns = {'Category':'Primary_category', 'Word': 'Primary_word'})

dummy_network_df['Node_A_word'] = dummy_network_df['Primary_word']
dummy_network_df['Node_B_word'] = dummy_network_df['Primary_word']
dummy_network_df['link_category'] = dummy_network_df['Primary_category']
dummy_network_df['link_distance'] = 1
dummy_network_df['link_degree'] = 0

# Dropping dups
dummy_network_df = dummy_network_df.drop_duplicates()

df_tools.preview_df(dummy_network_df)

------------------
Top 5 rows:


Unnamed: 0,Primary_category,Primary_word,Node_A_word,Node_B_word,link_category,link_distance,link_degree
0,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,1,0
1,PREDICTION,FORECAST,FORECAST,FORECAST,PREDICTION,1,0
2,PREDICTION,FORESIGHT/ FORESEE,FORESIGHT/ FORESEE,FORESIGHT/ FORESEE,PREDICTION,1,0
3,PREDICTION,FORETELL,FORETELL,FORETELL,PREDICTION,1,0
4,PREDICTION,PRESAGE,PRESAGE,PRESAGE,PREDICTION,1,0


Columns: 7
Primary_category, Primary_word, Node_A_word, Node_B_word, link_category, link_distance, link_degree

#Rows: 119
------------------ 



In [8]:
# col list use in the loop
col_1_l = ['Primary_category', 'Primary_word', 'Node_A_word', 'link_category', 'Node_B_word']
col_l = ['Primary_category', 'Primary_word', 'Node_A_word', 'link_category', 'Node_B_word', 'link_distance', 'link_degree']

df_network = dummy_network_df[col_l]

# Numer of record
n = df_network.shape[0]

while n > 0:
    # existing network
    existing_network_1_df = df_network[col_1_l]
    existing_network_2_df = df_network.rename(columns = {'Node_A_word':'Node_B_word', 'Node_B_word': 'Node_A_word'})

    # Appending and asigning flag
    existing_network_df = existing_network_1_df[col_1_l].append(existing_network_2_df[col_1_l], ignore_index=True)
    existing_network_df = existing_network_df.drop_duplicates()
    existing_network_df['_existing_flag'] = 1

    # Removing where two nodes are the same
    existing_network_df = existing_network_df[~(existing_network_df['Node_A_word'] == existing_network_df['Node_B_word'])]
    existing_network_df = existing_network_df[~(existing_network_df['Primary_word'] == existing_network_df['Node_B_word'])]


    # second step
    working_df = df_network[col_l]
    working_df = working_df.reset_index()
    working_df = working_df.rename(columns = {'Node_A_word': '_prev_Word','Node_B_word':'Node_A_word'
                                                  , 'link_category': '_prev_link'
                                                  , 'link_degree': '_prev_link_degree'
                                                  , 'link_distance': '_prev_link_distance'})

    working_df = pd.merge(working_df, node_a_cat_map_df, how = 'inner', on = 'Node_A_word')
    working_df = pd.merge(working_df, node_b_cat_map_df, how = 'inner', on = 'link_category')

    # Removing where two nodes are the same
    working_df = working_df[~(working_df['Node_A_word'] == working_df['Node_B_word'])]
    working_df = working_df[~(working_df['Primary_word'] == working_df['Node_B_word'])]

    # Assigning NULl Columns
    working_df['link_distance'] = None
    working_df['link_degree'] = None

    # Removing duplicates
    working_df = working_df.drop_duplicates()


    # Removing existing network
    working_df = pd.merge(working_df, existing_network_df, how = 'left', on = col_1_l)
    working_df['_existing_flag'] = working_df['_existing_flag'].fillna(0)
    working_df = working_df[~(working_df['_existing_flag'] == 1)]


    # Numer of record
    n = working_df.shape[0]
    print('Number of new links: {}'.format(n))

    # Updating degree and distance
    if n>0:
        working_df['link_distance'] = working_df.apply(lambda x: x['_prev_link_distance'] + 1 if (x['_prev_Word'] != x['Node_A_word']) else x['_prev_link_distance'], axis = 1) 
        working_df['link_degree'] = working_df.apply(lambda x: x['_prev_link_degree'] + 1 if (x['_prev_link'] != x['link_category']) else x['_prev_link_degree'], axis = 1)


    # only keeping selected columns
    working_df = working_df[col_l]

    # updating overall network
    df_network = df_network[col_l].append(working_df[col_l], ignore_index=True).drop_duplicates()

    # Removing where two nodes are the same
    df_network = df_network[~(df_network['Node_A_word'] == df_network['Node_B_word'])]
    df_network = df_network[~(df_network['Primary_word'] == df_network['Node_B_word'])]

    # Reseting index
    df_network = df_network.reset_index(drop = True)

    # Deleting working tables
    del working_df
    del existing_network_1_df
    del existing_network_2_df
    del existing_network_df

df_tools.preview_df(df_network)

Number of new links: 866
Number of new links: 7200
Number of new links: 0
------------------
Top 5 rows:


Unnamed: 0,Primary_category,Primary_word,Node_A_word,link_category,Node_B_word,link_distance,link_degree
0,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,FORECAST,1,0
1,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,FORESIGHT/ FORESEE,1,0
2,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,FORETELL,1,0
3,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,PRESAGE,1,0
4,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,PRESCIENCE,1,0


Columns: 7
Primary_category, Primary_word, Node_A_word, link_category, Node_B_word, link_distance, link_degree

#Rows: 8066
------------------ 



In [9]:
df_tools.preview_df(df_network[df_network['Primary_word'] =='AUGUR/AUGURY'])

------------------
Top 5 rows:


Unnamed: 0,Primary_category,Primary_word,Node_A_word,link_category,Node_B_word,link_distance,link_degree
0,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,FORECAST,1,0
1,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,FORESIGHT/ FORESEE,1,0
2,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,FORETELL,1,0
3,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,PRESAGE,1,0
4,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,PRESCIENCE,1,0


Columns: 7
Primary_category, Primary_word, Node_A_word, link_category, Node_B_word, link_distance, link_degree

#Rows: 36
------------------ 



## ANALYSIS

In [10]:
Network_data_df = df_network.copy()

# Primary Connection
Primary_connection_df = Network_data_df.groupby(['Node_A_word', 'link_category', 'Node_B_word'])['link_distance', 'link_degree'].min()
Primary_connection_df = Primary_connection_df.reset_index()
Primary_connection_df = Primary_connection_df.rename(columns = {'Node_A_word': 'Primary_word'
                                                                , 'link_category': 'Primary_category'
                                                                , 'link_distance': 'Primary_distance'
                                                                , 'link_degree': 'Primary_degree'})

df_tools.preview_df(Primary_connection_df)

------------------
Top 5 rows:


Unnamed: 0,Primary_word,Primary_category,Node_B_word,Primary_distance,Primary_degree
0,ADEPT,SKILLFUL,ADROIT,1,0
1,ADEPT,SKILLFUL,CONSUMMATE,1,0
2,ADEPT,SKILLFUL,DEFT,1,0
3,ADEPT,SKILLFUL,DEXTEROUS,1,0
4,ADEPT,SKILLFUL,PROFICIENT,1,0


Columns: 5
Primary_word, Primary_category, Node_B_word, Primary_distance, Primary_degree

#Rows: 866
------------------ 



In [11]:
link_data_df = df_network.copy()

link_data_df['link_distance_resi'] = 1/link_data_df['link_distance']
link_data_df['link_degree_resi'] = 1/(link_data_df['link_degree']+1)

link_data_1_df = link_data_df[['Primary_word', 'Node_B_word', 'link_category', 'link_distance_resi', 'link_degree_resi']]
link_data_2_df = link_data_df.rename(columns = {'Primary_word':'Node_B_word', 'Node_B_word': 'Primary_word'})

# Appending
link_data_fin_df = link_data_1_df.append(link_data_2_df, ignore_index=True, sort=False)

# strength
link_data_fin_df['strength'] = link_data_fin_df['link_distance_resi']*link_data_df['link_degree_resi']

###########################################


# link strength
link_strength_df = link_data_fin_df.groupby(['Primary_word', 'link_category', 'Node_B_word'])['strength'].sum()
link_strength_df = link_strength_df.reset_index()
link_strength_df = link_strength_df.rename(columns = {'Primary_word': 'Node_A_word', 'strength': 'link_strength'})

df_tools.preview_df(link_strength_df)

############################################

# Node strength

# Node_link_count
link_count_df = link_data_df.groupby(['Primary_word', 'Node_B_word'])['link_category'].nunique()
link_count_df = link_count_df.reset_index()
link_count_df = link_count_df.rename(columns = {'Primary_word': 'Node_A_word', 'link_category': 'link_count'})

df_tools.preview_df(link_count_df)

------------------
Top 5 rows:


Unnamed: 0,Node_A_word,link_category,Node_B_word,link_strength
0,ADEPT,SKILLFUL,ADROIT,3.5
1,ADEPT,SKILLFUL,CONSUMMATE,3.5
2,ADEPT,SKILLFUL,DEFT,3.5
3,ADEPT,SKILLFUL,DEXTEROUS,3.5
4,ADEPT,SKILLFUL,PROFICIENT,3.5


Columns: 4
Node_A_word, link_category, Node_B_word, link_strength

#Rows: 866
------------------ 

------------------
Top 5 rows:


Unnamed: 0,Node_A_word,Node_B_word,link_count
0,ADEPT,ADROIT,1
1,ADEPT,CONSUMMATE,1
2,ADEPT,DEFT,1
3,ADEPT,DEXTEROUS,1
4,ADEPT,PROFICIENT,1


Columns: 3
Node_A_word, Node_B_word, link_count

#Rows: 866
------------------ 



In [12]:
# Merging
df_network_final = pd.merge(df_network, Primary_connection_df, how = 'left', on = ['Primary_word', 'Primary_category', 'Node_B_word'])
df_network_final = pd.merge(df_network_final, link_strength_df, how = 'left', on = ['Node_A_word', 'link_category', 'Node_B_word'])
df_network_final = pd.merge(df_network_final, link_count_df, how = 'left', on = ['Node_A_word', 'Node_B_word'])


df_tools.preview_df(df_network_final)

------------------
Top 5 rows:


Unnamed: 0,Primary_category,Primary_word,Node_A_word,link_category,Node_B_word,link_distance,link_degree,Primary_distance,Primary_degree,link_strength,link_count
0,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,FORECAST,1,0,1,0,3.5,1
1,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,FORESIGHT/ FORESEE,1,0,1,0,3.5,1
2,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,FORETELL,1,0,1,0,3.5,1
3,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,PRESAGE,1,0,1,0,3.5,1
4,PREDICTION,AUGUR/AUGURY,AUGUR/AUGURY,PREDICTION,PRESCIENCE,1,0,1,0,3.5,1


Columns: 11
Primary_category, Primary_word, Node_A_word, link_category, Node_B_word, link_distance, link_degree, Primary_distance, Primary_degree, link_strength, link_count

#Rows: 8066
------------------ 



### WORD NETWORK

In [13]:
Col_wn_l = ['Primary_word', 'Node_A_word', 'link_category', 'Node_B_word']
Val_wn_l = ['link_distance', 'link_degree', 'Primary_distance', 'Primary_degree', 'link_strength', 'link_count']

df_word_network = df_network_final.groupby(Col_wn_l)[Val_wn_l].min().reset_index()

df_tools.preview_df(df_word_network)

------------------
Top 5 rows:


Unnamed: 0,Primary_word,Node_A_word,link_category,Node_B_word,link_distance,link_degree,Primary_distance,Primary_degree,link_strength,link_count
0,ADEPT,ADEPT,SKILLFUL,ADROIT,1,0,1,0,3.5,1
1,ADEPT,ADEPT,SKILLFUL,CONSUMMATE,1,0,1,0,3.5,1
2,ADEPT,ADEPT,SKILLFUL,DEFT,1,0,1,0,3.5,1
3,ADEPT,ADEPT,SKILLFUL,DEXTEROUS,1,0,1,0,3.5,1
4,ADEPT,ADEPT,SKILLFUL,PROFICIENT,1,0,1,0,3.5,1


Columns: 10
Primary_word, Node_A_word, link_category, Node_B_word, link_distance, link_degree, Primary_distance, Primary_degree, link_strength, link_count

#Rows: 8066
------------------ 



In [14]:
# Ranking
df_word_network_1 = df_word_network.copy()

df_word_network_1['_dist_rank'] = df_word_network_1.groupby(['Primary_word', 'Node_B_word'])['link_distance'].rank('dense', ascending=True)
df_word_network_1['_node_rank'] = df_word_network_1.groupby(['Primary_word', 'Node_B_word', '_dist_rank'])['link_count'].rank('dense', ascending=False)
df_word_network_1['_link_rank'] = df_word_network_1.groupby(['Primary_word', 'Node_B_word', '_dist_rank','_node_rank'])['link_strength'].rank('first', ascending=False)

def fun_primary_link(x):
    if x['_dist_rank'] == 1 and x['_node_rank'] == 1 and x['_link_rank'] == 1: return 'Y'
    else: return 'N'
    
df_word_network_1['primary_link'] = df_word_network_1.apply(lambda x: fun_primary_link(x), axis= 1)

col_l = ['Primary_word', 'Node_A_word', 'link_category', 'Node_B_word', 'link_distance', 'link_degree', 'Primary_distance', 'Primary_degree', 'link_strength', 'link_count', 'primary_link']

df_word_network_final = df_word_network_1[col_l]

df_tools.preview_df(df_word_network_final)

------------------
Top 5 rows:


Unnamed: 0,Primary_word,Node_A_word,link_category,Node_B_word,link_distance,link_degree,Primary_distance,Primary_degree,link_strength,link_count,primary_link
0,ADEPT,ADEPT,SKILLFUL,ADROIT,1,0,1,0,3.5,1,Y
1,ADEPT,ADEPT,SKILLFUL,CONSUMMATE,1,0,1,0,3.5,1,Y
2,ADEPT,ADEPT,SKILLFUL,DEFT,1,0,1,0,3.5,1,Y
3,ADEPT,ADEPT,SKILLFUL,DEXTEROUS,1,0,1,0,3.5,1,Y
4,ADEPT,ADEPT,SKILLFUL,PROFICIENT,1,0,1,0,3.5,1,Y


Columns: 11
Primary_word, Node_A_word, link_category, Node_B_word, link_distance, link_degree, Primary_distance, Primary_degree, link_strength, link_count, primary_link

#Rows: 8066
------------------ 



In [41]:
df_tools.preview_df(df_word_network_final[df_word_network_final['Primary_word'] =='ADEPT'], 5)

------------------
Top 5 rows:


Unnamed: 0,Primary_word,Node_A_word,link_category,Node_B_word,link_distance,link_degree,Primary_distance,Primary_degree,link_strength,link_count,primary_link
0,ADEPT,ADEPT,SKILLFUL,ADROIT,1,0,1,0,3.5,1,Y
1,ADEPT,ADEPT,SKILLFUL,CONSUMMATE,1,0,1,0,3.5,1,Y
2,ADEPT,ADEPT,SKILLFUL,DEFT,1,0,1,0,3.5,1,Y
3,ADEPT,ADEPT,SKILLFUL,DEXTEROUS,1,0,1,0,3.5,1,Y
4,ADEPT,ADEPT,SKILLFUL,PROFICIENT,1,0,1,0,3.5,1,Y


Columns: 11
Primary_word, Node_A_word, link_category, Node_B_word, link_distance, link_degree, Primary_distance, Primary_degree, link_strength, link_count, primary_link

#Rows: 36
------------------ 



## Create Coordinates for Nodes in Network

In [49]:
df_InputData = df_word_network_final.copy()

In [50]:
#Nodes are positioned using the Fruchterman-Reingold force-directed algorithm.

Q = nx.Graph()
arr_SrcTgt= np.array(df_InputData[['Node_A_word', 'Node_B_word']])
Q.add_edges_from(arr_SrcTgt)
dict_Coords = nx.spring_layout(Q)

df_Raw_Coords = pd.DataFrame(dict_Coords)
df_Raw_Coords = df_Raw_Coords.T
df_Raw_Coords.columns = ['X','Y']
df_Raw_Coords['NodeName'] = df_Raw_Coords.index

#Add in a "Node Name" for cases where nodes do not link with another named node

df_Raw_Coords.fillna("Not Specified", inplace = True)

df_tools.preview_df(df_Raw_Coords)

------------------
Top 5 rows:


Unnamed: 0,X,Y,NodeName
ADEPT,0.434503,0.989818,ADEPT
ADROIT,0.571354,0.98914,ADROIT
AESTHETIC,0.382561,0.970477,AESTHETIC
AFFINITY,0.478881,0.007236,AFFINITY
AGHAST,0.506328,0.567412,AGHAST


Columns: 3
X, Y, NodeName

#Rows: 119
------------------ 



#### Joining with the network

In [51]:
#Create bridge between main dataset and coordinates

arr_SrcTgt2 = arr_SrcTgt.reshape(1,(len(arr_SrcTgt)*2))
arr_SrcTgt2 = arr_SrcTgt2.reshape(-1)
df_SrcTgt = pd.DataFrame(arr_SrcTgt2,columns=['NodeName'])
arr_Index = []
for i in range(1,(len(arr_SrcTgt)+1)):
        arr_Index.append(i)
        arr_Index.append(i)
df_SrcTgt['c_Index'] = arr_Index

df_tools.preview_df(df_SrcTgt)

------------------
Top 5 rows:


Unnamed: 0,NodeName,c_Index
0,ADEPT,1
1,ADROIT,1
2,ADEPT,2
3,CONSUMMATE,2
4,ADEPT,3


Columns: 2
NodeName, c_Index

#Rows: 16132
------------------ 



In [52]:
#Join the datasets

df_InputData.index = df_InputData.index + 1

Merge_1 = pd.merge(
    left=df_SrcTgt,
    right=df_InputData,
    how="inner",
    left_on=df_SrcTgt['c_Index'],
    right_index=True,
)

df_MainDat = pd.DataFrame(Merge_1)
df_MainDat = df_MainDat.drop(columns=['key_0'])

Merge_2 = pd.merge(
    left=df_Raw_Coords,
    right=df_MainDat,
    how="left",
    left_on=df_Raw_Coords['NodeName'],
    right_on=df_MainDat['NodeName'],
    suffixes=("", "_y"),
)

df_finaldat = pd.DataFrame(Merge_2)
df_finaldat = df_finaldat.drop(columns=['key_0','NodeName_y'])

df_tools.preview_df(df_finaldat)

In [54]:
df_tools.preview_df(df_finaldat[df_finaldat['c_Index']==1])

------------------
Top 5 rows:


Unnamed: 0,X,Y,NodeName,c_Index,Primary_word,Node_A_word,link_category,Node_B_word,link_distance,link_degree,Primary_distance,Primary_degree,link_strength,link_count,primary_link
0,0.434503,0.989818,ADEPT,1,ADEPT,ADEPT,SKILLFUL,ADROIT,1,0,1,0,3.5,1,Y
72,0.571354,0.98914,ADROIT,1,ADEPT,ADEPT,SKILLFUL,ADROIT,1,0,1,0,3.5,1,Y


Columns: 15
X, Y, NodeName, c_Index, Primary_word, Node_A_word, link_category, Node_B_word, link_distance, link_degree, Primary_distance, Primary_degree, link_strength, link_count, primary_link

#Rows: 2
------------------ 



In [55]:
os.chdir(output_dir)

df_finaldat.to_excel('word_network_test.xlsx', index = False)

## QC

In [38]:
arr_SrcTgt2 = arr_SrcTgt.reshape(1,(len(arr_SrcTgt)*2))
arr_SrcTgt2 = arr_SrcTgt2.reshape(-1)
df_SrcTgt = pd.DataFrame(arr_SrcTgt2,columns=['NodeName'])


df_tools.preview_df(df_SrcTgt)

------------------
Top 5 rows:


Unnamed: 0,NodeName
0,ADEPT
1,ADROIT
2,ADEPT
3,CONSUMMATE
4,ADEPT


Columns: 1
NodeName

#Rows: 1732
------------------ 



In [36]:
arr_SrcTgt2 = arr_SrcTgt.reshape(1,(len(arr_SrcTgt)*2))
arr_SrcTgt3 = arr_SrcTgt2.reshape(-1)

print(arr_SrcTgt.shape)
print(arr_SrcTgt2.shape)
print(arr_SrcTgt3.shape)
arr_SrcTgt2.shape

(866, 2)
(1, 1732)
(1732,)


(1, 1732)

In [20]:
arr_SrcTgt2 = arr_SrcTgt.reshape(1,(len(arr_SrcTgt)*2))

arr_SrcTgt2

array([['ADEPT', 'ADROIT', 'ADEPT', ..., 'PRECOCIOUS', 'SANGUINE',
        'BUOYANT']], dtype=object)

In [22]:
len(arr_SrcTgt)

866

In [24]:
len(arr_SrcTgt2[0])

1732

In [28]:
arr_SrcTgt.size

1732

## EXPORT

In [147]:
os.chdir(output_dir)

df_word_network_final.to_excel('word_network_final.xlsx', index = False)
df_word_network_final.to_csv('word_network_final.csv')