#### Importing Libs

In [1]:
import pandas as pd
from decouple import config


#### Load Enviroment Vars

In [2]:
RAW_DATA = config("RAW_DATA")
PROCESSED_DATA = config("PROCESSED_DATA")
# Vars neo4j
URI = config("URI")
USER = config("USER")
PASSWORD = config("PASSWORD")

#### Load Data

In [3]:
df_tmp_ocupacao = pd.read_excel(RAW_DATA+"QBQ\\QBQ.xlsx", sheet_name="Ocupação")
df_tmp_knowledge = pd.read_excel(RAW_DATA+"QBQ\\QBQ.xlsx", sheet_name="Conhecimento I")
df_tmp_knowledge_ = pd.read_excel(RAW_DATA+"QBQ\\QBQ.xlsx", sheet_name="Conhecimento II")

#### Processing Data

In [4]:
# Rename columns dataframe 'occupation'
df_tmp_ocupacao.columns = [
    "codCBO", "occupation", 
    "jobDescription", "occupationProfile", 
    "knowledgeLevel", "habilityLevel", 
    "attitudeLevel", "occupationLevel"
    ]

In [5]:
# Rename columns dataframe 'knowledge'
df_tmp_knowledge.columns = [
    "codCBO",
    "codDomain",
    "descriptionArea",
    "codArea",
    "descriptionArea",
    "codField",
    "descriptionField",
    "codKnowledge",
    "descriptionKnowledge",
    "proof",
    "frequency",
    "importance"
]
# Create dataframe to nodes
df_tmp_knowledge = df_tmp_knowledge[["codCBO", "descriptionKnowledge", "proof", "frequency", "importance"]]
# processing text of column 'descriptionKnowledge'
df_tmp_knowledge["descriptionKnowledge"] = df_tmp_knowledge["descriptionKnowledge"].apply(lambda x: x.capitalize())

In [6]:
# Drop column we dont use
df_tmp_knowledge_.drop(columns=["codConhecimento", "desConhecimento"], inplace=True)
# Rename columns dataframe 'knowledge_"
df_tmp_knowledge_.columns = [
    "codCBO",
    "codArea",
    "descriptionArea",
    "codField",
    "descriptionField",
    "codKnowledge",
    "descriptionKnowledge",
    "proof",
    "frequency",
    "importance"
]
# Create dataframe to nodes
df_tmp_knowledge_ = df_tmp_knowledge_[["codCBO", "descriptionKnowledge", "proof", "frequency", "importance"]]
# Processing text of column 'descriptionKnowledge'
df_tmp_knowledge_["descriptionKnowledge"] = df_tmp_knowledge_["descriptionKnowledge"].apply(lambda x: x.capitalize())

In [7]:
# Merge Data Knowledge
df_knowledge = pd.concat([df_tmp_knowledge, df_tmp_knowledge_], axis=0)

In [8]:
# Cleaning data
df_knowledge["descriptionKnowledge"] = df_knowledge["descriptionKnowledge"].apply(lambda x: " ".join(x.replace("(especificar:...)", "").split()))

In [10]:
# Create Knowledge nodes
nodes_knowledge = pd.DataFrame(df_knowledge["descriptionKnowledge"].unique(), columns=["name"])
nodes_knowledge["name"] = nodes_knowledge["name"].apply(lambda x: x.replace('"', ""))

#### Saving Data in Neo4j

In [9]:
df_knowledge[df_knowledge.codCBO == 212405][:15]

Unnamed: 0,codCBO,descriptionKnowledge,proof,frequency,importance
482,212405,Arquitetura de computação,4,4,4
483,212405,Banco de dados,4,4,4
484,212405,Hardware,3,3,3
485,212405,Linguagem de programação,5,5,5
486,212405,Sistema de informação distribuído,4,4,4
487,212405,Sistemas operacionais,4,4,4
488,212405,Teoria da computação e matemática,4,4,4
489,212405,Outros conhecimentos de ciência da computação ...,4,4,5
490,212405,Outros conhecimentos de ciência da computação ...,4,4,4
491,212405,Outros conhecimentos de ciência da computação ...,5,4,5


In [11]:
from core.neo4jHandler import SendDataNeo4j

In [12]:
driver_neo4j = SendDataNeo4j(uri=URI, user=USER, password=PASSWORD)

#### Create Nodes of Knowledge

In [15]:
driver_neo4j.create_nodes(dataframe=nodes_knowledge, label="Knowledge")

>>> Save Line: 0, Label: Knowledge
>>> Save Line: 1, Label: Knowledge
>>> Save Line: 2, Label: Knowledge
>>> Save Line: 3, Label: Knowledge
>>> Save Line: 4, Label: Knowledge
>>> Save Line: 5, Label: Knowledge
>>> Save Line: 6, Label: Knowledge
>>> Save Line: 7, Label: Knowledge
>>> Save Line: 8, Label: Knowledge
>>> Save Line: 9, Label: Knowledge
>>> Save Line: 10, Label: Knowledge
>>> Save Line: 11, Label: Knowledge
>>> Save Line: 12, Label: Knowledge
>>> Save Line: 13, Label: Knowledge
>>> Save Line: 14, Label: Knowledge
>>> Save Line: 15, Label: Knowledge
>>> Save Line: 16, Label: Knowledge
>>> Save Line: 17, Label: Knowledge
>>> Save Line: 18, Label: Knowledge
>>> Save Line: 19, Label: Knowledge
>>> Save Line: 20, Label: Knowledge
>>> Save Line: 21, Label: Knowledge
>>> Save Line: 22, Label: Knowledge
>>> Save Line: 23, Label: Knowledge
>>> Save Line: 24, Label: Knowledge
>>> Save Line: 25, Label: Knowledge
>>> Save Line: 26, Label: Knowledge
>>> Save Line: 27, Label: Knowledge
>>

#### Create edges called DEPTH of Occupation nodes to Knowledge nodes

In [16]:
df_knowledge

Unnamed: 0,codCBO,descriptionKnowledge,proof,frequency,importance
0,202115,Linguagem de programação,4,4,4
1,202115,Outros conhecimentos de ciência da computação ...,4,4,4
2,202115,Outros conhecimentos de ciência da computação ...,4,4,4
3,202115,Informática,4,4,4
4,202115,Internet,4,4,4
...,...,...,...,...,...
31645,262820,Outros elementos (tecnologias estratégicas) ro...,3,3,4
31646,262820,Metodologia científica,4,5,5
31647,262820,Metodologia de pesquisa,4,4,4
31648,262820,Técnicas de pesquisa,5,5,5


In [17]:
driver_neo4j.send_edges_neo4j(
    dataframe=df_knowledge, 
    label_node="Occupation", 
    label_node2="Knowledge",
    search_property_node="codCBO", # Property in node
    search_property_node2="name", # Property in node
    property_node="codCBO", # Column in dataframe
    property_node2="descriptionKnowledge", # Column in dataframe
    relationship_name="DEPTH", # Label of relationship
    property_in_edge=True, # Verification if exists property in edges
    property_edge="level", # Name to property in edges
    weigth_edge="proof" # Column with data to put in weight of edges
    )

>> Save edge 0
>> Save edge 1
>> Save edge 2
>> Save edge 3
>> Save edge 4
>> Save edge 5
>> Save edge 6
>> Save edge 7
>> Save edge 8
>> Save edge 9
>> Save edge 10
>> Save edge 11
>> Save edge 12
>> Save edge 13
>> Save edge 14
>> Save edge 15
>> Save edge 16
>> Save edge 17
>> Save edge 18
>> Save edge 19
>> Save edge 20
>> Save edge 21
>> Save edge 22
>> Save edge 23
>> Save edge 24
>> Save edge 25
>> Save edge 26
>> Save edge 27
>> Save edge 28
>> Save edge 29
>> Save edge 30
>> Save edge 31
>> Save edge 32
>> Save edge 33
>> Save edge 34
>> Save edge 35
>> Save edge 36
>> Save edge 37
>> Save edge 38
>> Save edge 39
>> Save edge 40
>> Save edge 41
>> Save edge 42
>> Save edge 43
>> Save edge 44
>> Save edge 45
>> Save edge 46
>> Save edge 47
>> Save edge 48
>> Save edge 49
>> Save edge 50
>> Save edge 51
>> Save edge 52
>> Save edge 53
>> Save edge 54
>> Save edge 55
>> Save edge 56
>> Save edge 57
>> Save edge 58
>> Save edge 59
>> Save edge 60
>> Save edge 61
>> Save edge 62
>>

#### Create edges called FREQUENCY of Occupation nodes to Knowledge nodes

In [None]:
driver_neo4j.send_edges_neo4j(
    dataframe=df_knowledge, 
    label_node="Occupation", 
    label_node2="Knowledge",
    search_property_node="codCBO", # Property in node
    search_property_node2="name", # Property in node
    property_node="codCBO", # Column in dataframe
    property_node2="descriptionKnowledge", # Column in dataframe
    relationship_name="FREQUENCY", # Label of relationship
    property_in_edge=True, # Verification if exists property in edges
    property_edge="level", # Name to property in edges
    weigth_edge="frequency" # Column with data to put in weight of edges
    )

#### Create edges called IMPORTANCE of Occupation nodes to Knowledge nodes

In [None]:
driver_neo4j.send_edges_neo4j(
    dataframe=df_knowledge, 
    label_node="Occupation", 
    label_node2="Knowledge",
    search_property_node="codCBO", # Property in node
    search_property_node2="name", # Property in node
    property_node="codCBO", # Column in dataframe
    property_node2="descriptionKnowledge", # Column in dataframe
    relationship_name="IMPORTANCE", # Label of relationship
    property_in_edge=True, # Verification if exists property in edges
    property_edge="level", # Name to property in edges
    weigth_edge="importance" # Column with data to put in weight of edges
    )