# **Procura de Problemas Éticos em Issues**

In [2]:
#%pip install pandas
#%pip install nltk
#%pip install ipywidgets


import pandas as pd
import os
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords

# **Implementando palavras éticas obtidas em txt**

In [3]:
with open("IET-Software-2023-Biable-Proposed-ethical-framework-for-software-requirements-engineering.txt", "r", encoding="utf-8") as arquivo:
    texto = arquivo.read()
    
texto = texto.lower()
texto

'received: 24 september 2022\n\ndoi: 10.1049/sfw2.12136\n\n\noriginal research\n                               - -\n                                 revised: 19 may 2023      accepted: 27 june 2023\n\n\n                                                                                                                                                      -   iet software\n\n\n\n\nproposed ethical framework for software requirements\nengineering\n\nseblewongel e. biable1                             | nuno m. garcia2,3                          | dida midekso4\n\n1\naddis ababa university, addis ababa, ethiopia           abstract\n2\n faculdade de ciências, universidade de lisboa,         requirements engineering is a fundamental process in software development phases. at\nlisbon, portugal                                        the same time, it is a difficult phase and exposed many ethical violations. the main\n3\ninstituto de telecomunicações, covilhã, portugal        purpose is proposing a

In [4]:
# Definindo as StopWords
nltk.download('stopwords')
stopWords = set(stopwords.words('english'))

def removerStopWords(palavras):
    return [palavra for palavra in palavras if palavra not in stopWords and palavra.isalpha()]

# Criando um tokenizer que considera expressões
expressoes = {'influence factors', 'knowledge gap', 'non-maleficence', 'professional Behaviour', 'professional codes', 'requirements engineering', 'requirements identification',
              'requirements quality', 'working environment'}

expressaoRegular = r'\b(?:' + '|'.join(expressoes) + r')\b|\w+'
tokenizer = RegexpTokenizer(expressaoRegular)


# Tokenize dos comentarios das issues

textoToK = tokenizer.tokenize(texto)
textoToK = removerStopWords(textoToK)

textoToK


[nltk_data] Downloading package stopwords to /home/zoega/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['received',
 'september',
 'doi',
 'original',
 'research',
 'revised',
 'may',
 'accepted',
 'june',
 'iet',
 'software',
 'proposed',
 'ethical',
 'framework',
 'software',
 'requirements',
 'engineering',
 'seblewongel',
 'e',
 'nuno',
 'dida',
 'addis',
 'ababa',
 'university',
 'addis',
 'ababa',
 'ethiopia',
 'abstract',
 'faculdade',
 'de',
 'ciências',
 'universidade',
 'de',
 'lisboa',
 'fundamental',
 'process',
 'software',
 'development',
 'phases',
 'lisbon',
 'portugal',
 'time',
 'difficult',
 'phase',
 'exposed',
 'many',
 'ethical',
 'violations',
 'main',
 'instituto',
 'de',
 'telecomunicações',
 'covilhã',
 'portugal',
 'purpose',
 'proposing',
 'ethical',
 'framework',
 'software',
 'department',
 'computer',
 'science',
 'addis',
 'ababa',
 'addresses',
 'identified',
 'concerns',
 'concerns',
 'include',
 'problems',
 'associated',
 'university',
 'addis',
 'ababa',
 'ethiopia',
 'quality',
 'related',
 'concerns',
 'unwillingness',
 'give',
 'requirements',
 'p

In [5]:
PalavrasEticas = [
    "autonomy",
    "beneficence",
    "bias",
    "business",
    "choices",
    "considerations",
    "deceptive",
    "dignity",
    "exposed",
    "fairness",
    "factors",
    "forbidden",
    "framework",
    "freedom",
    "governance",
    "guidelines",
    "harm",
    "identification",
    "importance",
    "influence",
    "influence factors",
    "issues",
    "justice",
    "knowledge",
    "knowledge gap",
    "lack",
    "legal",
    "management",
    "negligence",
    "non-maleficence",
    "norms",
    "obligation",
    "operation",
    "perspective",
    "practices",
    "principle",
    "privacy",
    "problems",
    "professional Behaviour",
    "professional codes",
    "profit",
    "quality",
    "relevance",
    "requirements",
    "requirements engineering",
    "requirements identification",
    "requirements quality",
    "responsible",
    "responsibility",
    "risk",
    "rules",
    "side",
    "skill",
    "solidarity",
    "stage",
    "standard",
    "sustainability",
    "technical",
    "tool",
    "toxic",
    "transparency",
    "trustworthy",
    "unpermitted",
    "values",
    "violation",
    "viability",
    "visibility",
    "washing",
    "working environment"
]

# Criando o DataFrame que guardará as ocorrências dos comentários
ocorrenciasArtigo = pd.DataFrame()
linhaInicial = 0 * len(PalavrasEticas)
palavrasAntes = "" * len(PalavrasEticas)
palavrasDepois = "" * len(PalavrasEticas)

ocorrenciasArtigo['PalavrasEticas'] = PalavrasEticas
ocorrenciasArtigo['Ocorrencias'] = linhaInicial
ocorrenciasArtigo["PalavrasAntes"] = palavrasAntes
ocorrenciasArtigo["PalavrasDepois"] = palavrasDepois
ocorrenciasArtigo = ocorrenciasArtigo.set_index('PalavrasEticas')

ocorrenciasArtigo

Unnamed: 0_level_0,Ocorrencias,PalavrasAntes,PalavrasDepois
PalavrasEticas,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
autonomy,0,,
beneficence,0,,
bias,0,,
business,0,,
choices,0,,
...,...,...,...
violation,0,,
viability,0,,
visibility,0,,
washing,0,,


In [6]:
for i in range(len(textoToK)):
            
        if(textoToK[i] in PalavrasEticas):
            
            ocorrenciasArtigo.loc[textoToK[i], 'Ocorrencias'] += 1
            
            if((i - 1) >= 0):
                if(ocorrenciasArtigo.loc[textoToK[i],'PalavrasAntes'] == ''):
                    ocorrenciasArtigo.loc[textoToK[i], 'PalavrasAntes'] = textoToK[i - 1]
                else:
                    ocorrenciasArtigo.loc[textoToK[i], 'PalavrasAntes'] += ('\n' + textoToK[i - 1])
            if((i + 1) <= (len(textoToK) - 1)):
                if(ocorrenciasArtigo.loc[textoToK[i], 'PalavrasDepois'] == ''):
                    ocorrenciasArtigo.loc[textoToK[i], 'PalavrasDepois'] = textoToK[i + 1]
                else:
                    ocorrenciasArtigo.loc[textoToK[i], 'PalavrasDepois'] += ('\n' + textoToK[i + 1])

ocorrenciasArtigo = ocorrenciasArtigo[ocorrenciasArtigo['Ocorrencias'] > 0]
ocorrenciasArtigo

Unnamed: 0_level_0,Ocorrencias,PalavrasAntes,PalavrasDepois
PalavrasEticas,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
business,1,change,environments
choices,1,influence,made
considerations,1,ethical,found
deceptive,4,systems\nrequirements\npurpose\nkinds,incomplete\nrequirements\nrequirements\npractices
exposed,2,phase\nengi,many\nmany
factors,2,human\nhuman,software\nclients
forbidden,7,practicing\nperforming\nethics\nassociated\nfi...,activities\nactivities\nunpermitted\nunpermitt...
framework,98,ethical\nethical\nproposed\nproposed\nproposed...,software\nsoftware\ncomponents\nsuggests\nexpe...
harm,2,ethical\nrequirements,due\nstakeholders
identification,4,software\nsoftware\nspecifications\nattention,knowledge\nclauses\ncon\nanalysis


In [9]:
ocorrenciasArtigo = ocorrenciasArtigo.reset_index()

caminhoPasta = 'out'
nomeArquivo = 'Artigo.csv'
caminhoArquivo = os.path.join(caminhoPasta, nomeArquivo)

if not os.path.exists(caminhoPasta):
    os.makedirs(caminhoPasta)

ocorrenciasArtigo.to_csv(caminhoArquivo, sep=',', index=False, header=True, na_rep='N/A', encoding='utf-8')