# **Procura de Problemas Éticos em Issues**

In [2]:
import pandas as pd
import os

## **Fazendo a leitura dos dados:**

In [3]:
# Load the issue table and the comment table.
# NOTE(review): both reads point at the same file path; presumably one of them
# should be the comments export -- confirm the intended paths.
issues = pd.read_csv("out/yourrepo.csv")
comentarios = pd.read_csv("out/yourrepo.csv")

print(issues['DescricaoIssue'].size)

# Keep only the text column of the comments.
comentarios = comentarios.drop(columns=['NumeroComentario','DataComentario','AutorComentario','Tags','IdIssue'])

# Build one text per issue from description + title.
# - fillna(''): a missing description or title would otherwise turn the whole
#   concatenation into NaN, losing the other field and breaking later string
#   operations on the 'Comentario' column.
# - '\n' separator: without it the last word of the description is fused with
#   the first word of the title, which hides whole-word matches.
issues["Comentario"] = (
    issues['DescricaoIssue'].fillna('').astype(str)
    + '\n'
    + issues['TituloIssue'].fillna('').astype(str)
)
issues = issues.drop(columns=['NumeroIssue','CriacaoIssue','RepositorioIssue','LinkIssue','IdIssue','DescricaoIssue','TituloIssue'])

# Stack issue texts and comment texts into a single dataframe with a fresh 0..n-1 index.
comentarios = pd.concat([issues, comentarios], axis=0)
comentarios = comentarios.reset_index(drop=True)
comentarios

985


Unnamed: 0,Comentario
0,Hello 👋 \r\n\r\nI want to setup a multi tenant...
1,### Motivation / Background\r\n\r\n`model_name...
2,"### Steps to reproduce\r\nruby ""3.3.0""\r\ngem ..."
3,Managed to reproduce Rails Nightly CI failure ...
4,Fixes #53250.\r\n\r\nAccording to MySQL's docu...
...,...
7053,I seem to have a problem related to this.\n\nI...
7054,"@pixeltrix, my last comment was using 4.1.8\n"
7055,ping @pixeltrix @tenderlove - is this somethin...
7056,Still very relevant as of rails 5.1.4\r\n\r\n ...


## **Contando a ocorrência de problemas éticos:**

In [11]:
from collections import Counter
import re

# Full set of ethical-issue expressions to search for.
# Every entry must be followed by a comma: adjacent string literals are silently
# concatenated by Python (a missing comma previously merged
# 'User Data Collection' and 'Authorship' into a single, never-matching term).
EthicalIssues = {'Bias', 'Data Governance', 'Data Protection', 'Encryption', 'Informed Consent', 'Lack of Data', 'Monetization', 'Openness', 'Privacy', 'User Data Collection',
            'Authorship', 'Autonomy', 'Beneficence', 'Business Ethics', 'Commerce', 'Compliance', 'Confidentiality', 'Conflict of Interest', 'Context', 'Dependability',
            'Fairness', 'Human Agency', 'Intellectual Property', 'Oversight', 'Regulatory Approaches', 'Responsibility', 'Trust', 'Trustworthiness', 'Axiology', 'Freedom',
            'Self-Conception', 'Solidarity', 'Utility', 'Care', 'Competence', 'Professional Ethics', 'Work Ethics', 'Access', 'Accessibility', 'Common Goods', 'Dignity',
            'Diversity', 'Equality', 'Equity', 'Humanity', 'Inclusiveness', 'Individual Differences', 'Inequality', 'Justice', 'Non-Discrimination', 'Non-Maleficence',
            'Participation', 'Plurality', 'Prevention of Harm', 'Quality of Life', 'Respect for Human Autonomy', 'Retention and Addiction', 'Social Justice', 'Sustainability',
            'Unemployment', 'Welfare', 'Accountability', 'Accuracy', 'Anonymity', 'Comprehensibility', 'Consistency', 'Contestability', 'Explainability', 'Explicability',
            'Integrity', 'Interpretability', 'Liability', 'Reliability', 'Safety', 'Security', 'Speech Issues', 'Technical Robustness', 'Traceability', 'Transparency',
            'Usability', 'Computer Abuse', 'Malicious Use'}

def contaOcorrencias(comentarios, ethicalIssues):
    """Count whole-word, case-insensitive occurrences of each expression.

    Parameters
    ----------
    comentarios : pandas.DataFrame
        Frame with a 'Comentario' column holding the texts to scan. Any index
        is accepted. Cells that are not strings (e.g. NaN) are skipped.
    ethicalIssues : iterable of str
        Expressions to search for. Each one is lower-cased, regex-escaped and
        wrapped in ``\\b`` word boundaries before matching.

    Returns
    -------
    pandas.DataFrame
        Columns 'EthicalIssues' (the expression as given) and 'Ocorrencias'
        (total matches over all texts), one row per expression, including
        expressions that never matched.
    """
    # Materialise once so the same order is used for every comment.
    expressoes = list(ethicalIssues)

    # Compile each pattern a single time instead of once per comment.
    padroes = [
        (expressao, re.compile(r'\b' + re.escape(expressao.lower()) + r'\b'))
        for expressao in expressoes
    ]

    # Pre-seed with zeros so every expression gets a row even with no comments.
    contador = Counter()
    for expressao in expressoes:
        contador[expressao] += 0

    # Iterate over the column values directly: no dependency on index labels.
    for coment in comentarios['Comentario']:
        if not isinstance(coment, str):
            continue  # missing / non-text cell: nothing to search
        coment = coment.lower()
        for expressao, padrao in padroes:
            contador[expressao] += len(padrao.findall(coment))

    return pd.DataFrame({
        'EthicalIssues': list(contador.keys()),
        'Ocorrencias': list(contador.values())
    })

In [16]:
# Count every ethical-issue expression across the combined issue/comment texts.
dfEthicalIssues = contaOcorrencias(comentarios, EthicalIssues)

# Destination of the per-expression report.
caminhoPasta = 'out'
nomeArquivo = 'EthicalIssues.csv'
caminhoArquivo = os.path.join(caminhoPasta, nomeArquivo)

# exist_ok=True creates the folder only when it is missing, without a separate
# (race-prone) existence check.
os.makedirs(caminhoPasta, exist_ok=True)

dfEthicalIssues.to_csv(caminhoArquivo, sep=',', index=False, header=True, na_rep='N/A', encoding='utf-8')

# Display with the most frequent expressions first; the saved CSV keeps the
# unsorted row order because sort_values returns a new frame.
dfEthicalIssues.sort_values(by='Ocorrencias', ascending=False)

Unnamed: 0,EthicalIssues,Ocorrencias
47,Context,225
77,Encryption,132
71,Access,122
39,Security,110
46,Care,43
...,...,...
73,Comprehensibility,0
76,Quality of Life,0
75,Participation,0
74,Freedom,0


### **Separando os Problemas Éticos por tipo:**

In [13]:
# Ethical-issue expressions grouped by category. The sets are disjoint.
DataIssues = {'Bias', 'Data Governance', 'Data Protection', 'Encryption', 'Informed Consent', 'Lack of Data', 'Monetization', 'Openness', 'Privacy', 'User Data Collection'}

GovernanceIssues = {'Authorship', 'Autonomy', 'Beneficence', 'Business Ethics', 'Commerce', 'Compliance', 'Confidentiality', 'Conflict of Interest', 'Context', 'Dependability',
                    'Fairness', 'Human Agency', 'Intellectual Property', 'Oversight', 'Regulatory Approaches', 'Responsibility', 'Trust', 'Trustworthiness'}

PhilosophicalIssues = {'Axiology', 'Freedom', 'Self-Conception', 'Solidarity', 'Utility'}

ProfessionalIssues = {'Care', 'Competence', 'Professional Ethics', 'Work Ethics'}

SocialIssues = {'Access', 'Accessibility', 'Common Goods', 'Dignity', 'Diversity', 'Equality', 'Equity', 'Humanity', 'Inclusiveness', 'Individual Differences', 'Inequality',
                'Justice', 'Non-Discrimination', 'Non-Maleficence', 'Participation', 'Plurality', 'Prevention of Harm', 'Quality of Life', 'Respect for Human Autonomy',
                'Retention and Addiction', 'Social Justice', 'Sustainability', 'Unemployment', 'Welfare'}

TechnicalIssues = {'Accountability', 'Accuracy', 'Anonymity', 'Comprehensibility', 'Consistency', 'Contestability', 'Explainability', 'Explicability', 'Integrity',
                   'Interpretability', 'Liability', 'Reliability', 'Safety', 'Security', 'Speech Issues', 'Technical Robustness', 'Traceability', 'Transparency', 'Usability'}

MaliciousCases = {'Computer Abuse', 'Malicious Use'}


def _somaOcorrencias(df, termos):
    """Return the sum of 'Ocorrencias' over the rows of df whose 'EthicalIssues' label is in termos."""
    return int(df.loc[df['EthicalIssues'].isin(termos), 'Ocorrencias'].sum())


# One total per category. Each category is matched explicitly, so a label that
# belongs to no category is never silently added to "Malicious Cases".
totalData = _somaOcorrencias(dfEthicalIssues, DataIssues)
totalGovernance = _somaOcorrencias(dfEthicalIssues, GovernanceIssues)
totalPhilosophical = _somaOcorrencias(dfEthicalIssues, PhilosophicalIssues)
totalProfessional = _somaOcorrencias(dfEthicalIssues, ProfessionalIssues)
totalSocial = _somaOcorrencias(dfEthicalIssues, SocialIssues)
totalTechnical = _somaOcorrencias(dfEthicalIssues, TechnicalIssues)
totalMalicious = _somaOcorrencias(dfEthicalIssues, MaliciousCases)

# Surface labels that fall outside every category instead of hiding them.
categorizados = (DataIssues | GovernanceIssues | PhilosophicalIssues | ProfessionalIssues
                 | SocialIssues | TechnicalIssues | MaliciousCases)
naoCategorizados = sorted(set(dfEthicalIssues['EthicalIssues']) - categorizados)
if naoCategorizados:
    print(f"Uncategorized expressions ignored: {naoCategorizados}")

# Single-row summary, one column per category.
dfTipos = pd.DataFrame({
    "Data Issues": [totalData],
    "Governance Issues": [totalGovernance],
    "Philosophical Issues": [totalPhilosophical],
    "Professional Issues": [totalProfessional],
    "Social Issues": [totalSocial],
    # NOTE(review): header is misspelled ("Techinical"); kept as-is so existing
    # consumers of the CSV keep working -- rename once they are updated.
    "Techinical Issues": [totalTechnical],
    "Malicious Cases": [totalMalicious],
})

caminhoPasta = 'out'
nomeArquivo = 'EthicalIssuesTipos.csv'
caminhoArquivo = os.path.join(caminhoPasta, nomeArquivo)

# exist_ok=True creates the folder only when it is missing.
os.makedirs(caminhoPasta, exist_ok=True)

dfTipos.to_csv(caminhoArquivo, sep=',', index=False, header=True, na_rep='N/A', encoding='utf-8')

dfTipos

    
    

Unnamed: 0,Data Issues,Governance Issues,Philosophical Issues,Professional Issues,Social Issues,Techinical Issues,Malicious Cases
0,140,279,7,43,145,171,0
