In [47]:
#@title Conectar com o Google Drive

# Mount Google Drive into the Colab runtime at /content/drive so the dataset
# files referenced by later cells are reachable through the file system.
from google.colab import drive
drive.mount('/content/drive')

# The runtime is now connected to Google Drive.

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [48]:
#@title  Puxar Base de Dados


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Import the libraries used by the notebook.

# Location of the input CSV on the mounted shared drive.
uri = '/content/drive/Shareddrives/Colab_Grupo2/Colab/Data/dataToPredictTarget.csv'

data = pd.read_csv(uri)
# `data` is the DataFrame loaded from the path declared above.

# Show (rows, columns) as the cell output.
data.shape

(3977, 37)

In [49]:
#@title  Criar cópia

# Take an independent single-column copy so that changes made to `df`
# do not touch the loaded `data` frame.
df = data.loc[:, ['follow_up_days']].copy()

# Show the column dtype as the cell output.
df.dtypes

follow_up_days    float64
dtype: object

In [50]:
#@title  Transformar follow_up_days em números inteiros

# Cast every column of `df` to integer; when the cells run in order, `df`
# holds only follow_up_days at this point.
# NOTE(review): presumably the column has no missing values and losing the
# fractional part is acceptable — confirm against the dataset.
df = df.astype(int)

In [51]:
#@title  Criar um Dataframe com uma coluna de inteiros

# Placeholder frame with a single integer-typed column that will hold the
# class label of each row.
follow_up_classified = pd.DataFrame(columns=['follow_up_classified'], dtype=int)

In [52]:
#@title  Classificação do follow_up_days em baixo, médio e alto

# Cut points taken from the original (float) column. They are computed once
# here instead of being recomputed for every row inside the classifier.
low_cut = data['follow_up_days'].quantile(0.33)
high_cut = data['follow_up_days'].quantile(0.66)


def classify_follow_up_days(x):
  """Map one follow_up_days value to its class label.

  Args:
    x: a single follow_up_days value.

  Returns:
    0 (low) when x is below the 0.33 quantile, 1 (medium) when it is below
    the 0.66 quantile, and 2 (high) otherwise.
  """
  if x < low_cut:
    return 0
  if x < high_cut:
    return 1
  return 2


# One label per row of df, in row order.
classifiers = df['follow_up_days'].apply(classify_follow_up_days).tolist()

# Placeholder column; the following cell overwrites it with the real labels.
df['follow_up_classified'] = 0

# Build the label frame in one step. The previous row-by-row DataFrame.append
# no longer exists in pandas >= 2.0, was quadratic, and duplicated rows when
# this cell was executed more than once.
follow_up_classified = pd.DataFrame(
    {'follow_up_classified': classifiers}, dtype=int)

# Classify follow_up_days into three classes: low (0), medium (1) and high (2).

In [53]:
#@title  Criação de um id temporário para cada linha

# Attach the class labels to df (assignment aligns on the row index).
df['follow_up_classified'] = follow_up_classified['follow_up_classified']

# Temporary sequential id 0..n-1, assigned in a single step. The previous
# element-wise `df['id'][n] = ...` was chained assignment: pandas warns about
# it and, with copy-on-write enabled, it leaves df unchanged.
df['id'] = range(len(df))

# follow_up_days is no longer needed in df. errors='ignore' lets the cell be
# re-run after the column has already been removed.
df = df.drop(columns='follow_up_days', errors='ignore')

# Give `data` the same id so both frames can be merged on it.
data['id'] = df['id']

# Add a temporary id to make the later data handling easier.

In [54]:
#@title  Juntar as bases de dados

# Left-join the label frame with the full dataset on the temporary id, then
# discard two columns:
#   - follow_up_days: the class label is derived from it, so it must not stay
#     in the output (the original note presumably refers to data leakage);
#   - id: it only existed to perform this join.
new_data = (
    pd.merge(df, data, how='left', on='id')
    .drop(columns=["follow_up_days", "id"])
)

# Merge the two frames.



In [55]:
# Remove the pandas display limits on columns and rows for this session.
for display_option in ('display.max_columns', 'display.max_rows'):
  pd.set_option(display_option, None)

# Preview the first 100 rows of the merged dataset as the cell output.
new_data.head(100)

Unnamed: 0,follow_up_classified,bmi,weight,height,contraceptive_method___1,antec_fam_cancer_mama,alcohol_type___1,alcohol_type___2,familial_degree___1,familial_degree___2,familial_degree___3,family_cancer_id___1,family_cancer_id___2,family_cancer_id___3,family_cancer_id___4,family_cancer_id___6,er_ihc,pr_ihc,tumor_subtype,ultinfo,reclocal,recregio,recdist,trathosp,topo,basediag,morfo,tumor_stage,t,n,m,treatment_time,exam_time,her2_ihc,recidive,age,escolari
0,2,0.489334,-0.016035,152.1,0.0,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,2.0,1.0,1.0,0.0,9.0,509.0,3.0,85003.0,10.0,11.0,0.0,0.0,4382.91,1826.2125,1.0,1.0,27027.945,2.0
1,2,1.302154,0.122837,157.909091,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,4.0,0.0,0.0,0.0,8.0,504.0,3.0,85003.0,21.0,2.0,0.0,0.0,1826.2125,1460.97,4.0,0.0,20818.8225,2.0
2,2,4.553521,0.045902,155.117647,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,2.0,1.0,1.0,0.0,5.0,508.0,3.0,85003.0,31.0,3.0,1.0,0.0,2921.94,4382.91,1.0,1.0,26297.46,4.0
3,2,0.605163,0.022619,152.0,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,2.0,0.0,0.0,0.0,9.0,509.0,3.0,84803.0,21.0,2.0,0.0,0.0,3287.1825,1460.97,4.0,0.0,25932.2175,2.0
4,2,0.523438,0.00712,167.0,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,2.0,0.0,0.0,0.0,7.0,508.0,3.0,85003.0,22.0,2.0,1.0,0.0,2921.94,1460.97,4.0,0.0,20453.58,3.0
5,2,0.953785,-0.138095,161.0,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,1.0,0.0,4.0,504.0,3.0,85003.0,21.0,1.0,1.0,0.0,730.485,730.485,1.0,1.0,17896.8825,2.0
6,0,1.576012,0.114815,146.769231,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,1.0,0.0,0.0,0.0,3.0,509.0,3.0,85003.0,21.0,13.0,1.0,0.0,589.79433,4382.91,4.0,0.0,24106.005,9.0
7,2,1.643721,-0.051724,149.333333,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,2.0,0.0,0.0,1.0,9.0,509.0,3.0,85003.0,22.0,2.0,1.0,0.0,2921.94,1460.97,4.0,1.0,19723.095,9.0
8,0,1.196244,0.094505,154.0,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,1.0,0.0,3.0,509.0,3.0,85003.0,40.0,4.0,2.0,1.0,0.0,365.2425,4.0,1.0,22279.7925,3.0
9,0,1.826017,-0.052392,158.764706,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,2.0,0.0,0.0,0.0,9.0,509.0,3.0,85003.0,31.0,3.0,2.0,0.0,365.2425,1826.2125,4.0,0.0,15705.4275,2.0


In [56]:
#@title  Salvar base de dados

# Write the classified dataset to the same shared-drive folder as the input
# file, leaving the row index out of the CSV.
new_data.to_csv(
    path_or_buf="/content/drive/Shareddrives/Colab_Grupo2/Colab/Data/data_classified.csv",
    index=False,
)