In [25]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plts
from qiskit import QuantumCircuit
from qiskit.quantum_info import Statevector
import kagglehub

Carregamento dos dados e análise exploratória

In [26]:
# Carregamento dos dados
df = pd.read_csv("lung_disease_data.csv", delimiter=',')

In [27]:
# Análise exploratória
print('\nHead:')
print(df.head())

print('\nDataset Info:')
df.info()

print('\nMissing Values Count:')
display(df.isnull().sum())


Head:
    Age  Gender Smoking Status  Lung Capacity Disease Type Treatment Type  \
0  71.0  Female             No           4.49         COPD        Therapy   
1  34.0  Female            Yes            NaN   Bronchitis        Surgery   
2  80.0    Male            Yes           1.95         COPD            NaN   
3  40.0  Female            Yes            NaN   Bronchitis     Medication   
4  43.0    Male            Yes           4.60         COPD        Surgery   

   Hospital Visits Recovered  
0             14.0       Yes  
1              7.0        No  
2              4.0       Yes  
3              1.0        No  
4              NaN       Yes  

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5200 entries, 0 to 5199
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Age              4900 non-null   float64
 1   Gender           4900 non-null   object 
 2   Smoking Status   4900 non-null   o

Age                300
Gender             300
Smoking Status     300
Lung Capacity      300
Disease Type       300
Treatment Type     300
Hospital Visits    300
Recovered          300
dtype: int64

In [28]:
# Separação dos valores numéricos e categóricos para cleaning e tratamento de NaN's
numeric_cols = ['Age', 'Lung Capacity', 'Hospital Visits']
categorical_cols = ['Gender', 'Smoking Status', 'Disease Type', 'Treatment Type', 'Recovered']

# Tratamento dos valores numéricos
for col in numeric_cols:
    if df[col].isnull().sum() > 0:
        median_val = df[col].median()
        df[col].fillna(median_val, inplace=True)
        print(f"Preenchimento da coluna {col} com os valores médios {median_val}")

# Tratamento dos valores categóricos
for col in categorical_cols:
    if df[col].isnull().sum() > 0:
        mode_val = df[col].mode()[0]
        df[col].fillna(mode_val, inplace=True)
        print(f"Preenchimento da coluna {col} com a moda dos valores '{mode_val}'")

# Transformação de 'yes' em 1 e 'no' em 0
df['Recovered_binary'] = df['Recovered'].apply(lambda x: 1 if x.lower() == 'yes' else 0)

# Nova checagem sobre valores faltantes
print('\nMissing values after imputation:')
display(df[numeric_cols + categorical_cols].isnull().sum())

Preenchimento da coluna Age com os valores médios 54.0
Preenchimento da coluna Lung Capacity com os valores médios 3.48
Preenchimento da coluna Hospital Visits com os valores médios 8.0
Preenchimento da coluna Gender com a moda dos valores 'Female'
Preenchimento da coluna Smoking Status com a moda dos valores 'Yes'
Preenchimento da coluna Disease Type com a moda dos valores 'Bronchitis'
Preenchimento da coluna Treatment Type com a moda dos valores 'Medication'
Preenchimento da coluna Recovered com a moda dos valores 'Yes'

Missing values after imputation:


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always beha

Age                0
Lung Capacity      0
Hospital Visits    0
Gender             0
Smoking Status     0
Disease Type       0
Treatment Type     0
Recovered          0
dtype: int64

In [29]:
# One-hot encoding nas features categóricas
df = pd.get_dummies(df)
df

Unnamed: 0,Age,Lung Capacity,Hospital Visits,Recovered_binary,Gender_Female,Gender_Male,Smoking Status_No,Smoking Status_Yes,Disease Type_Asthma,Disease Type_Bronchitis,Disease Type_COPD,Disease Type_Lung Cancer,Disease Type_Pneumonia,Treatment Type_Medication,Treatment Type_Surgery,Treatment Type_Therapy,Recovered_No,Recovered_Yes
0,71.0,4.49,14.0,1,True,False,True,False,False,False,True,False,False,False,False,True,False,True
1,34.0,3.48,7.0,0,True,False,False,True,False,True,False,False,False,False,True,False,True,False
2,80.0,1.95,4.0,1,False,True,False,True,False,False,True,False,False,True,False,False,False,True
3,40.0,3.48,1.0,0,True,False,False,True,False,True,False,False,False,True,False,False,True,False
4,43.0,4.60,8.0,1,False,True,False,True,False,False,True,False,False,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5195,40.0,1.43,1.0,1,False,True,True,False,False,False,False,True,False,True,False,False,False,True
5196,21.0,1.50,4.0,0,True,False,False,True,False,False,True,False,False,True,False,False,True,False
5197,42.0,5.53,5.0,0,True,False,True,False,False,False,False,False,True,False,True,False,True,False
5198,82.0,3.68,9.0,0,False,True,True,False,False,True,False,False,False,True,False,False,True,False


In [None]:
cols_boolean = ['Gender_Female', 'Gender_Male', 'Smoking Status_No', 'Smoking Status_Yes',	
                'Disease Type_Asthma', 'Disease Type_Bronchitis', 'Disease Type_COPD',
                'Disease Type_Lung Cancer', 'Disease Type_Pneumonia', 'Treatment Type_Medication',	
                'Treatment Type_Surgery', 'Treatment Type_Therapy', 'Recovered_No', 'Recovered_Yes']
for col in cols_boolean:
    df[col] = df[col].apply(lambda x: 1 if x == True else 0)

df

Unnamed: 0,Age,Lung Capacity,Hospital Visits,Recovered_binary,Gender_Female,Gender_Male,Smoking Status_No,Smoking Status_Yes,Disease Type_Asthma,Disease Type_Bronchitis,Disease Type_COPD,Disease Type_Lung Cancer,Disease Type_Pneumonia,Treatment Type_Medication,Treatment Type_Surgery,Treatment Type_Therapy,Recovered_No,Recovered_Yes
0,71.0,4.49,14.0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1
1,34.0,3.48,7.0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0
2,80.0,1.95,4.0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1
3,40.0,3.48,1.0,0,1,0,0,1,0,1,0,0,0,1,0,0,1,0
4,43.0,4.60,8.0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5195,40.0,1.43,1.0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1
5196,21.0,1.50,4.0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0
5197,42.0,5.53,5.0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0
5198,82.0,3.68,9.0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0


Unnamed: 0,Age,Lung Capacity,Hospital Visits,Recovered_binary,Gender_Female,Gender_Male,Smoking Status_No,Smoking Status_Yes,Disease Type_Asthma,Disease Type_Bronchitis,Disease Type_COPD,Disease Type_Lung Cancer,Disease Type_Pneumonia,Treatment Type_Medication,Treatment Type_Surgery,Treatment Type_Therapy,Recovered_No,Recovered_Yes
0,71.0,4.49,14.0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,1
1,34.0,3.48,7.0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0
2,80.0,1.95,4.0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1
3,40.0,3.48,1.0,0,1,0,0,1,0,1,0,0,0,1,0,0,1,0
4,43.0,4.60,8.0,1,0,1,0,1,0,0,1,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5195,40.0,1.43,1.0,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1
5196,21.0,1.50,4.0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0
5197,42.0,5.53,5.0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0
5198,82.0,3.68,9.0,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0


In [32]:
'''# Passo 3: Codificação quântica (Amplitude Encoding)
image_data = images_reduced[0]
image_data_normalized = image_data / np.linalg.norm(image_data)
state = Statevector(image_data_normalized)
num_qubits = int(np.log2(len(image_data_normalized)))
qc = QuantumCircuit(num_qubits)
qc.initialize(state, range(num_qubits))

# Passo 4: Executar o circuito
simulator = Aer.get_backend('statevector_simulator')
result = execute(qc, simulator).result()
statevector = result.get_statevector()

# Passo 5: Pós-processamento
classical_data = np.abs(statevector)**2
print("Dados clássicos reconstruídos:", classical_data)'''

'# Passo 3: Codificação quântica (Amplitude Encoding)\nimage_data = images_reduced[0]\nimage_data_normalized = image_data / np.linalg.norm(image_data)\nstate = Statevector(image_data_normalized)\nnum_qubits = int(np.log2(len(image_data_normalized)))\nqc = QuantumCircuit(num_qubits)\nqc.initialize(state, range(num_qubits))\n\n# Passo 4: Executar o circuito\nsimulator = Aer.get_backend(\'statevector_simulator\')\nresult = execute(qc, simulator).result()\nstatevector = result.get_statevector()\n\n# Passo 5: Pós-processamento\nclassical_data = np.abs(statevector)**2\nprint("Dados clássicos reconstruídos:", classical_data)'