In [26]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
import seaborn as sns

# Upload and look at Data

In [27]:
df = pd.read_csv("../archive/Heart_Prediction_Quantum_Dataset.csv")
df.head(5)

Unnamed: 0,Age,Gender,BloodPressure,Cholesterol,HeartRate,QuantumPatternFeature,HeartDisease
0,68,1,105,191,107,8.362241,1
1,58,0,97,249,89,9.249002,0
2,44,0,93,190,82,7.942542,1
3,72,1,93,183,101,6.495155,1
4,37,0,145,166,103,7.6539,1


In [28]:
df.describe()

Unnamed: 0,Age,Gender,BloodPressure,Cholesterol,HeartRate,QuantumPatternFeature,HeartDisease
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,54.864,0.468,132.874,221.5,88.766,8.317407,0.6
std,14.315004,0.499475,26.418516,43.86363,17.417289,0.919629,0.490389
min,30.0,0.0,90.0,150.0,60.0,6.164692,0.0
25%,43.0,0.0,111.0,183.75,73.0,7.675779,0.0
50%,55.0,0.0,132.0,221.0,89.0,8.323064,1.0
75%,66.25,1.0,155.0,258.0,104.0,8.935999,1.0
max,79.0,1.0,179.0,299.0,119.0,10.784886,1.0


In [29]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Age                    500 non-null    int64  
 1   Gender                 500 non-null    int64  
 2   BloodPressure          500 non-null    int64  
 3   Cholesterol            500 non-null    int64  
 4   HeartRate              500 non-null    int64  
 5   QuantumPatternFeature  500 non-null    float64
 6   HeartDisease           500 non-null    int64  
dtypes: float64(1), int64(6)
memory usage: 27.5 KB


In [30]:
df.shape

(500, 7)

In [31]:
def reduce_mem_usage(df):

    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object and col_type.name != 'category' and 'datetime' not in col_type.name:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        elif 'datetime' not in col_type.name:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

In [32]:
df_reduced = reduce_mem_usage(df)
df_reduced.info()

Memory usage of dataframe is 0.03 MB
Memory usage after optimization is: 0.00 MB
Decreased by 81.8%
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Age                    500 non-null    int8   
 1   Gender                 500 non-null    int8   
 2   BloodPressure          500 non-null    int16  
 3   Cholesterol            500 non-null    int16  
 4   HeartRate              500 non-null    int8   
 5   QuantumPatternFeature  500 non-null    float16
 6   HeartDisease           500 non-null    int8   
dtypes: float16(1), int16(2), int8(4)
memory usage: 5.0 KB
