In [None]:
import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Path of the SQLite database file that holds the `patients` table.
database_name = 'heart.db'

# Open through a URI with mode=rw so that a missing file (wrong working
# directory, typo in the name) raises sqlite3.OperationalError right here.
# A plain sqlite3.connect(database_name) would silently create an empty
# database instead, and the failure would only surface later as
# "no such table: patients".
# NOTE: database_name is interpolated verbatim; percent-encode it if it ever
# contains URI-special characters such as '?', '#' or '%'.
conn = sqlite3.connect(f"file:{database_name}?mode=rw", uri=True)

In [None]:
# Load the entire `patients` table into a DataFrame using the open connection.
query = "SELECT * FROM patients"
df = pd.read_sql_query(query, conn)

# Leave the preview as the cell's last expression so Jupyter renders it as a
# rich table rather than the plain-text dump produced by print().
df.head()

In [None]:
# Feature columns that are handled as discrete categories.
categorical_vars = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']

# Cast the listed features and the `target` column to the 'category' dtype
# in a single astype call driven by a column -> dtype mapping.
category_columns = [*categorical_vars, 'target']
df = df.astype(dict.fromkeys(category_columns, 'category'))

# Sanity check: show every column's dtype, then the distinct values of each
# converted column.
print(df.dtypes)
for var in category_columns:
    print(f"Unique values in {var}: {df[var].unique()}")

In [None]:
# Draw one count plot per categorical feature, with bars split by `target`.
# The grid is sized from the number of features so that extending
# `categorical_vars` past nine entries no longer overflows a fixed 3x3 layout.
n_cols = 3
n_rows = max(1, (len(categorical_vars) + n_cols - 1) // n_cols)  # ceiling division
# 5 inches of height per row reproduces the original 20x15 figure for 3 rows.
fig = plt.figure(figsize=(20, 5 * n_rows))

for i, var in enumerate(categorical_vars):
    # Use explicit Axes handles instead of relying on pyplot's implicit
    # "current axes" state.
    ax = fig.add_subplot(n_rows, n_cols, i + 1)
    sns.countplot(x=var, hue='target', data=df, palette='Set1', ax=ax)
    ax.set_title(f'Distribution of {var} by target')
    ax.set_xlabel(var)
    ax.set_ylabel('Count')

fig.tight_layout()
plt.show()

In [None]:
# Release the SQLite connection opened earlier in the notebook.
conn.close()