# Heart Disease Data Exploration with SQLite and Pandas

### Creating the SQLite database

In [1]:
import sqlite3

# Open the on-disk SQLite database (created on first use, reused afterwards).
db_file = 'healthcare.db'
conn = sqlite3.connect(db_file)


### Loading the dataset into pandas

In [2]:
import pandas as pd

# Read the heart-disease CSV into a DataFrame.
# Point csv_path at wherever your copy of the file actually lives.
csv_path = 'heart_disease_uci.csv'
df = pd.read_csv(csv_path)


### Inserting the data into the SQLite database

In [3]:
# Write the DataFrame into the `healthcare_data` table. An existing table of
# that name is replaced, and the DataFrame index is not stored as a column.
# Left as the cell's last expression so the returned row count is displayed.
df.to_sql(
    name='healthcare_data',
    con=conn,
    if_exists='replace',
    index=False,
)


920

### Selecting all records where a patient is older than 60:

In [4]:
# Fetch every column for patients whose age is greater than 60.
# NOTE: `query1` ends up holding the resulting DataFrame, not the SQL text
# (the later cells use `queryN` for SQL strings and `resultN` for frames).
over_60_sql = "SELECT * FROM healthcare_data WHERE age > 60"
query1 = pd.read_sql_query(sql=over_60_sql, con=conn)
print(query1)


      id  age     sex        dataset               cp  trestbps   chol  fbs  \
0      1   63    Male      Cleveland   typical angina     145.0  233.0  1.0   
1      2   67    Male      Cleveland     asymptomatic     160.0  286.0  0.0   
2      3   67    Male      Cleveland     asymptomatic     120.0  229.0  0.0   
3      7   62  Female      Cleveland     asymptomatic     140.0  268.0  0.0   
4      9   63    Male      Cleveland     asymptomatic     130.0  254.0  0.0   
..   ...  ...     ...            ...              ...       ...    ...  ...   
216  910   68    Male  VA Long Beach      non-anginal     134.0  254.0  1.0   
217  912   62    Male  VA Long Beach     asymptomatic     160.0  254.0  1.0   
218  914   62    Male  VA Long Beach     asymptomatic     158.0  170.0  0.0   
219  917   62    Male  VA Long Beach   typical angina       NaN  139.0  0.0   
220  920   62    Male  VA Long Beach  atypical angina     120.0  254.0  0.0   

              restecg  thalch  exang  oldpeak      

### Counting the number of records where sex is Female:

In [8]:
# Count the rows whose `sex` column equals 'Female'.
query2 = (
    "SELECT COUNT(*) "
    "FROM healthcare_data "
    "WHERE sex = 'Female'"
)
# The aggregate comes back as a one-row, one-column DataFrame.
result2 = pd.read_sql_query(sql=query2, con=conn)
print(result2)


   COUNT(*)
0       194


### Grouping by chest pain type (cp) and calculating the average age for each group:

In [9]:
# Average patient age per chest-pain type: one output row per distinct `cp`.
query3 = (
    "SELECT cp, AVG(age) as avg_age "
    "FROM healthcare_data "
    "GROUP BY cp"
)
result3 = pd.read_sql_query(sql=query3, con=conn)
print(result3)


                cp    avg_age
0     asymptomatic  54.959677
1  atypical angina  49.241379
2      non-anginal  53.333333
3   typical angina  54.826087


### Sorting records by cholesterol level (chol) and returning the top 5 patients with the highest cholesterol:

In [10]:
# Five records with the largest `chol` values: sort descending, keep the first 5.
query4 = (
    "SELECT * "
    "FROM healthcare_data "
    "ORDER BY chol DESC "
    "LIMIT 5"
)
result4 = pd.read_sql_query(sql=query4, con=conn)
print(result4)


    id  age     sex    dataset            cp  trestbps   chol  fbs  \
0  547   54    Male    Hungary  asymptomatic     130.0  603.0    1   
1  153   67  Female  Cleveland   non-anginal     115.0  564.0    0   
2  529   32    Male    Hungary  asymptomatic     118.0  529.0    0   
3  569   53    Male    Hungary   non-anginal     145.0  518.0    0   
4  585   44    Male    Hungary  asymptomatic     135.0  491.0    0   

          restecg  thalch  exang  oldpeak slope   ca               thal  num  
0          normal   125.0      1      1.0  flat  NaN               None    1  
1  lv hypertrophy   160.0      0      1.6  flat  0.0  reversable defect    0  
2          normal   130.0      0      0.0  None  NaN               None    1  
3          normal   130.0      0      0.0  None  NaN               None    1  
4          normal   135.0      0      0.0  None  NaN               None    1  
