In [1]:
import pandas as pd

In [2]:
# importing required package
import sqlite3 as sq
import plotly.graph_objs as go
import plotly.express as px

In [3]:
# Creating Database
# Opens the SQLite file in the working directory (sqlite3 creates it if missing).
conn = sq.connect('brain_stroke.db')

# Create a table called brainStroke
# The column order mirrors the CSV column order, because rows are later
# inserted positionally with `?` placeholders.
# `age` is declared DECIMAL(4,2): the data contains two-decimal ages such as
# 0.08 and 0.64, which the previous DECIMAL(4,1) declaration did not describe.
# SQLite maps DECIMAL(p,s) to NUMERIC affinity and does not enforce precision,
# so this only makes the declared schema agree with the stored values.
query = """
CREATE TABLE brainStroke(
    gender VARCHAR(10),
    age DECIMAL(4,2),
    hypertension SMALLINT,
    heart_disease SMALLINT,
    ever_married VARCHAR(5),
    work_type VARCHAR(20),
    residence_type VARCHAR(10),
    avg_glucose_level DECIMAL(5,2),
    bmi DECIMAL(4,1),
    smoking_status VARCHAR(15),
    stroke SMALLINT
)
"""

In [4]:
# (Re)build the brainStroke table from the DDL held in `query`.
# `cur` is kept at notebook level because later cells reuse it.
cur = conn.cursor()
with conn:  # the connection context manager commits on success, rolls back on error
    for ddl in ("DROP TABLE IF EXISTS brainStroke", query):
        cur.execute(ddl)

In [5]:
# Confirm the table was created by listing every table name in sqlite_master.
query = "SELECT name FROM sqlite_master WHERE type='table'"
# sqlite3's Cursor.execute returns the cursor itself, so the fetch can be chained.
cur.execute(query).fetchall()

[('brainStroke',)]

In [6]:
# Load the raw stroke dataset from the CSV in the working directory.
brain_df = pd.read_csv(filepath_or_buffer='brain_stroke.csv')

In [7]:
# Peek at the first five rows to check the column names and value formats.
brain_df.head(n=5)

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
2,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
3,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
4,Male,81.0,0,0,Yes,Private,Urban,186.21,29.0,formerly smoked,1


In [8]:
# Turn each DataFrame row into a plain tuple (no index, no namedtuple wrapper)
# so the rows can be passed to a parameterized INSERT.
row_iter = brain_df.itertuples(index=False, name=None)
csv_tuple = tuple(row_iter)
# Show the first row as a sanity check.
csv_tuple[0]

('Male',
 67.0,
 0,
 1,
 'Yes',
 'Private',
 'Urban',
 228.69,
 36.6,
 'formerly smoked',
 1)

In [55]:
# Bulk-load every CSV row into brainStroke using positional placeholders
# (one `?` per table column, in table order).
insert_query = "INSERT INTO brainStroke VALUES(?,?,?,?,?,?,?,?,?,?,?)"

# Run the load inside `with conn:` so the transaction is committed on success
# (and rolled back on error). Without a commit the rows are visible only to
# this connection and are never persisted to brain_stroke.db.
with conn:
    # Remove rows left by an earlier run of this cell, so re-running it
    # reloads the table instead of appending duplicates.
    cur.execute("DELETE FROM brainStroke")
    cur.executemany(insert_query, csv_tuple)

<sqlite3.Cursor at 0x27a1cb585e0>

In [48]:
# Preview query: the first five rows of brainStroke, used to check the load.
query = "SELECT * FROM brainStroke LIMIT 5"

In [49]:
# Run the current `query` on the open connection and display it as a DataFrame.
preview_rows = pd.read_sql_query(sql=query, con=conn)
preview_rows

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,Male,67,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,Male,80,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
2,Female,49,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
3,Female,79,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
4,Male,81,0,0,Yes,Private,Urban,186.21,29.0,formerly smoked,1


In [50]:
# Number of rows for each gender value.
query = """
SELECT gender, COUNT(*) count
FROM brainStroke
GROUP BY gender
"""
gender_counts = pd.read_sql_query(sql=query, con=conn)
gender_counts

Unnamed: 0,gender,count
0,Female,2907
1,Male,2074


In [51]:
# Number of rows for each (gender, stroke) combination.
query = """
SELECT gender, stroke, COUNT(*) count
FROM brainStroke
GROUP BY gender, stroke
"""
gender_stroke_counts = pd.read_sql_query(sql=query, con=conn)
gender_stroke_counts

Unnamed: 0,gender,stroke,count
0,Female,0,2767
1,Female,1,140
2,Male,0,1966
3,Male,1,108


In [52]:
# Per gender, count the rows flagged with both hypertension and heart disease.
query = """
SELECT gender, COUNT(*) has_both_hyptnsn_hrt_dis
FROM brainStroke
WHERE hypertension = 1 AND heart_disease = 1
GROUP BY gender
"""
both_conditions_by_gender = pd.read_sql_query(sql=query, con=conn)
both_conditions_by_gender

Unnamed: 0,gender,has_both_hyptnsn_hrt_dis
0,Female,29
1,Male,35


In [56]:
# Number of rows for each (residence_type, stroke) combination.
query = """
SELECT residence_type, stroke,  COUNT(*) count
FROM brainStroke 
GROUP BY residence_type, stroke
"""
residence_stroke_counts = pd.read_sql_query(sql=query, con=conn)
residence_stroke_counts

Unnamed: 0,residence_type,stroke,count
0,Rural,0,2336
1,Rural,1,113
2,Urban,0,2397
3,Urban,1,135


In [57]:
# Maximum, minimum and mean of the age column over the whole table.
query = """
SELECT MAX(age) max_age, MIN(age) min_age, AVG(age) avg_age
FROM brainStroke
"""
age_stats = pd.read_sql_query(sql=query, con=conn)
age_stats

Unnamed: 0,max_age,min_age,avg_age
0,82,0.08,43.419859


In [59]:
# Inspect up to ten rows whose age is below 1, to see what the fractional
# minimum age looks like in context.
query = """
SELECT *
FROM brainStroke
WHERE age < 1
LIMIT 10
"""
under_one_rows = pd.read_sql_query(sql=query, con=conn)
under_one_rows

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,Female,0.64,0,0,No,children,Urban,83.82,24.9,Unknown,0
1,Female,0.88,0,0,No,children,Rural,88.11,15.5,Unknown,0
2,Female,0.32,0,0,No,children,Rural,73.71,16.2,Unknown,0
3,Male,0.88,0,0,No,children,Rural,157.57,19.2,Unknown,0
4,Male,0.24,0,0,No,children,Rural,118.87,16.3,Unknown,0
5,Female,0.32,0,0,No,children,Rural,55.86,16.0,Unknown,0
6,Female,0.72,0,0,No,children,Urban,66.36,23.0,Unknown,0
7,Male,0.8,0,0,No,children,Rural,98.67,17.5,Unknown,0
8,Male,0.4,0,0,No,children,Urban,109.56,14.3,Unknown,0
9,Female,0.08,0,0,No,children,Urban,139.67,14.1,Unknown,0


In [61]:
# List the distinct values present in the work_type column.
query = """
SELECT DISTINCT(work_type)
FROM brainStroke
"""
work_types = pd.read_sql_query(sql=query, con=conn)
work_types

Unnamed: 0,work_type
0,Private
1,Self-employed
2,Govt_job
3,children
