-
Notifications
You must be signed in to change notification settings - Fork 4
/
data modeling.sql
168 lines (151 loc) · 4.59 KB
/
data modeling.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
SELECT * FROM stroke
SELECT COUNT(id) FROM stroke
/*Gender Dimesions*/
SELECT DISTINCT
CASE
WHEN gender = 'Male' THEN '1'
WHEN gender = 'Female' THEN '2'
ELSE '3'
END AS gender_key,
gender
FROM stroke
/*hypertension Dimesions*/
SELECT DISTINCT
hypertension as hypertension_key,
CASE
WHEN hypertension = '1' THEN 'Yes'
ELSE 'No'
END AS hypertension
FROM stroke
/*heart_disease Dimesions*/
SELECT DISTINCT
heart_disease as heart_disease_key,
CASE
WHEN heart_disease = '1' THEN 'Yes'
ELSE 'No'
END AS heart_disease
FROM stroke
/*stroke Dimesions*/
SELECT DISTINCT
stroke as stroke_key,
CASE
WHEN stroke = '1' THEN 'Yes'
ELSE 'No'
END AS stroke
FROM stroke
/*ever_married Dimesions*/
SELECT DISTINCT
CASE
WHEN ever_married = 'Yes' THEN '1'
ELSE '0'
END AS ever_married_key,
ever_married
FROM stroke
/*work_type Dimesions*/
SELECT DISTINCT
CASE
WHEN work_type = 'Self-employed' THEN '1'
WHEN work_type = 'Never_worked' THEN '2'
WHEN work_type = 'children' THEN '3'
WHEN work_type = 'Govt_job' THEN '4'
WHEN work_type = 'Private' THEN '5'
END AS work_type_key,
work_type
FROM stroke
/*Residence_type Dimesions*/
SELECT DISTINCT
CASE
WHEN Residence_type = 'Rural' THEN '0'
WHEN Residence_type = 'Urban' THEN '1'
END AS Residence_type_key,
Residence_type
FROM stroke
/*Diabetes_status Dimesions*/
SELECT DISTINCT
CASE
WHEN avg_glucose_level <= 80 THEN '1'
WHEN avg_glucose_level > 80 AND avg_glucose_level <= 100 THEN '2'
WHEN avg_glucose_level > 100 AND avg_glucose_level < 125 THEN '3'
WHEN avg_glucose_level > 125 THEN '4'
END AS Diabetes_status_key,
CASE
WHEN avg_glucose_level <= 80 THEN 'hypoglycemic'
WHEN avg_glucose_level > 80 AND avg_glucose_level <= 100 THEN 'Normal'
WHEN avg_glucose_level > 100 AND avg_glucose_level < 125 THEN 'Prediabetes'
WHEN avg_glucose_level > 125 THEN 'Diabetic'
END AS Diabetes_status
FROM stroke;
/*bmi Dimesions*/
SELECT DISTINCT
CASE
WHEN bmi <= 18.5 THEN '1'
WHEN bmi > 18.5 AND bmi <= 24.9 THEN '2'
WHEN bmi > 24.9 AND bmi <= 29.9 THEN '3'
WHEN bmi > 29.9 THEN '4'
END AS bmi_key,
CASE
WHEN bmi <= 18.5 THEN 'underweight'
WHEN bmi > 18.5 AND bmi <= 24.9 THEN 'Normal Weight'
WHEN bmi > 24.9 AND bmi <= 29.9 THEN 'Overweight'
WHEN bmi > 29.9 THEN 'Obese'
END AS bmi_status
FROM stroke
/*smoking_status Dimesions*/
SELECT DISTINCT
CASE
WHEN smoking_status = 'never smoked' THEN '1'
WHEN smoking_status = 'smokes' THEN '2'
WHEN smoking_status = 'formerly smoked' THEN '3'
WHEN smoking_status = 'Unknown' THEN '4'
END AS smoking_status_key,
smoking_status
FROM stroke
SELECT * FROM stroke
/*Fact table*/
SELECT
id,
CASE
WHEN gender = 'Male' THEN '1'
WHEN gender = 'Female' THEN '2'
ELSE '3'
END AS gender_key,
age,
hypertension as hypertension_key,
heart_disease as heart_disease_key,
CASE
WHEN ever_married = 'Yes' THEN '1'
ELSE '0'
END AS ever_married_key,
CASE
WHEN work_type = 'Self-employed' THEN '1'
WHEN work_type = 'Never_worked' THEN '2'
WHEN work_type = 'children' THEN '3'
WHEN work_type = 'Govt_job' THEN '4'
WHEN work_type = 'Private' THEN '5'
END AS work_type_key,
CASE
WHEN Residence_type = 'Rural' THEN '0'
WHEN Residence_type = 'Urban' THEN '1'
END AS Residence_type_key,
avg_glucose_level,
CASE
WHEN smoking_status = 'never smoked' THEN '1'
WHEN smoking_status = 'smokes' THEN '2'
WHEN smoking_status = 'formerly smoked' THEN '3'
WHEN smoking_status = 'Unknown' THEN '4'
END AS smoking_status_key,
CASE
WHEN avg_glucose_level <= 80 THEN '1'
WHEN avg_glucose_level > 80 AND avg_glucose_level <= 100 THEN '2'
WHEN avg_glucose_level > 100 AND avg_glucose_level < 125 THEN '3'
WHEN avg_glucose_level > 125 THEN '4'
END AS Diabetes_status_key,
bmi,
CASE
WHEN bmi <= 18.5 THEN '1'
WHEN bmi > 18.5 AND bmi <= 24.9 THEN '2'
WHEN bmi > 24.9 AND bmi <= 29.9 THEN '3'
WHEN bmi > 29.9 THEN '4'
END AS bmi_key,
stroke as stroke_key
FROM stroke