-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathproject2.sql
127 lines (75 loc) · 4.63 KB
/
project2.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
-- -----------------------------------------------------------------------------------------------------------------------
-- --------------------------------------------- Collecting our data -----------------------------------------------------
-- -----------------------------------------------------------------------------------------------------------------------
-- first off, before importing our data we should create a database to load our data into it
-- or just load our table into an existing database.
CREATE DATABASE PROJECT;
-- We want to work on our databse.
USE PROJECT;
-- Create a table that will contain the csv file we want to import.
CREATE TABLE calls (
ID CHAR(50),
cust_name CHAR (50),
sentiment CHAR (20),
csat_score INT,
call_timestamp CHAR (10),
reason CHAR (20),
city CHAR (20),
state CHAR (20),
channel CHAR (20),
response_time CHAR (20),
call_duration_minutes INT,
call_center CHAR (20)
);
-- Here we used table data import wizard and loaded our data in. Let's check how it looks.
SELECT * FROM calls LIMIT 10;
-- -----------------------------------------------------------------------------------------------------------------------
-- --------------------------------------------- Cleaning our data -------------------------------------------------------
-- -----------------------------------------------------------------------------------------------------------------------
-- The call_timestamp is a string, so we need to convert it to a date.
SET SQL_SAFE_UPDATES = 0;
UPDATE calls SET call_timestamp = str_to_date(call_timestamp, "%m/%d/%Y");
UPDATE calls SET csat_score = NULL WHERE csat_score = 0;
SET SQL_SAFE_UPDATES = 1;
SELECT * FROM calls LIMIT 10;
-- -----------------------------------------------------------------------------------------------------------------------
-- --------------------------------------------- Exploring our data ------------------------------------------------------
-- -----------------------------------------------------------------------------------------------------------------------
-- lets see the shape pf our data, i.e, the number of columns and rows
SELECT COUNT(*) AS rows_num FROM calls;
SELECT COUNT(*) AS cols_num FROM information_schema.columns WHERE table_name = 'calls' ;
-- Checking the distinct values of some columns:
SELECT DISTINCT sentiment FROM calls;
SELECT DISTINCT reason FROM calls;
SELECT DISTINCT channel FROM calls;
SELECT DISTINCT response_time FROM calls;
SELECT DISTINCT call_center FROM calls;
-- The count and precentage from total of each of the distinct values we got:
SELECT sentiment, count(*), ROUND((COUNT(*) / (SELECT COUNT(*) FROM calls)) * 100, 1) AS pct
FROM calls GROUP BY 1 ORDER BY 3 DESC;
SELECT reason, count(*), ROUND((COUNT(*) / (SELECT COUNT(*) FROM calls)) * 100, 1) AS pct
FROM calls GROUP BY 1 ORDER BY 3 DESC;
SELECT channel, count(*), ROUND((COUNT(*) / (SELECT COUNT(*) FROM calls)) * 100, 1) AS pct
FROM calls GROUP BY 1 ORDER BY 3 DESC;
SELECT response_time, count(*), ROUND((COUNT(*) / (SELECT COUNT(*) FROM calls)) * 100, 1) AS pct
FROM calls GROUP BY 1 ORDER BY 3 DESC;
SELECT call_center, count(*), ROUND((COUNT(*) / (SELECT COUNT(*) FROM calls)) * 100, 1) AS pct
FROM calls GROUP BY 1 ORDER BY 3 DESC;
SELECT state, COUNT(*) FROM calls GROUP BY 1 ORDER BY 2 DESC;
SELECT DAYNAME(call_timestamp) as Day_of_call, COUNT(*) num_of_calls FROM calls GROUP BY 1 ORDER BY 2 DESC;
-- Aggregations :
SELECT MIN(csat_score) AS min_score, MAX(csat_score) AS max_score, ROUND(AVG(csat_score),1) AS avg_score
FROM calls WHERE csat_score != 0;
SELECT MIN(call_timestamp) AS earliest_date, MAX(call_timestamp) AS most_recent FROM calls;
SELECT MIN(call_duration_minutes) AS min_call_duration, MAX(call_duration_minutes) AS max_call_duration, AVG(call_duration_minutes) AS avg_call_duration FROM calls;
SELECT call_center, response_time, COUNT(*) AS count
FROM calls GROUP BY 1,2 ORDER BY 1,3 DESC;
SELECT call_center, AVG(call_duration_minutes) FROM calls GROUP BY 1 ORDER BY 2 DESC;
SELECT channel, AVG(call_duration_minutes) FROM calls GROUP BY 1 ORDER BY 2 DESC;
SELECT state, COUNT(*) FROM calls GROUP BY 1 ORDER BY 2 DESC;
SELECT state, reason, COUNT(*) FROM calls GROUP BY 1,2 ORDER BY 1,2,3 DESC;
SELECT state, sentiment , COUNT(*) FROM calls GROUP BY 1,2 ORDER BY 1,3 DESC;
SELECT state, AVG(csat_score) as avg_csat_score FROM calls WHERE csat_score != 0 GROUP BY 1 ORDER BY 2 DESC;
SELECT sentiment, AVG(call_duration_minutes) FROM calls GROUP BY 1 ORDER BY 2 DESC;
-- more advanced queries.
SELECT call_timestamp, MAX(call_duration_minutes) OVER(PARTITION BY call_timestamp) AS max_call_duration FROM calls GROUP BY 1 ORDER BY 2 DESC;