In [1]:
from DbConnector import DbConnector
from part2 import Database 
from tabulate import tabulate

# Set up the program
from dotenv import load_dotenv
import os

# Read the variables from the local .env file into the process environment
# so the credentials never have to be written in the notebook itself.
load_dotenv()

# Either value is None when the corresponding variable is not defined.
user = os.environ.get('DB_USER')
password = os.environ.get('DB_PASSWORD')

# `program` is used for the raw SQL cells, `db` for the query helper methods.
program = DbConnector(USER=user, PASSWORD=password)
db = Database()

Using user:  root
Connected to: 8.0.29
You are connected to the database: ('assignment2',)
-----------------------------------------------

Using user:  root
Connected to: 8.0.29
You are connected to the database: ('assignment2',)
-----------------------------------------------



### Task 1

How many users, activities and trackpoints are there in the dataset (after it is inserted into the database)?

In [10]:
# Row counts of the User, Activity and TrackPoint tables.
# Each call returns a sequence; its first element is used as the count.
users = db.get_table_size("User")
activities = db.get_table_size("Activity")
trackpoints = db.get_table_size("TrackPoint")

print("Number of Users, Activities and TrackPoints")
size_header = ["Users", "Activities", "TrackPoints"]
size_row = [users[0], activities[0], trackpoints[0]]
print(tabulate([size_header, size_row], headers="firstrow"))

Number of Users, Activities and TrackPoints
  Users    Activities    TrackPoints
-------  ------------  -------------
    182          7877        5355109


### Task 2
Find the average, maximum and minimum number of trackpoints per user

##### Average

In [11]:
# Average trackpoints per user: the helper hands back the result rows
# together with the column names used as table headers.
avg_result = db.find_avg_trackpoints()
users, columns = avg_result
print("Average trackpoints per user")
avg_table = tabulate(users, headers=columns)
print(avg_table)


Average trackpoints per user
  user_id    AVG(trackpoints)
---------  ------------------
      000            670.871
      001            824.965
      002            924.801
      003            807.387
      004            761.858
      005            587.548
      006            801.125
      007            758.275
      008           1376
      009           1218.32
      010           1107.5
      011            451.756
      012            629.783
      013           1388.3
      014            905.936
      015            806.583
      016            690.222
      017            868.245
      018            540.795
      019            372.291
      020            704.638
      021            475
      022            900.695
      023            979.312
      024            860.408
      025            606.295
      026           1034.71
      027           2037
      028           1195.36
      029           1713.17
      030            871.352
      031            489.5
     

##### Maximum

In [12]:
# Maximum trackpoints per user: the helper hands back the result rows
# together with the column names used as table headers.
max_result = db.find_max_trackpoints()
users, columns = max_result
print("Maximum trackpoints per user")
max_table = tabulate(users, headers=columns)
print(max_table)

Maximum trackpoints per user
  user_id    MAX(trackpoints)
---------  ------------------
      000                2359
      001                2472
      002                2438
      003                2485
      004                2482
      005                2058
      006                2478
      007                2228
      008                2499
      009                2396
      010                1964
      011                2306
      012                2277
      013                2486
      014                2499
      015                2411
      016                2360
      017                2471
      018                2245
      019                2276
      020                2201
      021                 475
      022                2421
      023                2215
      024                2377
      025                2464
      026                2396
      027                2480
      028                2477
      029                2440
      030  

##### Minimum

In [13]:
# Minimum trackpoints per user: the helper hands back the result rows
# together with the column names used as table headers.
min_result = db.find_min_trackpoints()
users, columns = min_result
print("Minimum trackpoints per user")
min_table = tabulate(users, headers=columns)
print(min_table)

Minimum trackpoints per user
  user_id    MIN(trackpoints)
---------  ------------------
      000                   5
      001                  33
      002                   4
      003                   3
      004                   4
      005                   5
      006                  14
      007                   6
      008                 165
      009                 134
      010                 663
      011                   3
      012                  64
      013                  13
      014                   3
      015                  22
      016                   7
      017                   4
      018                   3
      019                  11
      020                  22
      021                 475
      022                  15
      023                  31
      024                   4
      025                   3
      026                   9
      027                1760
      028                  32
      029                   4
      030  

### Task 3

Find the top 15 users with the highest number of activities.

In [7]:
# Rank users by their number of activities and keep the 15 highest.
query = """
    SELECT RANK() OVER (
        ORDER BY COUNT(*) DESC
    ) Top, user_id, COUNT(*) as num_of_activities FROM Activity GROUP BY user_id ORDER BY COUNT(*) DESC LIMIT 15
    """
task3_cursor = db.cursor
task3_cursor.execute(query)
# Rows are fetched first, then the column names of the same result set.
rows, columns = task3_cursor.fetchall(), task3_cursor.column_names
print("Top 15 users with the highest number of activities")
top15_table = tabulate(rows, headers=columns)
print(top15_table)

Top 15 users with the highest number of activities
  Top    user_id    num_of_activities
-----  ---------  -------------------
    1        025                  715
    2        128                  519
    3        062                  406
    4        041                  399
    5        004                  346
    6        140                  345
    7        017                  265
    8        003                  261
    9        014                  236
   10        030                  210
   11        011                  201
   12        039                  198
   13        034                  180
   14        000                  155
   15        002                  146


### Task 4

Find all users who have taken a bus.

In [2]:
# Users that have taken a bus: the helper returns the rows and the
# column names used as table headers.
bus_result = db.get_user_taken_bus()
users, columns = bus_result
print("All users who have taken a bus")
bus_table = tabulate(users, headers=columns)
print(bus_table)

All users who have taken a bus
  user_id
---------
      010
      052
      062
      073
      081
      084
      085
      091
      092
      112
      125
      128
      175


### Task 5

List the top 10 users by their amount of different transportation modes.

In [3]:
# Top 10 users by number of different transportation modes: the helper
# returns the rows and the column names used as table headers.
top10_result = db.find_top10_transportations_users()
users, columns = top10_result
print("Top 10 users by their amount of different transportation modes")
top10_table = tabulate(users, headers=columns)
print(top10_table)

Top 10 users by their amount of different transportation modes
  user_id    DifferentTransportation
---------  -------------------------
      128                          9
      062                          7
      085                          4
      084                          3
      058                          3
      163                          3
      078                          3
      081                          3
      112                          3
      065                          2


### Task 6

Find activities that are registered multiple times. You should write the query even
if it returns zero results.

In [4]:
# An activity counts as "registered multiple times" when another Activity row
# has the same user, transportation mode, start time and end time but a
# different id.
#
# transportation_mode is nullable (unlabeled activities carry NULL), and in SQL
# `NULL = NULL` is not true, so a plain `=` would never match two unlabeled
# duplicates. MySQL's NULL-safe equality operator `<=>` treats two NULLs as
# equal and otherwise behaves like `=`.
query = """
    SELECT a.id FROM Activity AS a WHERE EXISTS
        (SELECT b.id FROM Activity AS b WHERE a.user_id = b.user_id
        AND a.transportation_mode <=> b.transportation_mode AND
        a.start_date_time = b.start_date_time AND a.end_date_time = b.end_date_time
        AND a.id != b.id)
    """
program.cursor.execute(query)
rows = program.cursor.fetchall()
# An empty list means no duplicated activities were found.
print("Find activities that are registered multiple times:\n", rows)

Find activities that are registered multiple times:
 []


### Task 7

a) Find the number of users that have started an activity in one day and ended the activity the next day.

In [17]:
# The helper returns a sequence; its first element is the user count.
num_users = db.get_num_user_activity_over_a_day()

print("Number of users with activity that ends the next day")
count_table = [["Number of users"], [num_users[0]]]
print(tabulate(count_table, headers="firstrow", tablefmt="fancy_grid"))

Number of users with activity that ends the next day
╒═══════════════════╕
│   Number of users │
╞═══════════════════╡
│                66 │
╘═══════════════════╛


b) List the transportation mode, user id and duration for these activities.

In [18]:
users_info = db.get_user_activity_over_a_day()

# Keep only the first three fields of every record, in the order of the
# header row below.
content = []
for record in users_info:
    content.append([record[0], record[1], record[2]])

# Header row first, then one row per activity.
table = [["User", "Transportation Mode", "Duration"], *content]

print(tabulate(table, headers="firstrow", tablefmt="fancy_grid"))

╒════════╤═══════════════════════╤════════════╕
│   User │ Transportation Mode   │ Duration   │
╞════════╪═══════════════════════╪════════════╡
│    020 │ bike                  │ 10:10:01   │
├────────┼───────────────────────┼────────────┤
│    021 │ walk                  │ 3:57:13    │
├────────┼───────────────────────┼────────────┤
│    058 │ car                   │ 0:36:12    │
├────────┼───────────────────────┼────────────┤
│    062 │ walk                  │ 1:23:44    │
├────────┼───────────────────────┼────────────┤
│    085 │ bus                   │ 0:58:25    │
├────────┼───────────────────────┼────────────┤
│    115 │ car                   │ 1:28:04    │
├────────┼───────────────────────┼────────────┤
│    115 │ car                   │ 1:02:50    │
├────────┼───────────────────────┼────────────┤
│    115 │ car                   │ 1:00:56    │
├────────┼───────────────────────┼────────────┤
│    115 │ car                   │ 1:05:10    │
├────────┼───────────────────────┼──────

### Task 9

Find the top 15 users who have gained the most altitude meters.

In [19]:
# Total altitude gained per user, top 15.
#
# Innermost query: for every trackpoint, the altitude change relative to the
# previous trackpoint of the same activity (LAG over date_time order).
# Middle query: per activity, the sum of the positive changes only, so descents
# do not cancel out climbs.
# Outer query: per user, the sum over all activities, converted to meters.
#
# The conversion factor is 0.3048 (1 ft = 0.3048 m exactly); the previous value
# 0.304 was truncated and under-reported every total by about 0.26 %.
# NOTE(review): this assumes TrackPoint.altitude is stored in feet and that
# invalid altitude markers were removed when the data was inserted - TODO confirm.
query = """
    SELECT user_id, SUM(activity_altitude) * 0.3048 AS altitude_in_meters
    FROM Activity
    JOIN (
        SELECT activity_id, SUM(difference) AS activity_altitude
        FROM (
            SELECT activity_id,
                   altitude - LAG(altitude) OVER (
                       PARTITION BY activity_id ORDER BY date_time
                   ) AS difference
            FROM TrackPoint
        ) AS altitude_difference
        WHERE difference > 0
        GROUP BY activity_id
    ) AS difference_table ON Activity.id = difference_table.activity_id
    GROUP BY user_id
    ORDER BY altitude_in_meters DESC
    LIMIT 15
    """
program.cursor.execute(query)
rows = program.cursor.fetchall()
# One line per user; each row is (user_id, altitude_in_meters).
text = "Find the top 15 users who have gained the most altitude meters\n" + "".join(
    f"User: {row[0]}, Altitude in meters: {row[1]}\n" for row in rows
)
print(text)

### Task 10

Find the users that have traveled the longest total distance in one day for each
transportation mode.

In [None]:
# One row per transportation mode; the rows are shown under the header below.
users = db.get_user_with_max_distance()

distance_header = ["User", "Transportation mode", "Distance (km)"]
table = [distance_header, *users]

print(tabulate(table, headers="firstrow", tablefmt="fancy_grid"))

╒════════╤═══════════════════════╤═════════════════╕
│   User │ Transportation mode   │   Distance (km) │
╞════════╪═══════════════════════╪═════════════════╡
│    175 │ bus                   │       1.80771   │
├────────┼───────────────────────┼─────────────────┤
│    163 │ taxi                  │      11.3794    │
├────────┼───────────────────────┼─────────────────┤
│    167 │ walk                  │       0.455136  │
├────────┼───────────────────────┼─────────────────┤
│    167 │ bike                  │       4.0598    │
├────────┼───────────────────────┼─────────────────┤
│    128 │ car                   │      18.5745    │
├────────┼───────────────────────┼─────────────────┤
│    062 │ run                   │       0.0332532 │
├────────┼───────────────────────┼─────────────────┤
│    128 │ train                 │      19.0923    │
├────────┼───────────────────────┼─────────────────┤
│    128 │ subway                │      10.8537    │
├────────┼───────────────────────┼────────────

### Task 12

Find all the users who have registered transportation_mode and their most used transportation_mode

Comment on implementation: With this dataset the join with User is not strictly needed. However, if a user without labels still had activities in Activity with a transportation_mode, the join would be necessary to exclude that user. For users who have the same number of activities for several transportation modes, we take the first transportation mode in alphabetical order.

In [None]:
# Count activities per (user, transportation_mode), number the modes of each
# user from most to least used (ties broken alphabetically) and keep the
# first one. Only labeled users and non-NULL modes are considered.
# The adjacent literals concatenate into a single-line SQL statement.
query = (
    "SELECT user_id, transportation_mode FROM ( "
    "SELECT user_id, transportation_mode, "
    "ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY COUNT(*) DESC, transportation_mode ASC) AS rownum "
    "FROM Activity JOIN User ON Activity.user_id = User.id "
    "WHERE transportation_mode IS NOT NULL AND User.has_labels=TRUE "
    "GROUP BY user_id, transportation_mode) AS activity_grouped "
    "WHERE rownum=1 ORDER BY user_id"
)
program.cursor.execute(query)
rows = program.cursor.fetchall()
# One line per user; each row is (user_id, transportation_mode).
text = "Find all the users who have registered transportation_mode and their most used transportation_mode\n" + "".join(
    f"User {row[0]}, Transportation mode: {row[1]}\n" for row in rows
)
print(text)

### End

In [None]:
# Close the connection held by `program` (the DbConnector created in the setup cell).
# NOTE(review): the setup cell also creates `db = Database()` and its output shows a
# second "Connected to" banner, so `db` presumably holds its own connection that is
# not closed here - TODO confirm and close it as well.
program.close_connection()