In [1]:
from DbConnector import DbConnector
from part2 import Database 
from tabulate import tabulate

# Set up the program
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment, then
# read the database credentials from it (each is None when the variable is unset).
load_dotenv()

user, password = os.environ.get('DB_USER'), os.environ.get('DB_PASSWORD')

# `program` exposes the raw cursor used by the hand-written queries below;
# `db` wraps the prepared query helpers imported from part2.
program = DbConnector(USER=user, PASSWORD=password)
db = Database()

Using user:  common
Connected to: 8.0.34-0ubuntu0.22.04.1
You are connected to the database: ('assignment2',)
-----------------------------------------------

Using user:  common
Connected to: 8.0.34-0ubuntu0.22.04.1
You are connected to the database: ('assignment2',)
-----------------------------------------------



### Task 1

How many users, activities and trackpoints are there in the dataset (after it is inserted into the database)?

In [None]:
# Fetch the size of each table; element [0] of every result is what gets displayed.
users = db.get_table_size("User")
activities = db.get_table_size("Activity")
trackpoints = db.get_table_size("TrackPoint")

print("Number of Users, Activities and TrackPoints")

# First row is the header, second row holds the three counts.
header = ["Users", "Activities", "TrackPoints"]
counts = [users[0], activities[0], trackpoints[0]]
print(tabulate([header, counts], headers="firstrow", tablefmt="fancy_grid"))

### Task 3

Find the top 15 users with the highest number of activities.

In [None]:
# Rank users by how many rows they have in Activity and keep the top 15.
# user_id is a secondary sort key so that users with equal counts (in particular
# around the LIMIT 15 cut-off) are always returned in the same order; without it
# the database is free to pick any of the tied users.
query = (
    "SELECT user_id, COUNT(*) AS num_activities "
    "FROM Activity "
    "GROUP BY user_id "
    "ORDER BY num_activities DESC, user_id ASC "
    "LIMIT 15"
)
program.cursor.execute(query)
rows = program.cursor.fetchall()

# Each row is (user_id, num_activities); the rank shown is 1-based.
text = "Top 15 users with the highest number of activities:\n"
for rank, (user_id, num_activities) in enumerate(rows, start=1):
    text += f"Top {rank}. {user_id} ({num_activities} activities)\n"
print(text)

### Task 4

Find all users who have taken a bus.

In [8]:
# Users returned by db.get_user_taken_bus(); only the first ten rows are
# displayed to keep the rendered table short.
users = db.get_user_taken_bus()

header = ["User id"]
table = [header, *users[:10]]
print(tabulate(table, headers="firstrow", tablefmt="fancy_grid"))

╒═══════════╕
│   User id │
╞═══════════╡
│       010 │
├───────────┤
│       020 │
├───────────┤
│       052 │
├───────────┤
│       053 │
├───────────┤
│       058 │
├───────────┤
│       062 │
├───────────┤
│       064 │
├───────────┤
│       065 │
├───────────┤
│       067 │
├───────────┤
│       068 │
╘═══════════╛


### Task 6

Find activities that are registered multiple times. You should provide the query even
if it returns zero results.

In [None]:
# An activity is reported when another row (different id) has the same user,
# transportation mode, start time and end time.
# transportation_mode is compared with MySQL's NULL-safe operator (<=>):
# the column may be NULL, and `NULL = NULL` never evaluates to true, so a plain
# `=` would silently skip duplicated activities that have no transportation mode.
query = (
    "SELECT a.id FROM Activity AS a "
    "WHERE EXISTS ("
    "SELECT b.id FROM Activity AS b "
    "WHERE a.user_id = b.user_id "
    "AND a.transportation_mode <=> b.transportation_mode "
    "AND a.start_date_time = b.start_date_time "
    "AND a.end_date_time = b.end_date_time "
    "AND a.id != b.id)"
)
program.cursor.execute(query)
rows = program.cursor.fetchall()
print("Find activities that are registered multiple times:\n", rows)

### Task 7

a) Find the number of users that have started an activity in one day and ended the activity the next day.

In [12]:
# Task 7a: the helper returns a result whose first element is the user count.
num_users = db.get_num_user_activity_over_a_day()

print("Number of users with activity that ends the next day")

# Single-column table: header row followed by the one value.
summary = [["Number of users"], [num_users[0]]]
print(tabulate(summary, headers="firstrow", tablefmt="fancy_grid"))

Number of users with activity that ends the next day
╒═══════════════════╕
│   Number of users │
╞═══════════════════╡
│                95 │
╘═══════════════════╛


b) List the transportation mode, user id and duration for these activities.

In [13]:
# Task 7b: one table row per returned record.
users_info = db.get_user_activity_over_a_day()

# Keep the first three fields of each record, shown under the
# "User", "Transportation Mode" and "Duration" headers respectively.
content = []
for row in users_info:
    content.append([row[0], row[1], row[2]])

table = [["User", "Transportation Mode", "Duration"], *content]

print(tabulate(table, headers="firstrow", tablefmt="fancy_grid"))

╒════════╤═══════════════════════╤════════════╕
│   User │ Transportation Mode   │ Duration   │
╞════════╪═══════════════════════╪════════════╡
│    010 │ train                 │ 23:59:59   │
├────────┼───────────────────────┼────────────┤
│    010 │ train                 │ 11:13:11   │
├────────┼───────────────────────┼────────────┤
│    010 │ train                 │ 7:09:22    │
├────────┼───────────────────────┼────────────┤
│    010 │ train                 │ 12:13:22   │
├────────┼───────────────────────┼────────────┤
│    010 │ train                 │ 11:31:39   │
├────────┼───────────────────────┼────────────┤
│    010 │ train                 │ 9:20:15    │
├────────┼───────────────────────┼────────────┤
│    010 │ train                 │ 1:50:25    │
├────────┼───────────────────────┼────────────┤
│    010 │ bus                   │ 0:08:24    │
├────────┼───────────────────────┼────────────┤
│    010 │ taxi                  │ 0:42:47    │
├────────┼───────────────────────┼──────

### Task 9

Find the top 15 users who have gained the most altitude meters.

In [None]:
# How the query works, innermost first:
#   1. For every track point, LAG() yields the altitude change relative to the
#      previous point of the same activity (ordered by date_time).
#   2. Only positive changes (climbs) are summed per activity.
#   3. Per-activity gains are summed per user and converted to metres.
# The conversion factor is the exact feet-to-metres constant 0.3048 (the previous
# 0.304 was a truncated value that under-reported every total by about 0.26%).
# NOTE(review): this assumes TrackPoint.altitude is stored in feet, as the
# original conversion implied - confirm against the insertion code.
query = (
    "SELECT user_id, SUM(activity_altitude) * 0.3048 AS altitude_in_meters "
    "FROM Activity "
    "JOIN ("
    "SELECT activity_id, SUM(difference) AS activity_altitude "
    "FROM ("
    "SELECT activity_id, "
    "altitude - LAG(altitude) OVER (PARTITION BY activity_id ORDER BY date_time) AS difference "
    "FROM TrackPoint"
    ") AS altitude_difference "
    "WHERE difference > 0 "
    "GROUP BY activity_id"
    ") AS difference_table ON Activity.id = difference_table.activity_id "
    "GROUP BY user_id "
    "ORDER BY altitude_in_meters DESC "
    "LIMIT 15"
)
program.cursor.execute(query)
rows = program.cursor.fetchall()

# Each row is (user_id, altitude_in_meters).
text = "Find the top 15 users who have gained the most altitude meters\n"
for user_id, altitude_in_meters in rows:
    text += f"User: {user_id}, Altitude in meters: {altitude_in_meters}\n"
print(text)

### Task 10

Find the users that have traveled the longest total distance in one day for each
transportation mode.

In [14]:
# Task 10: rows returned by the helper are placed directly under the header row.
users = db.get_user_with_max_distance()

header = ["User", "Transportation mode", "Distance (km)"]
table = [header, *users]

print(tabulate(table, headers="firstrow", tablefmt="fancy_grid"))

╒════════╤═══════════════════════╤═════════════════╕
│   User │ Transportation mode   │   Distance (km) │
╞════════╪═══════════════════════╪═════════════════╡
│    175 │ bus                   │       1.80771   │
├────────┼───────────────────────┼─────────────────┤
│    163 │ taxi                  │      11.3794    │
├────────┼───────────────────────┼─────────────────┤
│    167 │ walk                  │       0.455136  │
├────────┼───────────────────────┼─────────────────┤
│    167 │ bike                  │       4.0598    │
├────────┼───────────────────────┼─────────────────┤
│    128 │ car                   │      18.5745    │
├────────┼───────────────────────┼─────────────────┤
│    062 │ run                   │       0.0332532 │
├────────┼───────────────────────┼─────────────────┤
│    128 │ train                 │      19.0923    │
├────────┼───────────────────────┼─────────────────┤
│    128 │ subway                │      10.8537    │
├────────┼───────────────────────┼────────────

### Task 12

Find all the users who have registered a transportation_mode, and their most used transportation_mode.

Comment on implementation: With this dataset, the join with User is not strictly needed. However, if a user without labels still had activities in Activity with a transportation_mode, the join would be necessary to exclude that user. For users with the same number of activities across multiple transportation modes, we take the first transportation mode in alphabetical order.

In [None]:
# Per user, (user_id, transportation_mode) groups are numbered by descending
# activity count, with ties broken alphabetically on transportation_mode; only
# the group numbered 1 is kept. Rows with a NULL transportation_mode and users
# with has_labels = FALSE are filtered out before grouping.
query = (
    "SELECT user_id, transportation_mode "
    "FROM ( "
    "SELECT user_id, transportation_mode, "
    "ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY COUNT(*) DESC, transportation_mode ASC) AS rownum "
    "FROM Activity JOIN User ON Activity.user_id = User.id "
    "WHERE transportation_mode IS NOT NULL AND User.has_labels=TRUE "
    "GROUP BY user_id, transportation_mode"
    ") AS activity_grouped "
    "WHERE rownum=1 "
    "ORDER BY user_id"
)
program.cursor.execute(query)
rows = program.cursor.fetchall()

# One output line per (user_id, transportation_mode) row, after the heading.
heading = "Find all the users who have registered transportation_mode and their most used transportation_mode\n"
text = heading + "".join(
    f"User {record[0]}, Transportation mode: {record[1]}\n" for record in rows
)
print(text)

### End

In [None]:
# Closing the connection held by `program` (the DbConnector created in the setup cell).
# NOTE(review): the setup cell printed two connection banners, so `db` (Database)
# presumably holds a second connection that is not closed here - confirm whether
# it needs to be closed as well.
program.close_connection()