In [None]:
import duckdb
import pandas as pd

%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaylimit = 10

pd.set_option("display.precision", 2)

con = duckdb.connect()

%reload_ext sql
%sql con --alias duckdb

## Creating and updating tables

In [None]:
%%sql

-- (Re)create a table "users" with columns "id", "name", and "age".
-- CREATE OR REPLACE keeps this cell re-runnable: a plain CREATE TABLE fails
-- once the table already exists in the current session.
CREATE OR REPLACE TABLE users (id INT, name VARCHAR, age INT);

-- Insert some data into the table
INSERT INTO users VALUES (1, 'Alice', 30);
INSERT INTO users VALUES (2, 'Max', 25);

-- Query the table
SELECT * FROM users;


In [None]:
%%sql

-- Change Alice's age to 26
UPDATE users
SET age = 26
WHERE name = 'Alice';

-- Remove the row for Max
DELETE FROM users
WHERE name = 'Max';

-- Show what is left in the table
SELECT *
FROM users;

## Importing data

In [None]:
%%sql

-- Bulk-load an external CSV file (roughly 150 MB) into a table.
-- CREATE OR REPLACE keeps this cell re-runnable: a plain CREATE TABLE fails
-- once taxi_trips already exists in the current session.
CREATE OR REPLACE TABLE taxi_trips AS
SELECT *
FROM read_csv('./taxi_2010_01_1m.csv');


## Summarizing, Filtering and Aggregating

In [None]:
%%sql

-- Use the SUMMARIZE command (as a subquery) to inspect the taxi_trips table
SELECT *
FROM (SUMMARIZE taxi_trips)

In [None]:
%%sql

-- Count the trips whose tip exceeds $10 and report the average distance of those trips
SELECT
    COUNT(*) AS rides,
    AVG(trip_distance) AS avg_distance
FROM
    taxi_trips
WHERE
    tip_amount > 10;

In [None]:
%%sql

-- $/mile per vendor, computed as the vendor's average fare divided by its
-- average trip distance; most expensive vendor first
SELECT
    vendor_id,
    AVG(fare_amount) / AVG(trip_distance) AS avg_fare_per_mile
FROM
    taxi_trips
GROUP BY
    vendor_id
ORDER BY
    avg_fare_per_mile DESC;


## Joining data with `JOIN`

In [None]:
%%sql

-- (Re)create a small table holding weather information
CREATE OR REPLACE TABLE weather (
    date DATE,
    temperature FLOAT,
    precipitation FLOAT
);

-- Add readings for a couple of days
INSERT INTO weather VALUES
    ('2010-01-03', 32, 0.1),
    ('2010-01-02', 35, 0.2);

In [None]:
%%sql

-- Join the taxi trip table with the weather table to get the weather information for each trip.
-- weather.date is a DATE, so the pickup value is cast to a DATE before comparing:
-- matching a full timestamp against a date would only pair up pickups at exactly midnight.
-- (Assumes pickup_datetime was read as a timestamp -- confirm against the taxi_trips schema.)
-- Trips on days without a weather row are dropped by the inner join.
SELECT 
    vendor_id, 
    trip_distance, 
    temperature, 
    precipitation 
FROM 
    taxi_trips 
JOIN 
    weather 
ON 
    CAST(taxi_trips.pickup_datetime AS DATE) = weather.date
LIMIT 10;


In [None]:
%%sql

-- Let's join again, but this time compute the average temperature and precipitation for each vendor.
-- The pickup value is cast to a DATE so that every trip on a given day matches that
-- day's weather row; comparing a full timestamp with a date would only match pickups
-- at exactly midnight.
-- (Assumes pickup_datetime was read as a timestamp -- confirm against the taxi_trips schema.)
SELECT 
    vendor_id, 
    AVG(temperature) AS avg_temperature, 
    AVG(precipitation) AS avg_precipitation
FROM 
    taxi_trips
JOIN
    weather
ON
    CAST(taxi_trips.pickup_datetime AS DATE) = weather.date
GROUP BY vendor_id;