## Data Pull from Calendar table
- Data is recorded as a listing is either added or updated
- We need to find a way to aggregate this data
- Work with the definition of `price` to be used for modelling

In [1]:
%load_ext sql

In [2]:
%sql sqlite:///../data/sample_db/airbnb.db

In [3]:
%sql select * from Calendar limit 5;

 * sqlite:///../data/sample_db/airbnb.db
Done.


calender_id,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
1,40334325,2022-08-03 00:00:00.000000,0,56,56,3,5
2,22742449,2022-11-13 00:00:00.000000,1,95,95,2,99
3,34621717,2022-04-17 00:00:00.000000,0,75,75,2,1125
4,38281744,2022-01-31 00:00:00.000000,1,150,150,1,1000
5,18835003,2022-05-21 00:00:00.000000,0,100,100,2,1125


In [4]:
%%sql
-- Preview Calendar rows together with diff, the number of days between each
-- row's date and the fixed 2021-11-26 reference date.
select
    calender_id,
    listing_id,
    Calendar.date,
    available,
    price,
    adjusted_price,
    minimum_nights,
    maximum_nights,
    -- String literals are single-quoted (double quotes denote identifiers in
    -- SQL). The former strftime wrapper returned the same YYYY-MM-DD text, so
    -- the literal is handed to julianday directly.
    julianday(Calendar.date) - julianday('2021-11-26') as diff
from Calendar
limit 5;

 * sqlite:///../data/sample_db/airbnb.db
Done.


calender_id,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights,diff
1,40334325,2022-08-03 00:00:00.000000,0,56,56,3,5,250.0
2,22742449,2022-11-13 00:00:00.000000,1,95,95,2,99,352.0
3,34621717,2022-04-17 00:00:00.000000,0,75,75,2,1125,142.0
4,38281744,2022-01-31 00:00:00.000000,1,150,150,1,1000,66.0
5,18835003,2022-05-21 00:00:00.000000,0,100,100,2,1125,176.0


In [5]:
%%sql
-- List listing_id and price for Calendar rows dated no later than 30 days
-- after the 2021-11-26 reference date.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    price
from table_diff
-- NOTE(review) - there is no lower bound on diff, so rows dated before the
-- reference date would also pass this filter. Confirm none exist.
where diff <= 30
limit 5;

 * sqlite:///../data/sample_db/airbnb.db
Done.


listing_id,price
10582792,220
41207350,95
16610326,95
42619796,67
49744202,69


### Class Exercise 1
You now need a way to find the average 30-day price for each listed property. Build on the baseline query above, which is reproduced below as a formatted SQL block.

```sql
-- Baseline query. It returns listing_id and price for Calendar rows dated no
-- later than 30 days after the 2021-11-26 reference date.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (string literals use single
        -- quotes, and the no-op strftime wrapper is gone)
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    price
from table_diff
where diff <= 30
limit 5;
```

In [6]:
%%sql
-- Average price per listing over Calendar rows dated no later than 30 days
-- after the 2021-11-26 reference date.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    avg(price) as avg_price_30
from table_diff
-- NOTE(review) - diff has no lower bound here, so rows dated before the
-- reference date would be averaged in as well. Confirm none exist.
where diff <= 30
group by listing_id
limit 5;

 * sqlite:///../data/sample_db/airbnb.db
Done.


listing_id,avg_price_30
50904,150.0
224333,16.0
639013,85.0
778043,80.0
961296,340.0


## Class Exercise 2

Can you think of a way to construct features out of the `available` column? Refer to the starter code below and build on top of it.

```sql
-- Starter query. It returns listing_id, price and the available flag for
-- Calendar rows dated no later than 30 days after the 2021-11-26 reference
-- date.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (string literals use single
        -- quotes, and the no-op strftime wrapper is gone)
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    price,
    available
from table_diff
where diff <= 30
limit 5;
```

In [7]:
%%sql
-- Show listing_id, price and the available flag for Calendar rows dated no
-- later than 30 days after the 2021-11-26 reference date.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    price,
    available
from table_diff
where diff <= 30
limit 5;

 * sqlite:///../data/sample_db/airbnb.db
Done.


listing_id,price,available
10582792,220,0
41207350,95,0
16610326,95,0
42619796,67,0
49744202,69,0


In [8]:
%%sql
-- Per listing, average the price and the available flag over Calendar rows
-- dated no later than 30 days after the 2021-11-26 reference date, then show
-- the ten listings with the highest average availability.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    avg(price) as avg_price_30,
    -- the recorded sample rows show available as 0 or 1, so this average
    -- reads as the share of rows flagged available
    avg(available) as avg_availability_30
from table_diff
where diff <= 30
group by listing_id
-- ties on the average are not broken by any further key, so which tied
-- listings make the cut of ten is left to the engine
order by avg_availability_30 desc
limit 10;

 * sqlite:///../data/sample_db/airbnb.db
Done.


listing_id,avg_price_30,avg_availability_30
53928545,49.0,1.0
53912988,25.0,1.0
53898152,75.0,1.0
53553352,60.0,1.0
53357203,300.0,1.0
53183954,200.0,1.0
53124248,180.0,1.0
53030480,55.0,1.0
52938535,150.0,1.0
52881927,55.0,1.0


## Class Problem 3

Use the sample queries below and think about what other features you can extract from the data.

**Sample 1**

```sql
-- Label each Calendar row dated no later than 60 days after the 2021-11-26
-- reference date as a weekend or a weekday row.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff,
        -- '%w' gives the day of week as text, 0 to 6 with Sunday = 0
        strftime('%w', Calendar.date) as weekday
    from Calendar
)
select
    listing_id,
    price,
    available,
    table_diff.date,
    weekday,
    case
        -- Sunday or Saturday; the labels are single-quoted string literals
        when weekday in ('0', '6') then 'week_end'
        else 'week_day'
    end as day_of_week
from table_diff
where diff <= 60
limit 10;
```

**Sample 2**

```sql
-- Flag each Calendar row dated no later than 60 days after the 2021-11-26
-- reference date with 1 when it is an available weekend day, otherwise 0.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff,
        -- '%w' gives the day of week as text, 0 to 6 with Sunday = 0
        strftime('%w', Calendar.date) as weekday
    from Calendar
)
select
    listing_id,
    price,
    available,
    table_diff.date,
    weekday,
    -- named for what it holds (an availability flag, not a day of week),
    -- matching the executed version of this query
    case
        when weekday in ('0', '6') and available = 1 then 1
        else 0
    end as week_end_availability
from table_diff
where diff <= 60
limit 10;
```

**Sample 3**
```sql
-- Return listing_id with a 0/1 flag marking available weekend days, for
-- Calendar rows dated no later than 60 days after the 2021-11-26 reference
-- date.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff,
        -- '%w' gives the day of week as text, 0 to 6 with Sunday = 0
        strftime('%w', Calendar.date) as weekday
    from Calendar
),
weekend_flags as (
    select
        listing_id,
        -- the alias must match the name selected below; the earlier spelling
        -- weekend_availablity made the outer select fail with an unknown
        -- column error
        case
            when weekday in ('0', '6') and available = 1 then 1
            else 0
        end as weekend_availability
    from table_diff
    where diff <= 60
)
select
    listing_id,
    weekend_availability
from weekend_flags
limit 5;
```

In [9]:
%%sql
-- Label each Calendar row dated no later than 60 days after the 2021-11-26
-- reference date as a weekend or a weekday row.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff,
        -- '%w' gives the day of week as text, 0 to 6 with Sunday = 0
        strftime('%w', Calendar.date) as weekday
    from Calendar
)
select
    listing_id,
    price,
    available,
    table_diff.date,
    weekday,
    case
        -- Sunday or Saturday; the labels are single-quoted string literals
        when weekday in ('0', '6') then 'week_end'
        else 'week_day'
    end as day_of_week
from table_diff
where diff <= 60
limit 10;

 * sqlite:///../data/sample_db/airbnb.db
Done.


listing_id,price,available,date,weekday,day_of_week
31198633,65,0,2022-01-21 00:00:00.000000,5,week_day
51957123,79,1,2022-01-16 00:00:00.000000,0,week_end
39746037,45,0,2022-01-18 00:00:00.000000,2,week_day
4178600,65,0,2021-12-29 00:00:00.000000,3,week_day
8366368,38,0,2021-12-27 00:00:00.000000,1,week_day
36660088,95,0,2022-01-06 00:00:00.000000,4,week_day
13249852,800,0,2022-01-08 00:00:00.000000,6,week_end
2602613,65,0,2021-12-30 00:00:00.000000,4,week_day
10582792,220,0,2021-12-26 00:00:00.000000,0,week_end
32312156,75,1,2022-01-14 00:00:00.000000,5,week_day


In [10]:
%%sql
-- Flag each Calendar row dated no later than 60 days after the 2021-11-26
-- reference date with 1 when it is an available weekend day, otherwise 0.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff,
        -- '%w' gives the day of week as text, 0 to 6 with Sunday = 0
        strftime('%w', Calendar.date) as weekday
    from Calendar
)
select
    listing_id,
    price,
    available,
    table_diff.date,
    weekday,
    case
        -- Saturday or Sunday, and the row is marked available
        when weekday in ('0', '6') and available = 1 then 1
        else 0
    end as week_end_availability
from table_diff
where diff <= 60
limit 10;




 * sqlite:///../data/sample_db/airbnb.db
Done.


listing_id,price,available,date,weekday,week_end_availability
31198633,65,0,2022-01-21 00:00:00.000000,5,0
51957123,79,1,2022-01-16 00:00:00.000000,0,1
39746037,45,0,2022-01-18 00:00:00.000000,2,0
4178600,65,0,2021-12-29 00:00:00.000000,3,0
8366368,38,0,2021-12-27 00:00:00.000000,1,0
36660088,95,0,2022-01-06 00:00:00.000000,4,0
13249852,800,0,2022-01-08 00:00:00.000000,6,0
2602613,65,0,2021-12-30 00:00:00.000000,4,0
10582792,220,0,2021-12-26 00:00:00.000000,0,0
32312156,75,1,2022-01-14 00:00:00.000000,5,0


In [11]:
%%sql
-- Return listing_id with a 0/1 flag marking available weekend days, for
-- Calendar rows dated no later than 60 days after the 2021-11-26 reference
-- date.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff,
        -- '%w' gives the day of week as text, 0 to 6 with Sunday = 0
        strftime('%w', Calendar.date) as weekday
    from Calendar
),
-- second step written as a chained CTE instead of a CTE nested inside a
-- derived table
weekend_flags as (
    select
        listing_id,
        case
            when weekday in ('0', '6') and available = 1 then 1
            else 0
        end as weekend_availability
    from table_diff
    where diff <= 60
)
select
    listing_id,
    weekend_availability
from weekend_flags
limit 5;



 * sqlite:///../data/sample_db/airbnb.db
Done.


listing_id,weekend_availability
31198633,0
51957123,1
39746037,0
4178600,0
8366368,0


**Tentative Solution**

In [12]:
%%sql
-- Per listing, average a 0/1 flag that marks available weekend days, over
-- Calendar rows dated no later than 60 days after the 2021-11-26 reference
-- date.
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        -- days elapsed since the reference date (single-quoted literal, no
        -- redundant strftime round trip)
        julianday(Calendar.date) - julianday('2021-11-26') as diff,
        -- '%w' gives the day of week as text, 0 to 6 with Sunday = 0
        strftime('%w', Calendar.date) as weekday
    from Calendar
),
weekend_flags as (
    select
        listing_id,
        -- 1 only for a Saturday or Sunday row that is marked available
        case
            when weekday in ('0', '6') and available = 1 then 1
            else 0
        end as is_available_weekend
    from table_diff
    where diff <= 60
)
select
    listing_id,
    -- NOTE(review) - weekday rows stay in the denominator with a flag of 0,
    -- so this is the share of all rows in the window that are available
    -- weekend days, not the availability rate among weekend days only.
    -- Confirm which of the two the feature is meant to capture.
    avg(is_available_weekend) as weekend_availability
from weekend_flags
group by listing_id
limit 5;

 * sqlite:///../data/sample_db/airbnb.db
Done.


listing_id,weekend_availability
50904,0.2142857142857142
116134,0.0
218916,0.0
224333,0.0
224682,0.0588235294117647
