In [1]:
from sqlalchemy import create_engine
from utils import execute_query
# Engine for the sample Airbnb SQLite database; every query cell below runs against it.
DB_URL = "sqlite:///../data/sample_db/airbnb.db"
engine = create_engine(DB_URL)

In [2]:
# Peek at the first five rows of the Calendar table to see which columns it has.
query = """
select * from Calendar limit 5;
"""
execute_query(engine, query)

Unnamed: 0,calender_id,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
0,1,6418099,2021-12-26 00:00:00.000000,1,48,48,3,1125
1,2,11649861,2021-12-26 00:00:00.000000,0,100,100,1,1125
2,3,11649861,2021-12-27 00:00:00.000000,0,100,100,1,1125
3,4,11649861,2021-12-28 00:00:00.000000,0,100,100,1,1125
4,5,11649861,2021-12-29 00:00:00.000000,0,100,100,1,1125


In [3]:
# Add `diff`: the number of days between each calendar date and the reference
# date 2021-11-26 (julianday() yields a day count, so the subtraction is in days).
#
# String literals are single-quoted. SQLite treats double-quoted text as an
# identifier first and only falls back to a string literal as a legacy quirk,
# which can be disabled at build time. The strftime('%Y-%m-%d', ...) wrapper
# around the already ISO-formatted reference date returned the same text, so
# it is dropped.
query = '''
select
    calender_id,
    listing_id,
    Calendar.date,
    available,
    price,
    adjusted_price,
    minimum_nights,
    maximum_nights,
    julianday(Calendar.date) - julianday('2021-11-26') as diff
from Calendar
limit 5;
'''

In [4]:
# Run the query built in the previous cell against the Airbnb database.
execute_query(engine, query)

Unnamed: 0,calender_id,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights,diff
0,1,6418099,2021-12-26 00:00:00.000000,1,48,48,3,1125,30.0
1,2,11649861,2021-12-26 00:00:00.000000,0,100,100,1,1125,30.0
2,3,11649861,2021-12-27 00:00:00.000000,0,100,100,1,1125,31.0
3,4,11649861,2021-12-28 00:00:00.000000,0,100,100,1,1125,32.0
4,5,11649861,2021-12-29 00:00:00.000000,0,100,100,1,1125,33.0


In [5]:
# Compute `diff` (days since the reference date 2021-11-26) in a CTE so the
# outer query can filter on it, then keep rows with diff <= 30.
#
# String literals are single-quoted: SQLite accepts double-quoted strings only
# as a legacy fallback that can be disabled at build time. The strftime()
# wrapper around the ISO-formatted reference date was a no-op and is dropped.
query = '''
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    price
from table_diff
where diff <= 30
limit 5;
'''
execute_query(engine, query)

Unnamed: 0,listing_id,price
0,6418099,48
1,11649861,100
2,50904,150
3,11687167,25
4,11702316,60


### Class Exercise 1
You now need a way to find the average 30-day price for each listed property. Use the baseline query above, shown again below formatted as SQL.

```sql
with 
table_diff as 
    (select 
    calender_id, listing_id, Calendar.date, available,price, adjusted_price,minimum_nights,maximum_nights,
    julianday(date) - julianday(strftime("%Y-%m-%d","2021-11-26") )
    as diff from Calendar)
select listing_id,
price  from table_diff
where diff<=30 limit 5;
```

In [6]:
# Exercise 1: average price per listing over the rows with diff <= 30, where
# `diff` is the number of days since the reference date 2021-11-26.
#
# String literals are single-quoted: SQLite accepts double-quoted strings only
# as a legacy fallback that can be disabled at build time. The strftime()
# wrapper around the ISO-formatted reference date was a no-op and is dropped.
#
# NOTE(review): the Calendar preview earlier in the notebook starts at
# 2021-12-26, which is diff = 30, so `diff <= 30` may cover only a single day
# per listing -- confirm the intended reference date / window.
query = '''
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    avg(price) as avg_price_30
from table_diff
where diff <= 30
group by listing_id
limit 5;
'''
execute_query(engine, query)

Unnamed: 0,listing_id,avg_price_30
0,50904,150.0
1,116134,150.0
2,218916,85.0
3,224333,16.0
4,224682,80.0


## Class Exercise 2

Can you think of any way to construct features out of the `available` column? Refer to the starter code below and build on top of it.

```sql
with 
table_diff as 
    (select 
    calender_id, listing_id, Calendar.date, available,price, adjusted_price,minimum_nights,maximum_nights,
    julianday(date) -julianday(strftime("%Y-%m-%d","2021-11-26") )
    as diff from Calendar)
select listing_id,
price,
available
from table_diff
where diff<=30 
limit 5;
```

In [7]:
# Starter query for Exercise 2: price and availability flag for rows with
# diff <= 30, where `diff` is the number of days since 2021-11-26.
#
# String literals are single-quoted: SQLite accepts double-quoted strings only
# as a legacy fallback that can be disabled at build time. The strftime()
# wrapper around the ISO-formatted reference date was a no-op and is dropped.
query = '''
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    price,
    available
from table_diff
where diff <= 30
limit 5;
'''
execute_query(engine, query)

Unnamed: 0,listing_id,price,available
0,6418099,48,1
1,11649861,100,0
2,50904,150,0
3,11687167,25,0
4,11702316,60,0


In [8]:
# Exercise 2 solution: per listing, average price and average of the 0/1
# `available` flag over rows with diff <= 30 (days since 2021-11-26), showing
# the 100 listings with the highest average availability.
#
# String literals are single-quoted: SQLite accepts double-quoted strings only
# as a legacy fallback that can be disabled at build time. The strftime()
# wrapper around the ISO-formatted reference date was a no-op and is dropped.
#
# NOTE(review): many listings tie at the same average, and there is no
# tie-breaker in the ORDER BY, so which tied listings make the top 100 is up
# to the database -- add a secondary sort key if a stable cut is needed.
query = '''
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        julianday(Calendar.date) - julianday('2021-11-26') as diff
    from Calendar
)
select
    listing_id,
    avg(price) as avg_price_30,
    avg(available) as avg_availability_30
from table_diff
where diff <= 30
group by listing_id
order by avg(available) desc
limit 100;
'''
execute_query(engine, query)

Unnamed: 0,listing_id,avg_price_30,avg_availability_30
0,53949105,990.0,1.0
1,53929354,90.0,1.0
2,53928545,49.0,1.0
3,53912988,25.0,1.0
4,53898152,75.0,1.0
...,...,...,...
95,39482529,134.0,1.0
96,39403196,450.0,1.0
97,39330826,800.0,1.0
98,38919382,97.0,1.0


## Class Problem 3

Use the sample queries below and think about what other features you can extract from the data.

**Sample 1**

```sql
with 
    table_diff as 
    (select 
    calender_id,
	listing_id, 
	Calendar.date,
	available,
	price, 
	adjusted_price,
	minimum_nights,
	maximum_nights,
    julianday(date) -julianday(strftime("%Y-%m-%d","2021-11-26") )
    as diff, strftime('%w',date) as weekday
	from Calendar)
select listing_id,
price,
available,
table_diff.date,
weekday,
case 
	when weekday ='6' or weekday='0' then "week_end"
	else "week_day" 
end as day_of_week
from table_diff
where diff<=60
limit 10;
```

**Sample 2**

```sql
with 
    table_diff as 
    (select 
    calender_id,
	listing_id, 
	Calendar.date,
	available,
	price, 
	adjusted_price,
	minimum_nights,
	maximum_nights,
    julianday(date) -julianday(strftime("%Y-%m-%d","2021-11-26") )
    as diff, strftime('%w',date) as weekday
	from Calendar)
select listing_id,
price,
available,
table_diff.date,
weekday,
case 
	when (weekday ='6' and available=1) or (weekday='0' and available=1) then 1
	else 0
end as day_of_week
from table_diff
where diff<=60
limit 10
```

**Sample 3**
```sql
select listing_id,weekend_availability from
(with 
    table_diff as 
    (select 
    calender_id,
	listing_id, 
	Calendar.date,
	available,
	price, 
	adjusted_price,
	minimum_nights,
	maximum_nights,
    julianday(date) -julianday(strftime("%Y-%m-%d","2021-11-26") )
    as diff, strftime('%w',date) as weekday
	from Calendar)
select listing_id,
case 
	when (weekday ='6' and available=1) or (weekday='0' and available=1) then 1
	else 0
end as weekend_availability
from table_diff
where diff<=60)
limit 5;
```

In [11]:
# Sample 1: label each calendar row as weekend or weekday.
# strftime('%w', date) returns the day of week as text, '0' (Sunday) through
# '6' (Saturday), so the comparison values are quoted strings.
#
# All string literals are single-quoted, including the 'week_end'/'week_day'
# labels: SQLite treats double-quoted text as an identifier first and only
# falls back to a string literal as a legacy quirk that can be disabled at
# build time. The strftime() wrapper around the ISO-formatted reference date
# was a no-op and is dropped.
query = '''
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        julianday(Calendar.date) - julianday('2021-11-26') as diff,
        strftime('%w', Calendar.date) as weekday
    from Calendar
)
select
    listing_id,
    price,
    available,
    table_diff.date,
    weekday,
    case
        when weekday in ('0', '6') then 'week_end'
        else 'week_day'
    end as day_of_week
from table_diff
where diff <= 60
limit 10;
'''
execute_query(engine, query)

Unnamed: 0,listing_id,price,available,date,weekday,day_of_week
0,6418099,48,1,2021-12-26 00:00:00.000000,0,week_end
1,11649861,100,0,2021-12-26 00:00:00.000000,0,week_end
2,11649861,100,0,2021-12-27 00:00:00.000000,1,week_day
3,11649861,100,0,2021-12-28 00:00:00.000000,2,week_day
4,11649861,100,0,2021-12-29 00:00:00.000000,3,week_day
5,11649861,100,0,2021-12-30 00:00:00.000000,4,week_day
6,11649861,100,0,2021-12-31 00:00:00.000000,5,week_day
7,11649861,100,1,2022-01-01 00:00:00.000000,6,week_end
8,11649861,100,0,2022-01-02 00:00:00.000000,0,week_end
9,11649861,100,0,2022-01-03 00:00:00.000000,1,week_day


In [18]:
# Sample 2: flag rows that are both a weekend day and available.
# strftime('%w', date) returns '0' (Sunday) through '6' (Saturday) as text.
# The flag is 1 only when the day is Saturday/Sunday AND available = 1;
# this is the same condition as the original two OR-ed branches, with the
# shared `available = 1` test factored out.
#
# String literals are single-quoted: SQLite accepts double-quoted strings only
# as a legacy fallback that can be disabled at build time. The strftime()
# wrapper around the ISO-formatted reference date was a no-op and is dropped.
query = '''
with table_diff as (
    select
        calender_id,
        listing_id,
        Calendar.date,
        available,
        price,
        adjusted_price,
        minimum_nights,
        maximum_nights,
        julianday(Calendar.date) - julianday('2021-11-26') as diff,
        strftime('%w', Calendar.date) as weekday
    from Calendar
)
select
    listing_id,
    price,
    available,
    table_diff.date,
    weekday,
    case
        when weekday in ('0', '6') and available = 1 then 1
        else 0
    end as week_end_availability
from table_diff
where diff <= 60
limit 10
'''
execute_query(engine, query)

Unnamed: 0,listing_id,price,available,date,weekday,week_end_availability
0,6418099,48,1,2021-12-26 00:00:00.000000,0,1
1,11649861,100,0,2021-12-26 00:00:00.000000,0,0
2,11649861,100,0,2021-12-27 00:00:00.000000,1,0
3,11649861,100,0,2021-12-28 00:00:00.000000,2,0
4,11649861,100,0,2021-12-29 00:00:00.000000,3,0
5,11649861,100,0,2021-12-30 00:00:00.000000,4,0
6,11649861,100,0,2021-12-31 00:00:00.000000,5,0
7,11649861,100,1,2022-01-01 00:00:00.000000,6,1
8,11649861,100,0,2022-01-02 00:00:00.000000,0,0
9,11649861,100,0,2022-01-03 00:00:00.000000,1,0


In [23]:
# Sample 3: wrap the weekend-availability flag in a subquery so an outer
# query can select (and later aggregate) it by name.
# The flag is 1 only when the day is Saturday/Sunday ('6'/'0' from
# strftime('%w', ...)) AND available = 1.
#
# String literals are single-quoted: SQLite accepts double-quoted strings only
# as a legacy fallback that can be disabled at build time. The strftime()
# wrapper around the ISO-formatted reference date was a no-op and is dropped.
query = '''
select
    listing_id,
    weekend_availability
from (
    with table_diff as (
        select
            calender_id,
            listing_id,
            Calendar.date,
            available,
            price,
            adjusted_price,
            minimum_nights,
            maximum_nights,
            julianday(Calendar.date) - julianday('2021-11-26') as diff,
            strftime('%w', Calendar.date) as weekday
        from Calendar
    )
    select
        listing_id,
        case
            when weekday in ('0', '6') and available = 1 then 1
            else 0
        end as weekend_availability
    from table_diff
    where diff <= 60
)
limit 5;
'''
execute_query(engine, query)

Unnamed: 0,listing_id,weekend_availability
0,6418099,1
1,11649861,0
2,11649861,0
3,11649861,0
4,11649861,0


In [20]:
## Tentative Solution
# Per listing, average the 0/1 "available weekend day" flag over all rows with
# diff <= 60 (days since the reference date 2021-11-26).
#
# The inner flag was aliased `day_of_week` although it holds a 0/1 value; it
# is renamed to `is_available_weekend` inside the query only. The output
# column is still `weekend_availability`.
#
# NOTE(review): the average runs over every day in the window (weekdays
# contribute 0), so the value is the share of all days that are available
# weekend days, not the availability rate among weekend days. If the latter
# is intended, restrict the inner query to weekend rows and average
# `available` instead.
#
# String literals are single-quoted: SQLite accepts double-quoted strings only
# as a legacy fallback that can be disabled at build time. The strftime()
# wrapper around the ISO-formatted reference date was a no-op and is dropped.
query = '''
select
    listing_id,
    avg(is_available_weekend) as weekend_availability
from (
    with table_diff as (
        select
            calender_id,
            listing_id,
            Calendar.date,
            available,
            price,
            adjusted_price,
            minimum_nights,
            maximum_nights,
            julianday(Calendar.date) - julianday('2021-11-26') as diff,
            strftime('%w', Calendar.date) as weekday
        from Calendar
    )
    select
        listing_id,
        case
            when weekday in ('0', '6') and available = 1 then 1
            else 0
        end as is_available_weekend
    from table_diff
    where diff <= 60
)
group by listing_id
limit 5;
'''
execute_query(engine, query)

Unnamed: 0,listing_id,weekend_availability
0,50904,0.258065
1,116134,0.032258
2,218916,0.0
3,224333,0.0
4,224682,0.064516
