In [1]:
from cleaning_functions import (
    load_and_clean_calendar, calculate_mean_price, load_and_clean_listings, 
    merge_with_average_price, set_display_options, save_to_csv,
    create_civitatis_airbnb_listing, create_civitatis_airbnb_reviews, 
    create_civitatis_airbnb_host, export_tables_to_csv
)
import pandas as pd
from IPython.display import display

# Columns requested from the listings file (passed to load_and_clean_listings).
# Order is kept exactly as originally specified; the groupings are only visual.
select_columns = [
    # identifiers
    "id",
    "host_id",
    # host attributes
    "host_name",
    "host_since",
    "host_location",
    "host_response_time",
    "host_response_rate",
    "host_is_superhost",
    "host_listings_count",
    # position, room type and capacity
    "longitude",
    "latitude",
    "room_type",
    "accommodates",
    # review counts and rating
    "number_of_reviews",
    "number_of_reviews_ltm",
    "number_of_reviews_l30d",
    "review_scores_rating",
    # remaining fields
    "instant_bookable",
    "reviews_per_month",
]

# --- Calendar -----------------------------------------------------------
# Read the calendar file through the cleaning helper, then derive the
# per-listing mean price from it.
df_calendar = load_and_clean_calendar("calendar.csv")
calendar_listing_price_mean_df = calculate_mean_price(df_calendar)

# --- Listings -----------------------------------------------------------
# Read the listings file with the column selection above, then combine it
# with the mean-price frame computed from the calendar.
df_listing_clean = load_and_clean_listings("listings.csv", select_columns)
df_listing_clean_price = merge_with_average_price(
    df_listing_clean,
    calendar_listing_price_mean_df,
)

# Apply the project's pandas display settings before anything is rendered.
set_display_options()

# Write the merged frame to disk, then preview its first 10 rows.
save_to_csv(df_listing_clean_price, "df_listing_clean_price.csv")
display(df_listing_clean_price.head(10))

# --- Derived tables -----------------------------------------------------
# Each table is built from the same merged frame.
civitatis_airbnb_listing = create_civitatis_airbnb_listing(df_listing_clean_price)
civitatis_airbnb_reviews = create_civitatis_airbnb_reviews(df_listing_clean_price)
civitatis_airbnb_host = create_civitatis_airbnb_host(df_listing_clean_price)

# Write the three derived tables out as CSV via the export helper.
export_tables_to_csv(
    civitatis_airbnb_listing,
    civitatis_airbnb_reviews,
    civitatis_airbnb_host,
)

# Preview the first 5 rows of each derived table (listing, reviews, host).
# display() renders its arguments one after another, in the order given.
display(
    civitatis_airbnb_listing.head(),
    civitatis_airbnb_reviews.head(),
    civitatis_airbnb_host.head(),
)


Unnamed: 0,listing_id,host_id,host_name,host_since,host_location,host_response_time,host_response_rate,host_is_superhost,host_listings_count,longitude,latitude,room_type,accommodates,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,review_scores_rating,instant_bookable,reviews_per_month,base_price
0,638401413782978130,382146507,Lovely,2020-12-29,"Málaga, Spain",within an hour,100%,f,72,-4.420914,36.722069,Entire home/apt,4,100,58,4,4.97,t,3.99,160
1,593807685540946920,11733642,Airgestión,2014-01-28,Spain,within an hour,100%,f,21,-4.41326,36.72718,Entire home/apt,7,12,4,1,5.0,t,0.46,180
2,689209787389564671,13967638,Susanna,2014-04-06,"Málaga, Spain",within a few hours,100%,f,2,-4.412884,36.72771,Entire home/apt,4,11,6,0,4.73,f,0.6,85
3,1169368107504912867,4884962,"Esther, Francisco & Sara",2013-01-28,"Andalusia, Spain",within an hour,100%,f,23,-4.420821,36.719241,Entire home/apt,10,0,0,0,Unknown,t,Unknown,191
4,53218911,413971511,Montana,2021-07-19,"Andalusia, Spain",within an hour,95%,f,22,-4.43439,36.73737,Entire home/apt,2,17,4,0,3.47,t,0.53,50
5,52295523,422798059,Carlos Alberto,2021-09-13,"Málaga, Spain",within an hour,100%,t,3,-4.41858,36.72423,Entire home/apt,2,122,64,9,4.89,t,4.44,85
6,1105849600091052946,446668777,Rodo,2022-02-24,"Málaga, Spain",within an hour,100%,f,1,-4.420344,36.723985,Entire home/apt,2,18,18,3,4.83,f,6.21,70
7,31748629,126386697,Lu&Cia,2017-04-18,"Málaga, Spain",within an hour,100%,f,44,-4.42382,36.71419,Entire home/apt,2,26,6,1,4.62,t,0.39,85
8,1151936773590880075,399961266,Gestitur,2021-05-04,Unknown,within an hour,100%,f,62,-4.428972,36.714127,Entire home/apt,4,3,3,1,5.0,t,1.73,400
9,701861352072019656,428065882,Maria,2021-10-19,Unknown,Unknown,Unknown,f,7,-4.4209,36.7208,Entire home/apt,2,1,1,0,5.0,t,0.16,200


Unnamed: 0,listing_id,host_id,longitude,latitude,room_type,accommodates,instant_bookable,base_price
0,638401413782978130,382146507,-4.420914,36.722069,Entire home/apt,4,t,160
1,593807685540946920,11733642,-4.41326,36.72718,Entire home/apt,7,t,180
2,689209787389564671,13967638,-4.412884,36.72771,Entire home/apt,4,f,85
3,1169368107504912867,4884962,-4.420821,36.719241,Entire home/apt,10,t,191
4,53218911,413971511,-4.43439,36.73737,Entire home/apt,2,t,50


Unnamed: 0,reviews_id,listing_id,number_of_reviews,reviews_lastyear,reviews_last30days,review_scores_rating
0,0,638401413782978130,100,58,4,4.97
1,1,593807685540946920,12,4,1,5.0
2,2,689209787389564671,11,6,0,4.73
3,3,1169368107504912867,0,0,0,Unknown
4,4,53218911,17,4,0,3.47


Unnamed: 0,host_id,host_name,host_since,host_location,host_response_time,host_is_superhost,host_listings_count
0,382146507,Lovely,2020-12-29,"Málaga, Spain",within an hour,f,72
1,11733642,Airgestión,2014-01-28,Spain,within an hour,f,21
2,13967638,Susanna,2014-04-06,"Málaga, Spain",within a few hours,f,2
3,4884962,"Esther, Francisco & Sara",2013-01-28,"Andalusia, Spain",within an hour,f,23
4,413971511,Montana,2021-07-19,"Andalusia, Spain",within an hour,f,22
