# 1. Data load

Load the [listings.csv](data/listings.md) file and parse the column types

## 1.1. Init

### 1.1.1. Importing libraries

In [1]:
import json

import pandas as pd

import warnings

%run ./scripts/pd_display.py

### 1.1.2. Configuring libraries

In [2]:
warnings.filterwarnings("ignore") # So it does not return a new error message with the kernel-id for every run at `pd.read_csv` (git)

## 1.2. Defining functions

In [3]:
def tf_to_bool(v):
    """Translate the flag 't' to True and 'f' to False.

    Any value that equals neither 't' nor 'f' yields None.
    """
    if v == 't':
        return True
    if v == 'f':
        return False
    return None

## 1.3. Loading data

### 1.3.1. Read from CSV

In [4]:
# Column -> dtype mapping handed to `pd.read_csv(dtype=...)`.
# It is written as dtype -> columns to avoid repeating the dtype on every
# line, then flattened into the column -> dtype form.
# `host_since` is read as a plain string here; it is parsed to datetime later.
dtype = {
    column: kind
    for kind, columns in {
        'Int64': [
            'listing_id',
            'host_id',
            'host_total_listings_count',
            'accommodates',
            'bedrooms',
            'price',
            'minimum_nights',
            'maximum_nights',
            'review_scores_rating',
            'review_scores_accuracy',
            'review_scores_cleanliness',
            'review_scores_checkin',
            'review_scores_communication',
            'review_scores_location',
            'review_scores_value',
        ],
        'float': [
            'host_response_rate',
            'host_acceptance_rate',
            'latitude',
            'longitude',
        ],
        'category': [
            'host_response_time',
            'neighbourhood',
            'district',
            'city',
            'property_type',
            'room_type',
        ],
        'string': [
            'name',
            'host_since',
            'host_location',
        ],
    }.items()
    for column in columns
}

In [5]:
# Per-column converters for `pd.read_csv(converters=...)`:
# the four t/f flag columns go through `tf_to_bool`, and `amenities`
# is decoded with `json.loads`.
converters = dict.fromkeys(
    ['instant_bookable', 'host_identity_verified', 'host_has_profile_pic', 'host_is_superhost'],
    tf_to_bool,
)
converters['amenities'] = json.loads

In [6]:
# Read the raw listings, applying the `dtype` mapping and the `converters`.
csv_options = {
    # `unicode_escape` DOES NOT WORK here because of the `json.loads` converter.
    # NOTE(review): accented characters in `name` look garbled in the displayed
    # frame -- confirm the real encoding of the CSV file.
    "encoding": "raw_unicode_escape",
    "dtype": dtype,
    "converters": converters,
}
df = pd.read_csv('data/listings.csv', **csv_options)

# This read emits (unless warnings are silenced):
#   Warning: Columns (8,10,11) have mixed types.
#   Specify dtype option on import or set low_memory=False.

### 1.3.2. Boolean type fixes

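Fix the `DtypeWarning` raised for the columns `(8,10,11)` by casting them to type *bool*. Values in these columns that were neither `t` nor `f` were read as `None` and become `False` in this cast.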

In [7]:
# Cast the three mixed-type flag columns to plain `bool` in a single call.
# NOTE(review): `tf_to_bool` returns None for anything other than 't'/'f',
# and casting None to bool gives False -- missing flags end up as False.
df = df.astype({
    flag: bool
    for flag in ("host_has_profile_pic", "host_identity_verified", "host_is_superhost")
})

### 1.3.3. `host_since` to `datetime`

In [8]:
# Parse the ISO-style date strings (YYYY-MM-DD) first, then replace the
# string column with the parsed datetime values.
host_since_parsed = pd.to_datetime(df["host_since"], format='%Y-%m-%d')
df["host_since"] = host_since_parsed

## 1.4. Store

In [9]:
# Show the frame as the cell output to inspect the parsed columns before storing it.
df

Unnamed: 0,listing_id,name,host_id,host_since,host_location,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_total_listings_count,host_has_profile_pic,host_identity_verified,neighbourhood,district,city,latitude,longitude,property_type,room_type,accommodates,bedrooms,amenities,price,minimum_nights,maximum_nights,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable
0,281420,"Beautiful Flat in le Village Montmartre, Paris",1466919,2011-12-03,"Paris, Ile-de-France, France",,,,False,1,True,False,Buttes-Montmartre,,Paris,48.88668,2.33343,Entire apartment,Entire place,2,1,"[Heating, Kitchen, Washer, Wifi, Long term sta...",53,2,1125,100,10,10,10,10,10,10,False
1,3705183,39 mÃÂ² Paris (Sacre CÃâur),10328771,2013-11-29,"Paris, Ile-de-France, France",,,,False,1,True,True,Buttes-Montmartre,,Paris,48.88617,2.34515,Entire apartment,Entire place,2,1,"[Shampoo, Heating, Kitchen, Essentials, Washer...",120,2,1125,100,10,10,10,10,10,10,False
2,4082273,"Lovely apartment with Terrace, 60m2",19252768,2014-07-31,"Paris, Ile-de-France, France",,,,False,1,True,False,Elysee,,Paris,48.88112,2.31712,Entire apartment,Entire place,2,1,"[Heating, TV, Kitchen, Washer, Wifi, Long term...",89,2,1125,100,10,10,10,10,10,10,False
3,4797344,Cosy studio (close to Eiffel tower),10668311,2013-12-17,"Paris, Ile-de-France, France",,,,False,1,True,True,Vaugirard,,Paris,48.84571,2.30584,Entire apartment,Entire place,2,1,"[Heating, TV, Kitchen, Wifi, Long term stays a...",58,2,1125,100,10,10,10,10,10,10,False
4,4823489,Close to Eiffel Tower - Beautiful flat : 2 rooms,24837558,2014-12-14,"Paris, Ile-de-France, France",,,,False,1,True,False,Passy,,Paris,48.85500,2.26979,Entire apartment,Entire place,2,1,"[Heating, TV, Kitchen, Essentials, Hair dryer,...",60,2,1125,100,10,10,10,10,10,10,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279707,38338635,Appartement T2 neuf prÃÂ¨s du tram T3a Porte ...,31161181,2015-04-13,"Paris, Ile-de-France, France",,,,False,1,True,True,Observatoire,,Paris,48.82701,2.31419,Entire apartment,Entire place,2,1,"[Iron, Heating, Washer, Dedicated workspace, E...",120,1,7,100,10,10,10,10,10,10,False
279708,38538692,Cozy Studio in Montmartre,10294858,2013-11-27,"Paris, Ile-de-France, France",,,,False,1,True,True,Buttes-Montmartre,,Paris,48.89309,2.33206,Entire apartment,Entire place,2,1,"[Shampoo, Iron, Heating, Washer, Hair dryer, E...",60,7,15,100,10,10,10,10,10,10,False
279709,38683356,Nice and cosy mini-appartement in Paris,2238502,2012-04-27,"Paris, Ile-de-France, France",,,,False,1,True,True,Buttes-Montmartre,,Paris,48.88699,2.34920,Entire apartment,Entire place,2,1,"[Paid parking off premises, Shampoo, First aid...",50,6,30,100,10,10,10,10,10,10,False
279710,39659000,Charming apartment near Rue Saint Maur / Oberk...,38633695,2015-07-16,"Paris, Ile-de-France, France",,,,False,1,True,True,Popincourt,,Paris,48.86687,2.38123,Entire apartment,Entire place,2,1,"[TV, Iron, Kitchen, Hangers, Smoke alarm, Cabl...",105,3,18,100,10,10,10,10,10,10,False


In [10]:
# Persist the typed frame as a pickle (presumably the input of the next notebook).
pickle_path = "./pickles/001.dataframe.data-load.pkl"
df.to_pickle(pickle_path)