In [6]:
%%capture
!pip install numpy pandas streamlit gdown currencyconverter
import numpy as np

# For readability purposes, we will disable scientific notation for numbers
np.set_printoptions(suppress=True)
import os
import shutil

import gdown # gdown helps us download dataset from google drive
from numpy import genfromtxt

In [7]:
# Downloading file from Google Drive
# This file is based on data from: http://insideairbnb.com/get-the-data/
file_id_1 = "13fyESiH1ZEnMV6eabAyhe20t4W6peEWK"
downloaded_file_1 = "WK1_Airbnb_Amsterdam_listings_proj.csv"

# Downloading the file from Google Drive
gdown.download(id=file_id_1, output=downloaded_file_1)

Downloading...
From: https://drive.google.com/uc?id=13fyESiH1ZEnMV6eabAyhe20t4W6peEWK
To: C:\Users\Cashapona\WK1_Airbnb_Amsterdam_listings_proj.csv
100%|███████████████████████████████████████████████████████████████████████████████| 246k/246k [00:00<00:00, 1.99MB/s]


'WK1_Airbnb_Amsterdam_listings_proj.csv'

In [8]:
from numpy import genfromtxt

my_data = genfromtxt(downloaded_file_1, delimiter="|", dtype = "unicode")
my_data

array([['', '0', '1', ..., '6170', '6171', '6172'],
       ['id', '23726706', '35815036', ..., '35760705', '36900951',
        '40575103'],
       ['price', '$88.00', '$105.00', ..., '$180.00', '$174.00',
        '$65.00'],
       ['latitude', '52.34916', '52.42419', ..., '52.42624', '52.31983',
        '52.33946'],
       ['longitude', '4.97879', '4.95689', ..., '4.90236', '4.86463',
        '4.95749']], dtype='<U18')

In [9]:
# Remove the first column and row
matrix = my_data[1: , 1:]

# Print out the first four columns
matrix [:, :4]

array([['23726706', '35815036', '31553121', '34745823'],
       ['$88.00', '$105.00', '$152.00', '$87.00'],
       ['52.34916', '52.42419', '52.43237', '52.2962'],
       ['4.97879', '4.95689', '4.91821', '5.01231']], dtype='<U18')

In [10]:
# Transposing the dataset
matrix_T = matrix [:, :4].T
matrix_T

array([['23726706', '$88.00', '52.34916', '4.97879'],
       ['35815036', '$105.00', '52.42419', '4.95689'],
       ['31553121', '$152.00', '52.43237', '4.91821'],
       ['34745823', '$87.00', '52.2962', '5.01231']], dtype='<U18')

In [11]:
# Remove the dollar sign
matrix_T = np.char.replace(matrix_T, "$", "")

In [12]:
# Remove the comma
matrix_T = np.char.replace(matrix_T, ",", "")

In [13]:
# To enable numerical operations we must convert the string/unicode to float32
matrix_T = matrix_T.astype(np.float32)

# Printing out the first fours rows 
matrix_T[:5, :]

array([[23726706.     ,       88.     ,       52.34916,        4.97879],
       [35815036.     ,      105.     ,       52.42419,        4.95689],
       [31553120.     ,      152.     ,       52.43237,        4.91821],
       [34745824.     ,       87.     ,       52.2962 ,        5.01231]],
      dtype=float32)

#### Choose an Airbnb location closest to where you'll spend most of your time

In [14]:
latitude = 52.3581
longitude = 4.8812

In [20]:
def from_location_to_airbnb_listing_in_meters(lat1: float, lon1: float, lat2: list, lon2: list):
    R = 6371000  # Radius of Earth in meters
    phi_1 = np.radians(lat1)
    phi_2 = np.radians(lat2)

    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)

    a = (
        np.sin(delta_phi / 2.0) ** 2
        + np.cos(phi_1) * np.cos(phi_2) * np.sin(delta_lambda / 2.0) ** 2
    )

    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    meters = R * c  # Output distance in meters

    return np.round(meters, 0)

#### we've created a function to calculate the distance in meters for every Airbnb listing, we'll perform this calculation on the entire dataset and add the outputs to the matrix as a new column.

In [18]:
convert_vec = np.vectorize(from_location_to_airbnb_listing_in_meters)

In [22]:
meters = from_location_to_airbnb_listing_in_meters(latitude, longitude, matrix_T[:, 2], matrix_T[:, 3])

In [23]:
# Add an axis to make concatenation possible
meters = meters.reshape(-1, 1)

In [24]:
# Append the distance in meters to the matrix
matrix_T = np.concatenate((matrix_T, meters), axis=1)

In [25]:
# Append a color to the matrix
colors = np.zeros(meters.shape)
matrix_T = np.concatenate((matrix_T, colors), axis=1)

In [26]:
# Append our entry to the matrix
fav_entry = np.array([1, 0, 52.3600, 4.8852, 0, 1]).reshape(1, -1) # Change coordinates to your favorite location
matrix_T = np.concatenate((fav_entry, matrix_T), axis=0)

In [27]:
# Entries: airbnb_id, price, latitude, longitude,
# meters from favorite point, color
matrix_T[:5, :]

array([[       1.        ,        0.        ,       52.36      ,
               4.8852    ,        0.        ,        1.        ],
       [23726706.        ,       88.        ,       52.34915924,
               4.97878981,     6702.        ,        0.        ],
       [35815036.        ,      105.        ,       52.42419052,
               4.95689011,     8966.        ,        0.        ],
       [31553120.        ,      152.        ,       52.43236923,
               4.91821003,     8632.        ,        0.        ],
       [34745824.        ,       87.        ,       52.2961998 ,
               5.01231003,    11259.        ,        0.        ]])

In [28]:
# Export the data 
np.savetxt("WK1_Airbnb_Amsterdam_listings_proj_solution.csv", matrix_T, delimiter=",")