In [3]:
import numpy as np

In [9]:
matrix_1 = np.array([[1, 2, 3],[4,5,6]])
matrix_1
matrix_2 = np.array([2,2])

mult = np.multiply(matrix_1, matrix_2 [:, None])
mult

array([[ 2,  4,  6],
       [ 8, 10, 12]])

In [11]:
nan1 = np.array([np.NAN, 3])
nan1

array([nan,  3.])

In [17]:
np.set_printoptions(suppress=True)

In [18]:
import os
import shutil

import gdown
from numpy import genfromtxt

# Download file from Google Drive
# This file is based on data from: http://insideairbnb.com/get-the-data/
file_id_1 = "13fyESiH1ZEnMV6eabAyhe20t4W6peEWK"
downloaded_file_1 = "WK1_Airbnb_Amsterdam_listings_proj.csv"

# Download the file from Google Drive
gdown.download(id=file_id_1, output=downloaded_file_1)

Downloading...
From: https://drive.google.com/uc?id=13fyESiH1ZEnMV6eabAyhe20t4W6peEWK
To: /home/kibet/Documents/Colab/WK1_Airbnb_Amsterdam_listings_proj.csv
100%|██████████| 246k/246k [00:00<00:00, 451kB/s]


'WK1_Airbnb_Amsterdam_listings_proj.csv'

In [12]:
from numpy import genfromtxt
data = np.genfromtxt("/home/kibet/Documents/Colab/WK1_Airbnb_Amsterdam_listings_proj.csv", delimiter="|", dtype="unicode")
data[:, :4]

array([['', '0', '1', '2'],
       ['id', '23726706', '35815036', '31553121'],
       ['price', '$88.00', '$105.00', '$152.00'],
       ['latitude', '52.34916', '52.42419', '52.43237'],
       ['longitude', '4.97879', '4.95689', '4.91821']], dtype='<U18')

In [13]:
# Remove first column and row

clean_data = data[1:,1:]
clean_data[:, :5]

array([['23726706', '35815036', '31553121', '34745823', '44586947'],
       ['$88.00', '$105.00', '$152.00', '$87.00', '$160.00'],
       ['52.34916', '52.42419', '52.43237', '52.2962', '52.31475'],
       ['4.97879', '4.95689', '4.91821', '5.01231', '5.0303']],
      dtype='<U18')

In [14]:
#shift the matrix 90 degrees

clean_data = np.transpose(data[1:,1:])

#airbnb_id, price in usd, latitude and longitude
clean_data[:5, :5]

array([['23726706', '$88.00', '52.34916', '4.97879'],
       ['35815036', '$105.00', '52.42419', '4.95689'],
       ['31553121', '$152.00', '52.43237', '4.91821'],
       ['34745823', '$87.00', '52.2962', '5.01231'],
       ['44586947', '$160.00', '52.31475', '5.0303']], dtype='<U18')

In [15]:
# remove $ and comma
clean_data = np.char.replace(clean_data, '$', '')
clean_data = np.char.replace(clean_data, ',', '')
clean_data[:5, :5]

array([['23726706', '88.00', '52.34916', '4.97879'],
       ['35815036', '105.00', '52.42419', '4.95689'],
       ['31553121', '152.00', '52.43237', '4.91821'],
       ['34745823', '87.00', '52.2962', '5.01231'],
       ['44586947', '160.00', '52.31475', '5.0303']], dtype='<U18')

In [16]:
#Check if $ or comma is still present 

clean_data[np.char.find(clean_data, '$') > -1]
clean_data[np.char.find(clean_data, ',') > -1]

array([], dtype='<U18')

In [17]:
# change unicode to float
clean_data = clean_data.astype(np.float32)
clean_data[:5, :5]

array([[2.3726706e+07, 8.8000000e+01, 5.2349159e+01, 4.9787898e+00],
       [3.5815036e+07, 1.0500000e+02, 5.2424191e+01, 4.9568901e+00],
       [3.1553120e+07, 1.5200000e+02, 5.2432369e+01, 4.9182100e+00],
       [3.4745824e+07, 8.7000000e+01, 5.2296200e+01, 5.0123100e+00],
       [4.4586948e+07, 1.6000000e+02, 5.2314751e+01, 5.0303001e+00]],
      dtype=float32)

In [18]:
# Round down the new currency column to 2 decimals
clean_data[:, 1] = np.round(clean_data[:, 1], 2)
clean_data[:, 1]

array([ 88., 105., 152., ..., 180., 174.,  65.], dtype=float32)

In [22]:
# Calculating distance between two location in metres

def calculate_distance(lat1:float, lon1:float, lat2: np.ndarray, lon2: np.ndarray):
    radius = 6378.8
    res1 = lat1/(180*7/22)
    res_1 = lat2/(180*7/22)
    res2 = lon1/(180*7/22)
    res_2 = lon2/(180*7/22)
    d = (radius * np.cos(np.sin(res1) * np.sin(res_1)) + np.cos(res1) 
                        * np.cos(res_1) * np.cos(res2-res_2))
    return d


In [24]:
latitude = 52.3580
longitude = 4.8686

distance = calculate_distance(latitude, longitude, clean_data[:, 2], clean_data[:, 3])
distance

array([5164.7646, 5162.393 , 5162.135 , ..., 5162.328 , 5165.6924,
       5165.0713], dtype=float32)