## 6.3 Geographical Visualizations with Python
#### 1.Importing data and libraries
#### 2.Data wrangling
#### 3.Data cleaning
#### 4.Plotting a choropleth with folium

### 1.Importing data and libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import json

In [2]:
# This command propts matplotlib visuals to appear in the notebook 

%matplotlib inline

In [3]:
# Import ".json" file for Europe cities

city_geo = r'C:\Users\hazem\Master Folder- Airbnb Prices in European Cities\Euro_Cities.json'

In [4]:
city_geo

'C:\\Users\\hazem\\Master Folder- Airbnb Prices in European Cities\\Euro_Cities.json'

In [5]:
f = open(r'C:\Users\hazem\Master Folder- Airbnb Prices in European Cities\Euro_Cities.json')
  
# returns JSON object asa dictionary
data = json.load(f)

# Iterating through the json list
for i in data['features']:
    print(i)

{'type': 'Feature', 'properties': {'id': 0, 'name': 'Sakarya'}, 'geometry': {'type': 'Point', 'coordinates': [30.400002511183743, 40.76666113606262]}}
{'type': 'Feature', 'properties': {'id': 1, 'name': 'Braila'}, 'geometry': {'type': 'Point', 'coordinates': [27.96900353877851, 45.291996145876226]}}
{'type': 'Feature', 'properties': {'id': 2, 'name': 'Al Ladhiqiyah'}, 'geometry': {'type': 'Point', 'coordinates': [35.779975950876974, 35.53998699853099]}}
{'type': 'Feature', 'properties': {'id': 3, 'name': 'Duma'}, 'geometry': {'type': 'Point', 'coordinates': [36.39998978551495, 33.583336404447664]}}
{'type': 'Feature', 'properties': {'id': 4, 'name': 'Mykolayiv'}, 'geometry': {'type': 'Point', 'coordinates': [31.984341999038918, 46.96773907377212]}}
{'type': 'Feature', 'properties': {'id': 5, 'name': 'Chernihiv'}, 'geometry': {'type': 'Point', 'coordinates': [31.30154129916633, 51.50492982899851]}}
{'type': 'Feature', 'properties': {'id': 6, 'name': 'Khmelnytskyy'}, 'geometry': {'type':

In [6]:
##Python shortcut to folder path##
path = r'C:\Users\hazem\Master Folder- Airbnb Prices in European Cities'

In [7]:
## Importing Data Set ##
df = pd.read_csv(os.path.join(path, '02 Data', 'Original Data', 'AirBnb_Europe_Clean.csv'), index_col = False)

### 2.Data wrangling

In [8]:
df.columns

Index(['Unnamed: 0', 'city', 'days', 'accommodation', 'room_type',
       'person_capacity', 'cleanliness_rating', 'guest_rating', 'bedrooms',
       'city_distance', 'metro_distance', 'longitude', 'latitude',
       'Rating category'],
      dtype='object')

In [9]:
df.shape

(51611, 14)

In [10]:
df.head()

Unnamed: 0.1,Unnamed: 0,city,days,accommodation,room_type,person_capacity,cleanliness_rating,guest_rating,bedrooms,city_distance,metro_distance,longitude,latitude,Rating category
0,0,Amsterdam,Weekdays,194,Private room,2,10,93,1,5.0,2.5,4.90569,52.41772,High rate
1,1,Amsterdam,Weekdays,344,Private room,4,8,85,1,0.5,0.2,4.90005,52.37432,Middle rate
2,2,Amsterdam,Weekdays,264,Private room,2,9,87,1,5.7,3.7,4.97512,52.36103,Middle rate
3,3,Amsterdam,Weekdays,434,Private room,4,9,90,2,0.4,0.4,4.89417,52.37663,High rate
4,4,Amsterdam,Weekdays,486,Private room,2,10,98,1,0.5,0.3,4.90051,52.37508,High rate


In [11]:
## Creating dataset with the needed variables ##

vars_list = ['city', 'accommodation', 'guest_rating', 'city_distance', 'metro_distance', 'Rating category']

In [12]:
vars_list

['city',
 'accommodation',
 'guest_rating',
 'city_distance',
 'metro_distance',
 'Rating category']

In [13]:
## Importing Data Set ##
df_vars = pd.read_csv(os.path.join(path, '02 Data', 'Original Data', 'AirBnb_Europe_Clean.csv'), usecols = vars_list)

In [14]:
df_vars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51611 entries, 0 to 51610
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   city             51611 non-null  object 
 1   accommodation    51611 non-null  int64  
 2   guest_rating     51611 non-null  int64  
 3   city_distance    51611 non-null  float64
 4   metro_distance   51611 non-null  float64
 5   Rating category  51611 non-null  object 
dtypes: float64(2), int64(2), object(2)
memory usage: 2.4+ MB


In [15]:
df_vars.shape

(51611, 6)

In [16]:
df_vars.head()

Unnamed: 0,city,accommodation,guest_rating,city_distance,metro_distance,Rating category
0,Amsterdam,194,93,5.0,2.5,High rate
1,Amsterdam,344,85,0.5,0.2,Middle rate
2,Amsterdam,264,87,5.7,3.7,Middle rate
3,Amsterdam,434,90,0.4,0.4,High rate
4,Amsterdam,486,98,0.5,0.3,High rate


### 3.Data Cleaning

In [17]:
# Check for missing values

df_vars.isnull().sum()   ## No Missing Values ##

city               0
accommodation      0
guest_rating       0
city_distance      0
metro_distance     0
Rating category    0
dtype: int64

In [18]:
# Check for Duplicate values 

dups = df_vars.duplicated()

In [19]:
dups.shape  ## No Dups ##

(51611,)

### 4.Plotting a choropleth with folium

In [20]:
# Create a data frame with just the cities and the values we want plotted

data_to_plot = df_vars[['city','guest_rating']]
data_to_plot.head()

Unnamed: 0,city,guest_rating
0,Amsterdam,93
1,Amsterdam,85
2,Amsterdam,87
3,Amsterdam,90
4,Amsterdam,98


In [21]:
# Setup a folium map at a high-level zoom
map = folium.Map(location = [100, 0], zoom_start = 1.5)

# Choropleth maps bind Pandas Data Frames and json geometries.This allows us to quickly visualize data combinations
folium.Choropleth(
    geo_data = city_geo, 
    data = data_to_plot,
    columns = ['city', 'guest_rating'],
    key_on = 'feature.properties.name', # this part is very important - check your json file to see where the KEY is located
    fill_color = 'YlOrBr', fill_opacity=0.6, line_opacity=0.1,
    legend_name = "Customer Rating").add_to(map)
folium.LayerControl().add_to(map)

map

In [22]:
df.shape

(51611, 14)

In [25]:
df.columns

Index(['Unnamed: 0', 'city', 'days', 'accommodation', 'room_type',
       'person_capacity', 'cleanliness_rating', 'guest_rating', 'bedrooms',
       'city_distance', 'metro_distance', 'longitude', 'latitude',
       'Rating category'],
      dtype='object')

In [24]:
df.drop(columns = ['Unnamed: 0'])

Unnamed: 0,city,days,accommodation,room_type,person_capacity,cleanliness_rating,guest_rating,bedrooms,city_distance,metro_distance,longitude,latitude,Rating category
0,Amsterdam,Weekdays,194,Private room,2,10,93,1,5.0,2.5,4.90569,52.41772,High rate
1,Amsterdam,Weekdays,344,Private room,4,8,85,1,0.5,0.2,4.90005,52.37432,Middle rate
2,Amsterdam,Weekdays,264,Private room,2,9,87,1,5.7,3.7,4.97512,52.36103,Middle rate
3,Amsterdam,Weekdays,434,Private room,4,9,90,2,0.4,0.4,4.89417,52.37663,High rate
4,Amsterdam,Weekdays,486,Private room,2,10,98,1,0.5,0.3,4.90051,52.37508,High rate
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51606,Vienna,Weekends,716,Entire home/apt,6,10,100,3,0.5,0.1,16.37940,48.21136,High rate
51607,Vienna,Weekends,305,Entire home/apt,2,8,86,1,0.8,0.1,16.38070,48.20296,Middle rate
51608,Vienna,Weekends,637,Entire home/apt,2,10,93,1,1.0,0.2,16.38568,48.20460,High rate
51609,Vienna,Weekends,301,Private room,2,10,87,1,3.0,0.3,16.34100,48.19200,Middle rate


In [27]:
df_airbnb = df.drop(columns = ['Unnamed: 0'])

In [28]:
df_airbnb.shape

(51611, 13)

In [29]:
df_airbnb.columns

Index(['city', 'days', 'accommodation', 'room_type', 'person_capacity',
       'cleanliness_rating', 'guest_rating', 'bedrooms', 'city_distance',
       'metro_distance', 'longitude', 'latitude', 'Rating category'],
      dtype='object')

In [30]:
df_airbnb.to_csv(os.path.join(path, '02 Data', 'Original Data', 'Airbnb_Final.csv'))