### Dataset and Libraries

In [23]:
import geopandas as gpd


In [24]:
import pandas as pd


In [25]:
import folium
from folium import plugins

In [26]:
airbnb = pd.read_csv(r"C:\Users\behiy\OneDrive\Belgeler\AirbnbIstanbul.csv")

### Exploratory Data Analysis

In [27]:
#Let's examine the data types of the variables in the data set. Do we have lost data?

airbnb.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16251 entries, 0 to 16250
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              16251 non-null  int64  
 1   name                            16160 non-null  object 
 2   host_id                         16251 non-null  int64  
 3   host_name                       16244 non-null  object 
 4   neighbourhood_group             0 non-null      float64
 5   neighbourhood                   16251 non-null  object 
 6   latitude                        16251 non-null  float64
 7   longitude                       16251 non-null  float64
 8   room_type                       16251 non-null  object 
 9   price                           16251 non-null  int64  
 10  minimum_nights                  16251 non-null  int64  
 11  number_of_reviews               16251 non-null  int64  
 12  last_review                     

In [28]:
#Ads without comments may actually be houses that have never been taken. Let's see how many comments the ads get.

airbnb.number_of_reviews.value_counts()

0      8484
1      2046
2       958
3       608
4       422
       ... 
147       1
229       1
186       1
213       1
183       1
Name: number_of_reviews, Length: 203, dtype: int64

### Interactive Maps

In [29]:
#Let's show the ads with more than 100 comments on the map.

airbnb100 = airbnb[airbnb.number_of_reviews >= 100]

In [30]:
airbnb100.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
52,140269,Penthouse in Taxim -with 2 bedrooms,683356,Damir,,Beyoglu,41.03577,28.9773,Entire home/apt,295,3,128,2015-12-27,1.38,3,365
62,166265,Apartment in the Heart of the City1,792658,ISlK,,Beyoglu,41.03886,28.98199,Entire home/apt,232,2,119,2019-01-26,1.66,9,358
98,229498,Cozy studio apartment in Cihangir,1167189,Bilgehan,,Beyoglu,41.03077,28.98118,Entire home/apt,364,7,113,2018-09-01,1.3,2,304
106,247874,"Istiklal Avenue,Sadri alisik street",1298849,Berkan,,Beyoglu,41.03433,28.98221,Entire home/apt,58,1,106,2019-02-12,1.26,3,168
112,256825,BLUE Mosque walking&Grand pazar مشي لكل مكان,1351134,Bãśmä,,Fatih,41.01106,28.94568,Private room,100,3,211,2018-10-07,2.58,9,83


In [31]:
#Since we are filtering from our dataset, we see that the index numbers no longer start from 0. 
#Let's fix the index with the reset_index() method for further manipulations.

airbnb100.reset_index(drop = True, inplace = True)
airbnb100.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,140269,Penthouse in Taxim -with 2 bedrooms,683356,Damir,,Beyoglu,41.03577,28.9773,Entire home/apt,295,3,128,2015-12-27,1.38,3,365
1,166265,Apartment in the Heart of the City1,792658,ISlK,,Beyoglu,41.03886,28.98199,Entire home/apt,232,2,119,2019-01-26,1.66,9,358
2,229498,Cozy studio apartment in Cihangir,1167189,Bilgehan,,Beyoglu,41.03077,28.98118,Entire home/apt,364,7,113,2018-09-01,1.3,2,304
3,247874,"Istiklal Avenue,Sadri alisik street",1298849,Berkan,,Beyoglu,41.03433,28.98221,Entire home/apt,58,1,106,2019-02-12,1.26,3,168
4,256825,BLUE Mosque walking&Grand pazar مشي لكل مكان,1351134,Bãśmä,,Fatih,41.01106,28.94568,Private room,100,3,211,2018-10-07,2.58,9,83


In [49]:
#Let's find the latitude and longitude information of Istanbul and have our map drawn.

m = folium.Map(location = [41.015137, 28.979530])

def folium_deepnote_show(m):
    data = m.get_root().render()
    data_fixed_height = data.replace('width: 100%;height: 100%', 'width: 100%').replace('height: 100.0%;', 'height: 609px;', 1)
    display(HTML(data_fixed_height))
    
m

In [50]:
#Let's show the location of the ads in the airbnb100 dataset with a point marker on the map. 
#Let's set the zoom value to 12 and the district names to popup.

m = folium.Map(location = [41.015137, 28.979530], zoom_start=12)

for i in range(len(airbnb100)):
    folium.Marker(location= [airbnb100['latitude'][i], airbnb100['longitude'][i]], popup = airbnb100['neighbourhood'][i]).add_to(m)

def folium_deepnote_show(m):
    data = m.get_root().render()
    data_fixed_height = data.replace('width: 100%;height: 100%', 'width: 100%').replace('height: 100.0%;', 'height: 609px;', 1)
    display(HTML(data_fixed_height))
    
m

Let's color the ads according to their prices on the map we created above and add the district and price information to the popup information.

Let's create a variable named price_range in the airbnb100 dataset and divide the prices of the ads into categories according to the 25% percentile and 75% percentile values:

1. 'Cheap', which is equal to or less than 25% percentile,

2. 'Average' which is greater than 25% percentile and equal to or less than 75% percentile,

3. 'Expensive' which is greater than 75% percentile.

In [51]:
airbnb100["price_range"] = pd.cut(airbnb100.price, bins=[0, 127, 322, 1752], labels=["Cheap", "Average", "Expensive"])
airbnb100.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  airbnb100["price_range"] = pd.cut(airbnb100.price, bins=[0, 127, 322, 1752], labels=["Cheap", "Average", "Expensive"])


Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,price_range,tag
0,140269,Penthouse in Taxim -with 2 bedrooms,683356,Damir,,Beyoglu,41.03577,28.9773,Entire home/apt,295,3,128,2015-12-27,1.38,3,365,Average,Neighborhood:Beyoglu Price:295
1,166265,Apartment in the Heart of the City1,792658,ISlK,,Beyoglu,41.03886,28.98199,Entire home/apt,232,2,119,2019-01-26,1.66,9,358,Average,Neighborhood:Beyoglu Price:232
2,229498,Cozy studio apartment in Cihangir,1167189,Bilgehan,,Beyoglu,41.03077,28.98118,Entire home/apt,364,7,113,2018-09-01,1.3,2,304,Expensive,Neighborhood:Beyoglu Price:364
3,247874,"Istiklal Avenue,Sadri alisik street",1298849,Berkan,,Beyoglu,41.03433,28.98221,Entire home/apt,58,1,106,2019-02-12,1.26,3,168,Cheap,Neighborhood:Beyoglu Price:58
4,256825,BLUE Mosque walking&Grand pazar مشي لكل مكان,1351134,Bãśmä,,Fatih,41.01106,28.94568,Private room,100,3,211,2018-10-07,2.58,9,83,Cheap,Neighborhood:Fatih Price:100


Let's create a variable named "tag" in the airbnb100 dataset and combine the Neighborhood: (district) and Price: information for each ad.

In [52]:
tag = []
for i in range(len(airbnb100)):
    tag.append("Neighborhood:" + str(airbnb100.neighbourhood[i]) + 
               " " + 
               "Price:" + str(airbnb100.price[i]))

airbnb100["tag"] = tag
airbnb100.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  airbnb100["tag"] = tag


Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,price_range,tag
0,140269,Penthouse in Taxim -with 2 bedrooms,683356,Damir,,Beyoglu,41.03577,28.9773,Entire home/apt,295,3,128,2015-12-27,1.38,3,365,Average,Neighborhood:Beyoglu Price:295
1,166265,Apartment in the Heart of the City1,792658,ISlK,,Beyoglu,41.03886,28.98199,Entire home/apt,232,2,119,2019-01-26,1.66,9,358,Average,Neighborhood:Beyoglu Price:232
2,229498,Cozy studio apartment in Cihangir,1167189,Bilgehan,,Beyoglu,41.03077,28.98118,Entire home/apt,364,7,113,2018-09-01,1.3,2,304,Expensive,Neighborhood:Beyoglu Price:364
3,247874,"Istiklal Avenue,Sadri alisik street",1298849,Berkan,,Beyoglu,41.03433,28.98221,Entire home/apt,58,1,106,2019-02-12,1.26,3,168,Cheap,Neighborhood:Beyoglu Price:58
4,256825,BLUE Mosque walking&Grand pazar مشي لكل مكان,1351134,Bãśmä,,Fatih,41.01106,28.94568,Private room,100,3,211,2018-10-07,2.58,9,83,Cheap,Neighborhood:Fatih Price:100


Let's draw our map by coloring the markers so that cheap ads are green, average ones are blue, and expensive ones are red. Let's add the 'home' icon to the tokens.

In [53]:
m = folium.Map(location = [41.015137, 28.979530], zoom_start=12)

for i in range(len(airbnb100)):
    if airbnb100['price_range'][i] == 'Cheap':
        folium.Marker(location= [airbnb100['latitude'][i], airbnb100['longitude'][i]], popup = airbnb100['tag'][i], icon = folium.Icon(color = 'green', icon = 'home', icon_color='black')).add_to(m)
    elif airbnb100['price_range'][i] == 'Average':
        folium.Marker(location= [airbnb100['latitude'][i], airbnb100['longitude'][i]], popup = airbnb100['tag'][i], icon = folium.Icon(color = 'blue', icon = 'home')).add_to(m)
    else:
        folium.Marker(location= [airbnb100['latitude'][i], airbnb100['longitude'][i]], popup = airbnb100['tag'][i], icon = folium.Icon(color = 'red', icon = 'home')).add_to(m)

        
def folium_deepnote_show(m):
    data = m.get_root().render()
    data_fixed_height = data.replace('width: 100%;height: 100%', 'width: 100%').replace('height: 100.0%;', 'height: 609px;', 1)
    display(HTML(data_fixed_height))
    
m

In [54]:
#Let's draw a heatmap of the number of comments according to the airbnb dataset.

m = folium.Map(location = [41.015137, 28.979530])

def folium_deepnote_show(m):
    data = m.get_root().render()
    data_fixed_height = data.replace('width: 100%;height: 100%', 'width: 100%').replace('height: 100.0%;', 'height: 609px;', 1)
    display(HTML(data_fixed_height))
    

plugins.HeatMap(data = airbnb[['latitude', 'longitude', 'number_of_reviews']]).add_to(m)
m

Let's plot a bubble chart with the airbnb100 dataset:
1. Color the bubbles according to the room type.
2. Set the information in the label column that we created before as a popup.
3. Adjust the bubble sizes according to the advertised price.

In [38]:
#Let's look at the categories of the room_type variable in the airbnb100 dataset

airbnb100.room_type.unique()


array(['Entire home/apt', 'Private room', 'Shared room'], dtype=object)

Let's create a map of Istanbul. Let's plot the bubble graph:
    
1. Show 'Entire home/apt' ads with green bubbles, 'Private room' ads with blue and 'Shared room' ads with red bubbles.

2. Adjust the bubble sizes according to the advertised price. (Hint: airbnb100['price'][i]/5)

3. Add the information in the tag variable to the popups.

In [39]:
m = folium.Map(location = [41.015137, 28.979530], zoom_start=12)

for i in range(len(airbnb100)):
    if airbnb100['room_type'][i] == 'Entire home/apt':
        folium.Circle(
            radius=airbnb100['price'][i]/5,
            location=[airbnb100['latitude'][i], airbnb100['longitude'][i]],
            popup=airbnb100['tag'][i],
            color='green',
            fill=True,
            ).add_to(m)
    if airbnb100['room_type'][i] == 'Private room':
        folium.Circle(
            radius=airbnb100['price'][i]/5,
            location=[airbnb100['latitude'][i], airbnb100['longitude'][i]],
            popup=airbnb100['tag'][i],
            color='blue',
            fill=True,
            ).add_to(m)
    if airbnb100['room_type'][i] == 'Shared room':
        folium.Circle(
            radius=airbnb100['price'][i]/5,
            location=[airbnb100['latitude'][i], airbnb100['longitude'][i]],
            popup=airbnb100['tag'][i],
            color='red',
            fill=True,
            ).add_to(m)


            
m

Let's show the average prices of the ads with 1 or more comments in the airbnb dataset, according to the districts, on the color toned map (choropleth).

In [40]:
turkey = gpd.read_file('https://raw.githubusercontent.com/tahasarnic/ilceler/master/turkiye-ilceler.geojson')

In [41]:
#Let's import the polygon data in geojson format with the name turkey. Let's convert the Istanbul district names in the polygon 
#data to the format in the airbnb data, that is, to the format without Turkish characters.

turkey.name.replace(to_replace = ['Üsküdar', 'Beşiktaş', 'Beyoğlu', 'Şişli', 'Sarıyer', 'Beykoz','Ataşehir', 'Fatih', 'Adalar',
                                  'Kadıköy', 'Kağıthane', 'Maltepe', 'Bakırköy', 'Esenyurt', 'Başakşehir', 'Kartal', 
                                  'Gaziosmanpaşa', 'Bahçelievler', 'Bağcılar', 'Büyükçekmece', 'Silivri', 'Beylikdüzü', 
                                  'Ümraniye', 'Şile', 'Çekmeköy', 'Sancaktepe', 'Tuzla', 'Pendik', 'Sultangazi', 'Eyüp', 
                                  'Zeytinburnu', 'Küçükçekmece', 'Avcılar', 'Güngören', 'Çatalca', 'Bayrampaşa', 'Esenler', 
                                  'Sultanbeyli', 'Arnavutköy'], value = ['Uskudar', 'Besiktas', 'Beyoglu', 'Sisli', 'Sariyer', 
                                  'Beykoz','Atasehir', 'Fatih', 'Adalar', 'Kadikoy', 'Kagithane', 'Maltepe', 'Bakirkoy', 
                                  'Esenyurt', 'Basaksehir', 'Kartal', 'Gaziosmanpasa', 'Bahcelievler', 'Bagcilar', 'Buyukcekmece', 
                                  'Silivri', 'Beylikduzu', 'Umraniye', 'Sile', 'Cekmekoy', 'Sancaktepe', 'Tuzla', 'Pendik', 
                                  'Sultangazi', 'Eyup', 'Zeytinburnu', 'Kucukcekmece', 'Avcilar', 'Gungoren', 'Catalca', 
                                  'Bayrampasa', 'Esenler', 'Sultanbeyli', 'Arnavutkoy'],  inplace = True)

In [42]:
#Let's filter the airbnb dataset so that the number of comments is equal to or greater than 1. Let's group by districts 
#(neighbourhood) and find the average of prices. Let's convert our output to DataFrame and reset the index. Let's assign 
#a new DataFrame with the name airbnb_avg_price.

airbnb_avg_price = airbnb[airbnb.number_of_reviews >= 1].groupby("neighbourhood")["price"].mean().to_frame().reset_index()
airbnb_avg_price

Unnamed: 0,neighbourhood,price
0,Adalar,385.527273
1,Arnavutkoy,1202.0
2,Atasehir,235.866667
3,Avcilar,202.388889
4,Bagcilar,311.633333
5,Bahcelievler,153.646341
6,Bakirkoy,233.150685
7,Basaksehir,394.552632
8,Bayrampasa,104.0
9,Besiktas,265.039451


Our airbnb_avg_price data includes only the advertisements in Istanbul, but our turkey district polygon data includes the information of all districts in Turkey. So, let's combine the airbnb_avg_price and turkey datasets with the left method to create a dataset called ilce, which has polygon data only for Istanbul districts. Since the output of the merge operation will be Pandas DataFrame, let's convert it to GeoDataFrame format so that it can be read as polygon data.

In [43]:
district = gpd.GeoDataFrame(airbnb_avg_price.merge(turkey, left_on= 'neighbourhood', right_on = 'name', how = "left"))
district.head()

Unnamed: 0,neighbourhood,price,synonyms,icon,extrude,visibility,end,begin,timestamp,description,name,drawOrder,altitudeMode,tessellate,geometry
0,Adalar,385.527273,İstanbul|Marmara|Marmara Bölgesi|ADALAR,,0,1,,,,District,Adalar,,,-1,"POLYGON ((29.12958 40.87569, 29.12458 40.83792..."
1,Arnavutkoy,1202.0,İstanbul|Marmara Bölgesi|ARNAVUTKOY,,0,1,,,,District,Arnavutkoy,,,-1,"POLYGON ((28.83302 41.13262, 28.80082 41.14194..."
2,Atasehir,235.866667,İstanbul|Marmara Bölgesi|ATASEHIR,,0,1,,,,District,Atasehir,,,-1,"POLYGON ((29.06557 41.01101, 29.07783 41.02299..."
3,Avcilar,202.388889,İstanbul|Marmara Bölgesi|AVCILAR,,0,1,,,,District,Avcilar,,,-1,"POLYGON ((28.76738 40.99284, 28.75876 40.97958..."
4,Bagcilar,311.633333,İstanbul|Marmara Bölgesi|BAGCILAR,,0,1,,,,District,Bagcilar,,,-1,"POLYGON ((28.87146 41.03859, 28.85819 41.02142..."


In [44]:
#Let's draw our color toned map (choropleth) to show the district names when hovered over it.

#Intervals

bins = [75, 185, 236, 355, 640, 827, 1015, 1205]

m = folium.Map(location = [41.015137, 28.979530], zoom_start = 9)

#Color Tone Map
choropleth = folium.Choropleth(
    geo_data = district,
    data=airbnb_avg_price,
    columns=['neighbourhood', 'price'],
    key_on='feature.properties.name',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    bins = bins,
    legend_name='Average Price by District',
    highlight = True
).add_to(m)

choropleth.geojson.add_child(
        folium.features.GeoJsonTooltip(['name'], labels = False)
)

m