<a href="https://colab.research.google.com/github/RaquelMichelon/DS_with_Maps/blob/main/DS_Project_using_Map.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Science with Maps
## Learning to plot maps to answer business questions

<img src="https://www.oficinadanet.com.br/imagens/post/27852/google-maps-truques_1400x875_5d8ec1be6e50d.jpg" alt="map draw" width="300"/>

[Airbnb DataSet Source](https://www.kaggle.com/datasets/dgomonov/new-york-city-airbnb-open-data)

In [2]:
#import libs

import pandas as pd
import numpy as np
import io
from google.colab import files
import plotly.express as px #graphs
import folium #maps

In [3]:
#uploaded file -> AB_NYC_2019.csv
file_uploded = files.upload()

Saving AB_NYC_2019.csv to AB_NYC_2019.csv


In [5]:
df = pd.read_csv(io.BytesIO(file_uploded['AB_NYC_2019.csv']))

In [6]:
#drop null rows
df1 = df.dropna()

In [7]:
df1.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.1,1,0
5,5099,Large Cozy 1 BR Apartment In Midtown East,7322,Chris,Manhattan,Murray Hill,40.74767,-73.975,Entire home/apt,200,3,74,2019-06-22,0.59,1,129


In [8]:
# filter just availables homes

#remove column where availability_365 = 0

df1 = df1.loc[df1['availability_365'] > 0, :]

In [11]:
#to count by group
cols = ['neighbourhood_group', 'price']
data_plot = df1.loc[:, cols].groupby('neighbourhood_group').max().reset_index() #instead of max, could be min(), sum()...

In [12]:
#plot a graph

px.bar(data_plot, x= 'neighbourhood_group', y = 'price')

# Where is located the apartment which has the maximum price by Neighbourhood group?

In [13]:
#preparing map plot

cols = ['price', 'neighbourhood_group', 'latitude', 'longitude']
data_plot = df1.loc[:, cols].groupby('neighbourhood_group').max().reset_index()

In [15]:
map = folium.Map()
map

In [16]:
data_plot

Unnamed: 0,neighbourhood_group,price,latitude,longitude
0,Bronx,800,40.91306,-73.78158
1,Brooklyn,8000,40.7389,-73.85676
2,Manhattan,9999,40.87665,-73.90855
3,Queens,2600,40.79721,-73.71299
4,Staten Island,625,40.64779,-74.06356


In [18]:
#points inside the map

for index, location_info in data_plot.iterrows():
  folium.Marker([location_info['latitude'], location_info['longitude']],
                popup=location_info['neighbourhood_group']).add_to(map)

map

# Where is located the apartments by its room types?

In [19]:
#where are located the apartments by its types?

#dataset to plot
cols = ['neighbourhood_group', 'room_type', 'latitude', 'longitude']

# df1.loc[rows, columns]
df1.loc[:, cols]


Unnamed: 0,neighbourhood_group,room_type,latitude,longitude
0,Brooklyn,Private room,40.64749,-73.97237
1,Manhattan,Entire home/apt,40.75362,-73.98377
3,Brooklyn,Entire home/apt,40.68514,-73.95976
5,Manhattan,Entire home/apt,40.74767,-73.97500
7,Manhattan,Private room,40.76489,-73.98493
...,...,...,...,...
48782,Manhattan,Private room,40.78099,-73.95366
48790,Queens,Private room,40.75104,-73.81459
48799,Staten Island,Private room,40.54179,-74.14275
48805,Bronx,Entire home/apt,40.80787,-73.92400


In [24]:
# to separate room_types by color 

data_plot = df1.loc[:, cols].sample(100) #to show only 100 rows

# create new column

data_plot.loc[:, 'color'] = 'NA'

In [21]:
data_plot

Unnamed: 0,neighbourhood_group,room_type,latitude,longitude,color
0,Brooklyn,Private room,40.64749,-73.97237,
1,Manhattan,Entire home/apt,40.75362,-73.98377,
3,Brooklyn,Entire home/apt,40.68514,-73.95976,
5,Manhattan,Entire home/apt,40.74767,-73.97500,
7,Manhattan,Private room,40.76489,-73.98493,
...,...,...,...,...,...
48782,Manhattan,Private room,40.78099,-73.95366,
48790,Queens,Private room,40.75104,-73.81459,
48799,Staten Island,Private room,40.54179,-74.14275,
48805,Bronx,Entire home/apt,40.80787,-73.92400,


In [26]:
#sign a color for each room_type
data_plot.loc[data_plot['room_type'] == 'Private room', 'color'] = 'purple'
data_plot.loc[data_plot['room_type'] == 'Entire home/apt', 'color'] = 'darkred'
data_plot.loc[data_plot['room_type'] == 'Shared room', 'color'] = 'darkgreen'
data_plot

Unnamed: 0,neighbourhood_group,room_type,latitude,longitude,color
10378,Bronx,Private room,40.85205,-73.78868,purple
15429,Manhattan,Entire home/apt,40.79505,-73.94361,darkred
42225,Bronx,Private room,40.83988,-73.78287,purple
45122,Staten Island,Private room,40.64535,-74.09255,purple
29691,Brooklyn,Entire home/apt,40.65757,-73.96039,darkred
...,...,...,...,...,...
22419,Manhattan,Entire home/apt,40.76476,-73.99453,darkred
41682,Brooklyn,Entire home/apt,40.68757,-73.91183,darkred
30895,Queens,Private room,40.74309,-73.82472,purple
11646,Manhattan,Entire home/apt,40.80822,-73.94264,darkred


In [27]:
#plot the map

map = folium.Map()

for index, location_info in data_plot.iterrows():
  folium.Marker([location_info['latitude'], 
                 location_info['longitude']],
                 popup=location_info['neighbourhood_group'],
                 icon=folium.Icon(color = location_info['color'])).add_to(map)

map

# Top 10 apartments with more reviews

In [28]:
df1.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
5,5099,Large Cozy 1 BR Apartment In Midtown East,7322,Chris,Manhattan,Murray Hill,40.74767,-73.975,Entire home/apt,200,3,74,2019-06-22,0.59,1,129
7,5178,Large Furnished Room Near B'way,8967,Shunichi,Manhattan,Hell's Kitchen,40.76489,-73.98493,Private room,79,2,430,2019-06-24,3.47,1,220


In [33]:

data_plot = df1.loc[:, ['number_of_reviews', 'neighbourhood_group', 'latitude', 'longitude']].groupby('neighbourhood_group').head(10)
data_plot = data_plot.sort_values(['neighbourhood_group', 'number_of_reviews'], ascending=[True, False])

map = folium.Map()

for index, location_info in data_plot.iterrows():
  folium.Marker([location_info['latitude'], 
                 location_info['longitude']], location_info['neighbourhood_group']).add_to(map)

map 

# Top 50 prices

In [35]:
data_plot = df1.loc[:, ['name', 'price', 'latitude', 'longitude']].sort_values(['price'], ascending=[False]).head(50)

map = folium.Map()

for index, location_info in data_plot.iterrows():
  folium.Marker([location_info['latitude'], 
                 location_info['longitude']], location_info['name']).add_to(map)

map 

# Top 10 lowest price by region

In [36]:
data_plot = df1.loc[:, ['number_of_reviews', 'neighbourhood_group', 'latitude', 'longitude']].groupby('neighbourhood_group').tail(10)
data_plot = data_plot.sort_values(['neighbourhood_group', 'number_of_reviews'], ascending=[True, False])

map = folium.Map()

for index, location_info in data_plot.iterrows():
  folium.Marker([location_info['latitude'], 
                 location_info['longitude']], location_info['neighbourhood_group']).add_to(map)

map 