In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import warnings

# interactive maps

import folium
from folium.plugins import FastMarkerCluster
import geopandas as gpd
from branca.colormap import LinearColormap

#to make the plotly graphs

import plotly.graph_objects as go
from plotly.offline import iplot, init_notebook_mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

#text mining
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import re
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from wordcloud import WordCloud


In [6]:
# Detailed listings table; selected columns are merged into the summary table by 'id' further down.
# NOTE(review): absolute path on a local Windows machine, while other cells read from '/content/' —
# consider a single configurable data directory.
ruta_listados_completo = r'C:\Users\palom\Desktop\Diego\Bootcamp\Nueva carpeta\airbnb_tokyo\listings_completo.csv\listings.csv'
listados = pd.read_csv(ruta_listados_completo)

In [None]:
# Summary listings table; it is the left side of the merge that builds `listings_total`.
ruta_listados_resumen = '/content/listings_resumen.csv'
listados_resumen = pd.read_csv(ruta_listados_resumen)

In [None]:
# Drop the 'neighbourhood_group' column.
# Reassigning with errors='ignore' (instead of inplace=True) keeps the cell
# idempotent: re-running it after the column is gone no longer raises KeyError.
listados_resumen = listados_resumen.drop(columns='neighbourhood_group', errors='ignore')

In [None]:
# Columns pulled from the detailed table; 'id' is the join key.
target_columns = [
    'id',
    'neighborhood_overview',
    'host_since',
    'host_location',
    'review_scores_location',
    'host_response_time',
    'host_response_rate',
    'host_acceptance_rate',
    'host_is_superhost',
    'neighbourhood_cleansed',
    'property_type',
    'accommodates',
    'beds',
    'amenities',
    'maximum_nights',
    'review_scores_rating',
]

# Left join: every row of the summary table is kept, enriched with the
# selected detailed columns where the id matches.
listings_total = listados_resumen.merge(listados[target_columns], how='left', on='id')
listings_total.info()



In [None]:
# Display the merged table for a visual check.
listings_total

In [None]:
# Convert 'host_response_rate' from percentage text (e.g. '95%') to a number.
# Casting to str first makes the cell safe to re-run: once the column is
# numeric the `.str` accessor would otherwise raise AttributeError. Missing
# values become the text 'nan', which to_numeric parses back to NaN.
listings_total['host_response_rate'] = pd.to_numeric(
    listings_total['host_response_rate'].astype(str).str.strip('%')
)

In [None]:
# Convert 'host_acceptance_rate' from percentage text (e.g. '95%') to a number.
# Casting to str first makes the cell safe to re-run: once the column is
# numeric the `.str` accessor would otherwise raise AttributeError. Missing
# values become the text 'nan', which to_numeric parses back to NaN.
listings_total['host_acceptance_rate'] = pd.to_numeric(
    listings_total['host_acceptance_rate'].astype(str).str.strip('%')
)

In [None]:
# List the distinct neighbourhood names present in the merged table.
listings_total['neighbourhood'].unique()

In [None]:
# Keep only listings whose neighbourhood name ends in 'Ku'
# (presumably Tokyo's wards — confirm against the data).
# na=False treats a missing neighbourhood as "no match"; without it a NaN in
# the mask makes boolean indexing raise. .copy() returns an independent frame
# so later column assignments do not trigger SettingWithCopyWarning.
listings = listings_total[listings_total['neighbourhood'].str.endswith('Ku', na=False)].copy()


In [None]:
# Check which neighbourhood names remain after the 'Ku' filter.
listings['neighbourhood'].unique()

In [None]:
# Build one (latitude, longitude) pair per listing.
lati = listings['latitude'].tolist()
longi = listings['longitude'].tolist()
localización = [(lat, lon) for lat, lon in zip(lati, longi)]

# Base map at the coordinates given below, with every listing added as a
# clustered marker.
mapa = folium.Map(location=[35.6895, 139.69171], zoom_start=10.6)
marcadores = FastMarkerCluster(data=localización)
marcadores.add_to(mapa)
mapa

In [None]:
# Horizontal bar chart: number of listings per neighbourhood, smallest first.
vecindario = listings['neighbourhood'].value_counts().sort_values()
ancho_barras = 0.8
ax_vecindario = vecindario.plot.barh(figsize=(10, 8), color='pink', width=ancho_barras)
ax_vecindario.set_title("Número de anuncios por vecindario", fontsize=10)
ax_vecindario.set_xlabel('Número de anuncios', fontsize=10)
plt.show()


In [None]:
# Horizontal bar chart: number of listings per room type, smallest first.
habit = listings['room_type'].value_counts().sort_values()
ancho_barras = 0.8
ax_habit = habit.plot.barh(figsize=(7, 4), color='pink', width=ancho_barras)
ax_habit.set_title("Número de anuncios por tipo de habitación", fontsize=10)
ax_habit.set_xlabel('Número de anuncios', fontsize=10)
plt.show()

In [None]:
# List the distinct property types among the filtered listings.
listings['property_type'].unique()

In [None]:
# Listings per property type, split by room type (one column per room type).
# fill_value=0 makes absent combinations explicit zeros instead of NaN.
prop = (
    listings
    .groupby(['property_type', 'room_type'])
    .size()
    .unstack(fill_value=0)
)

# Total across ALL room-type columns. The previous version summed only the
# first three columns (iloc[:, 0:3]), so with more than three room types the
# remaining ones were left out of the total used for the >= 30 filter.
total = prop.sum(axis=1).sort_values()

# Keep property types with at least 30 listings, ordered by total (ascending).
prop = prop.loc[total[total >= 30].index]

# Set the y tick label size BEFORE plotting: the call used to come after the
# plot, where it could no longer affect this figure's tick labels.
plt.rc('ytick', labelsize=13)

prop.plot(kind='barh', stacked=True, color=['red', 'white', 'grey', 'y'],
          linewidth=1, grid=True, figsize=(10, 7), width=0.8)
plt.title('tipo de propiedades en Tokyo')
plt.xlabel('numero de anuncios')
plt.ylabel('')
plt.legend(loc=4, prop={'size': 13})
plt.show()

In [None]:
# Vertical bar chart: number of listings for each 'accommodates' value,
# ordered by that value.
inq = listings['accommodates'].value_counts().sort_index()
ax_inq = inq.plot.bar(figsize=(10, 8), color='b', width=0.8, rot=0)
ax_inq.set_title('numero de personas', fontsize=20)
ax_inq.set_ylabel('numero de anuncios', fontsize=12)
plt.show()

In [None]:
# Fixed yen -> euro conversion factor applied to the 'price' column.
tasa_cambio_yen_a_euro = 0.0061874346

# Mean price per neighbourhood for listings that accommodate exactly 2 people,
# sorted ascending.
para_dos_personas = listings['accommodates'] == 2
precio = (
    listings[para_dos_personas]
    .groupby('neighbourhood')['price']
    .mean()
    .sort_values()
)
precio_en_euros = precio * tasa_cambio_yen_a_euro

ax_precio = precio_en_euros.plot.barh(figsize=(10, 8), color='b', width=0.8)
ax_precio.set_title('precio diario para 2 personas', fontsize=20)
ax_precio.set_xlabel('precio diario (euros)')
ax_precio.set_ylabel('')
plt.show()

In [None]:
# Inspect the location review score column.
listings['review_scores_location']

In [None]:
# Choropleth: average price (euros) of 2-person listings per neighbourhood.
Tok = gpd.read_file('/content/neighbourhoods.geojson')

# Rebuild the price table from `precio` (yen) instead of wrapping
# `precio_en_euros` in itself, so re-running this cell gives the same result.
# The outcome has the same shape as before: a one-column frame ('price')
# indexed by neighbourhood.
precio_en_euros = (
    (precio * tasa_cambio_yen_a_euro)
    .rename('price')
    .rename_axis('neighbourhood')
    .to_frame()
)

# GeoDataFrame.merge keeps the geometry; the left join keeps polygons that
# have no matching price (their average_price is NaN).
Tok = Tok.merge(precio_en_euros.reset_index(), on='neighbourhood', how='left')
Tok = Tok.rename(columns={'price': 'average_price'})
Tok['average_price'] = Tok['average_price'].round(decimals=0)

map_dict = Tok.set_index('neighbourhood')['average_price'].to_dict()

# Series.min()/max() skip NaN, unlike the built-in min()/max() on the dict
# values, whose result depends on where the NaNs sit.
color_scale = LinearColormap(['yellow', 'green'],
                             vmin=Tok['average_price'].min(),
                             vmax=Tok['average_price'].max())

# Fill colour for polygons without a price.
color_sin_datos = 'lightgrey'

def get_color(feature):
  """Return the fill colour for a GeoJSON feature.

  Looks up the feature's 'neighbourhood' property in `map_dict` and maps the
  average price through `color_scale`. Features with no price (missing key,
  None or NaN) get `color_sin_datos` instead of being passed to the colour
  scale, which cannot map a missing value.
  """
  value = map_dict.get(feature['properties']['neighbourhood'])
  if value is None or pd.isna(value):
    return color_sin_datos
  return color_scale(value)

map3 = folium.Map(location=[35.6895, 139.69171], zoom_start=10.4)
folium.GeoJson(data=Tok,
               name='Tokyo',
               tooltip=folium.features.GeoJsonTooltip(fields=['neighbourhood', 'average_price'],
                                                       labels=True,
                                                       sticky=False),
               style_function=lambda feature: {
                   'fillColor': get_color(feature),
                   'color': 'black',
                   'weight': 1,
                   'dashArray': '5,5',
                   'fillOpacity': 0.5
                   },
               highlight_function=lambda feature: {'weight': 3, 'fillColor': get_color(feature), 'fillOpacity': 0.8}).add_to(map3)
map3

In [None]:
# Side-by-side comparison per neighbourhood: mean location review score (left)
# and mean price of 2-person listings (right).

# Minimum number of reviews for a listing to count in the left panel. The
# filter previously used 100 while the title announced 10; both now share
# this value.
min_reviews = 10

fig = plt.figure(figsize=(20, 10))
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=20)

ax1 = fig.add_subplot(121)
feq = listings[listings['number_of_reviews'] >= min_reviews]
feq1 = feq.groupby('neighbourhood')['review_scores_location'].mean().sort_values(ascending=True)
feq1.plot.barh(ax=ax1, color='b', width=0.8)
ax1.set_title(f'average review score location(at least {min_reviews} review)', fontsize=20)
ax1.set_xlabel('score(scale1-10)', fontsize=20)
ax1.set_ylabel('')

ax2 = fig.add_subplot(122)
feq = listings[listings['accommodates'] == 2]
# The axis is labelled in euros, so convert the yen prices with the same rate
# used for the earlier price chart (they were plotted unconverted before).
feq2 = feq.groupby('neighbourhood')['price'].mean().sort_values(ascending=True) * tasa_cambio_yen_a_euro
feq2.plot.barh(ax=ax2, color='b', width=0.8)
ax2.set_title('average daily price for a 2 person accomodation', fontsize=20)
ax2.set_xlabel('average daily price(Euro)', fontsize=20)
ax2.set_ylabel('')

plt.tight_layout()
plt.show()

In [None]:
# Distribution of each review sub-score among listings with >= 10 reviews,
# drawn as a 3x2 grid of bar charts.

# Panel title -> review score column. 'review_scores_accuracy' was previously
# misspelled as 'review_scores_acurracy'.
score_panels = {
  'location': 'review_scores_location',
  'cleanliness': 'review_scores_cleanliness',
  'value': 'review_scores_value',
  'communication': 'review_scores_communication',
  'arrival': 'review_scores_checkin',
  'accuracy': 'review_scores_accuracy',
}

listings1 = listings[listings['number_of_reviews'] >= 10]

# Only 'review_scores_location' and 'review_scores_rating' are listed in
# `target_columns`, so the other sub-scores may be absent from `listings`.
# Fetch any missing ones from the detailed table by 'id' to avoid a KeyError.
missing_scores = [col for col in score_panels.values() if col not in listings1.columns]
if missing_scores:
  listings1 = listings1.merge(listados[['id'] + missing_scores], on='id', how='left')

fig = plt.figure(figsize=(20, 10))
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=20)

for position, (title, column) in enumerate(score_panels.items(), start=1):
  ax = fig.add_subplot(3, 2, position)
  feq = listings1[column].value_counts().sort_index()
  feq.plot.bar(ax=ax, color='b', width=0.8, rot=0)
  ax.set_title(title, fontsize=20)
  # x holds the score values and y the counts; the two labels were swapped.
  ax.set_xlabel('average review score', fontsize=20)
  ax.set_ylabel('number of listings', fontsize=20)

plt.tight_layout()
plt.show()