In [1]:
import numpy as np
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import os.path
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

import patsy


sns.set_style('whitegrid')

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

In [2]:
from sklearn.externals import joblib

In [3]:
ctest = pd.read_csv('/Users/jyots/Desktop/DS_Projects/airbnb_datasets/Paris_IP.csv')

In [4]:
ctest.shape

(41192, 18)

In [5]:
city_names = ["Barcelona", "Rome", "Berlin", "Copenhagen", "Paris"]
cities = {}

dataset_dir = "/Users/jyots/Desktop/DS_Projects/airbnb_DataSets"
model_dir = "/Users/jyots/Desktop/DS_Projects/Models"
    
for city in city_names:
    dataset_path = os.path.join(dataset_dir, "%s_IP.csv" % city)
    model_path = os.path.join(model_dir, "enet_%s.pkl" % city)
    
    dataset = pd.read_csv(dataset_path)
    model = joblib.load(model_path)
    
    dataset.drop('Unnamed: 0', axis=1, inplace=True)
    
    cities[city] = (dataset, model)

In [6]:
#cities

In [7]:
def preprocess(X, neighborhood, bedroom, bathroom, room_type):
    
    X1 = X[0:1]
    X1.loc[0,:] = 0.
    x = X1[[i for i in X1.columns if 'neighborhood' in i]]
 
    tmp = [n for n in x.columns if neighborhood in n]
    #print tmp[0]

    #print X1.loc[0,tmp[0]]
    
    X1.loc[0,tmp[0]] = 1.
    #print X1.loc[0,tmp[0]]
    X1.loc[0,'bathrooms'] = bathroom
    X1.loc[0,'bedrooms'] = bedroom
    X1.loc[0,'dist_arprt'] = np.mean(X['dist_arprt'][X[tmp[0]] == 1])
    X1.loc[0,'host_count'] = np.mean(X['host_count'][X[tmp[0]] == 1])
    X1.loc[0,'metrostn_count'] = np.mean(X['metrostn_count'][X[tmp[0]] == 1])
    X1.loc[0,'minstay'] = np.mean(X['minstay'][X[tmp[0]] == 1])
    X1.loc[0,'overall_satisfaction'] = np.mean(X['overall_satisfaction'][X[tmp[0]] == 1])
    X1.loc[0,'rest_count'] = np.mean(X['rest_count'][X[tmp[0]] == 1])
    X1.loc[0,'review_count'] = np.mean(X['review_count'][X[tmp[0]] == 1])
    X1.loc[0,'reviews'] = np.mean(X['reviews'][X[tmp[0]] == 1])

    if room_type == 'Private room':
        X1.loc[0,'room_type[T.Private room]'] = 1.
    elif room_type == 'Shared room':
        X1.loc[0,'room_type[T.Shared room]'] = 1.
    elif room_type == 'Entire home/apt':
        X1.loc[0,'room_type[T.Private room]'] = 0.
        X1.loc[0,'room_type[T.Shared room]'] = 0.
    
    
    
    #print X1
    return X1


    

In [8]:
def predict_price(city, neighborhood, bedroom, bathroom, room_type):
    print city, neighborhood, bedroom, bathroom, room_type
    
    df, enet = cities[city]
    X = df[[x for x in df.columns if x not in ['bhk','price','latitude','longitude','room_id','residuals']]]
    target = np.log(df.price)
    formula = "target ~ "+' + '.join(X)+' -1'
    y, X = patsy.dmatrices(formula, data=df, return_type='dataframe')
    Xn = pd.DataFrame(StandardScaler().fit_transform(X), columns = X.columns)
    #print type(Xn)
    
    X_final = preprocess(Xn, neighborhood, bedroom, bathroom, room_type)
    #print X_final.shape
    yhat = enet.predict(X_final)
    return np.exp(yhat)
    
    

In [9]:
from ipywidgets import *
from IPython.display import display

In [10]:
def test(city, neighborhood='neighborhood',bedroom=1, bathroom=1, room_type='Entire home/apt'):
    price = predict_price(city, neighborhood, float(bedroom), float(bathroom), room_type)
    print price[0]
    
def do_interact(name):
    city = cities[name][0]
    
    interact(test,
             city = fixed(name),
             neighborhood = city.neighborhood.unique().tolist(),
             bedroom = city.bedrooms.unique().tolist(),
             bathroom = city.bathrooms.unique().tolist(),
             room_type = city.room_type.unique().tolist())
    
    
do_interact("Copenhagen")

Copenhagen Amager East 2.0 1.0 Entire home/apt


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is

(1, 22)
147.438795971


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
