In [None]:
import pandas as pd
import numpy as np
import sqlite3
import ast
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
def get_df(table_name, db_path='/Users/tristannisbet/Documents/travel_app/places.db'):
    """Read every row of one SQLite table into a DataFrame.

    Parameters
    ----------
    table_name : str
        Name of the table to read. It is interpolated into the SQL text
        as-is (identifiers cannot be bound as parameters), so only pass
        trusted, hard-coded names.
    db_path : str, optional
        Path of the SQLite database file. Defaults to the location the
        notebook was originally written against.

    Returns
    -------
    pandas.DataFrame
        The result of ``select * from <table_name>``.

    Raises
    ------
    sqlite3.Error
        If the database cannot be opened. Previously the error was printed
        and the function then failed on the unbound ``conn`` variable.
    """
    conn = sqlite3.connect(db_path)
    try:
        sql = """select * from {}""".format(table_name)
        return pd.read_sql_query(sql, conn)
    finally:
        # Always release the file handle, even when the query fails.
        conn.close()

In [None]:
def find_similar_n(df, n):
    """Return, for every row, the labels of its ``n`` largest columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Score matrix; each row is ranked independently.
    n : int
        Number of top-scoring column labels to keep per row.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df`` with columns ``top1`` .. ``top<n>`` holding the
        column labels ordered from highest to lowest score.
        NOTE(review): when ``df`` is a self-similarity matrix the row's own
        label will normally be ``top1`` -- confirm callers expect that.
    """
    rank_labels = ['top{}'.format(i) for i in range(1, n + 1)]

    def _top_labels(row):
        # Labels of the n highest scores in this row, best first.
        best = row.sort_values(ascending=False).iloc[:n].index
        return pd.Series(best, index=rank_labels)

    # The former np.argsort result was never used and has been removed.
    return df.apply(_top_labels, axis=1)

In [None]:
# Load the raw survey export (one row per respondent).
# NOTE(review): absolute, user-specific path -- this cell only runs on a
# machine that has the file at exactly this location.
user_response = pd.read_csv('/Users/tristannisbet/Documents/SM/survey_responses.csv')

In [None]:
# Replace the long survey question headers with short snake_case names.
# The keys must match the CSV headers character for character (several end
# in a trailing space); by default rename() silently skips keys that are not
# present, so a header typo would leave the original long name in place.
user_response.rename(columns = {'What country are you from? ': 'nationality', 'Age Range': 'age', 'Gender': 'gender',
                    '1. Choose your top favorite 3-5 cities you have traveled to that are on this list.  - Favorite City #1': 'favorite_city_one',
                    '2. Favorite city #2': 'favorite_city_two', '3. Favorite city #3': 'favorite_city_three',
                    '4. Favorite city #4': 'favorite_city_four', '5. Favorite city #5': 'favorite_city_five',
                    "6. If there's a city you have been and loved that is not on this list, add it below. ": 'extra_favorite',
                    "7. What cities on this list have you been to and not enjoyed?   - Least favorite city #1": 'least_favorite_one',
                    '8. Least favorite city #2': 'least_favorite_two', 
                    "9. If there's a city you have been to and haven't liked that is not on this list, add it below": 'extra_least_favorite',
                    "What price range of restaurant do you eat at when you travel? [Price level: 1 (Fast/Cheap Eats)]": 'food_one',
                    "What price range of restaurant do you eat at when you travel? [Price level: 2 (Casual Dining)]": 'food_two',
                    "What price range of restaurant do you eat at when you travel? [Price level: 3 (Upscale Dining)]": 'food_three',
                    "What price range of restaurant do you eat at when you travel? [Price level: 4 (Fine Dining/High End)]": 'food_four',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Art Gallery]": 'art_gallery',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Library]": 'library',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Museum ]": 'museum',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Aquarium]": 'aquarium',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Amusement Park ]": 'amusement_park',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Zoo]": 'zoo',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Movie Theater]": 'movie_theater',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Mall / Souvenir shop ]": 'store',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Park ]": 'park',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Natural Feature / Beach]": 'natural_feature',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Place of Worship (Church/Temple)]": 'place_of_worship'}, inplace=True )

In [None]:
# Unanswered questions become the literal string 'None' in every column.
user_response = user_response.fillna('None')

In [None]:
# Discard the submission timestamp and the two free-text city answers.
user_response = user_response.drop(
    columns=['Timestamp', 'extra_favorite', 'extra_least_favorite']
)

In [None]:
# Integer code for each nationality that appears in the survey.
nationality_dict = {'Australia': 1, 'Canada': 2, 'China': 3, 'Finland': 4, 'Honduras': 5,
              'India': 6, 'Israel': 7, 'Japan': 8, 'Mexico': 9, 'Pakistan': 10, 'Philippines': 11, 'United States': 12}

# Series.map yields NaN for any value that is not a key above (including the
# 'None' placeholder written by the earlier fillna cell).
# NOTE(review): a NaN here would later make cosine_similarity fail, and the
# codes are arbitrary labels that end up treated as numeric magnitudes --
# confirm both are acceptable for this data.
user_response.nationality = user_response.nationality.map(nationality_dict)


In [None]:
# One-hot encode the string-valued survey columns; numeric columns (such as
# the mapped nationality code) pass through unchanged.
dummy = pd.get_dummies(user_response)


In [None]:
# Display the encoded responses for inspection.
dummy


In [None]:
# Pairwise cosine similarity between respondents (the rows of `dummy`).
cosine_user = cosine_similarity(dummy)


In [None]:
# Wrap the similarity array in a DataFrame (default integer row/column labels).
# NOTE(review): the original note here said "cosine does not have user
# demographics", but `dummy` is built from all of user_response, which still
# carries the nationality, age and gender columns -- confirm which is intended.
cosine_user = pd.DataFrame(cosine_user)
cosine_user

In [None]:
# For each respondent, the labels of the 10 highest-scoring columns of the
# user-user similarity matrix.
top_10_user = find_similar_n(cosine_user,10)
top_10_user

## Building city matrix


In [None]:
# Pull each restaurant table from the database, then stack them row-wise.
one, two, three, four, top_rest = (
    get_df(table)
    for table in (
        'restaurants_one',
        'restaurants_two',
        'restaurants_three',
        'restaurants_four',
        'restaurants',
    )
)
all_food = pd.concat([one, two, three, four, top_rest], axis=0)
all_food.sample(5)

In [None]:
# Load the attractions table and preview five random rows
# (unseeded, so the preview differs between runs).
attractions = get_df('attractions')
attractions.sample(5)

In [None]:
def to_city(df):
    """Summarise place rows into one row per (country, city, id).

    The result has one column per distinct ``price_level`` holding the number
    of non-null ``name`` values at that level (NaN where a city has none),
    plus an ``avg_price`` column with the mean ``price_level`` of the city.
    """
    city_keys = ['country', 'city', 'id']

    # Count of names for every (city, price level) combination.
    level_counts = (
        df.groupby(city_keys + ['price_level'])['name']
        .count()
        .to_frame()
    )

    # Spread the price levels out into columns, one row per city.
    per_city = level_counts.pivot_table(
        values='name',
        index=list(city_keys),
        columns='price_level',
        aggfunc='first',
    )

    # Mean price level per city, aligned on the (country, city, id) index.
    per_city['avg_price'] = df.groupby(list(city_keys))['price_level'].mean()
    return per_city

In [None]:
# Per-city restaurant counts by price level plus the average price level.
food = to_city(all_food)
food

In [None]:
# Place-type labels grouped by theme (presumably the values found in the
# attractions table's `types` column -- TODO confirm against the data).
place_of_worship = ['place_of_worship', 'hindu_temple', 'church', 'mosque', 'synagogue']
shopping = ['store', 'shopping_mall', 'clothing_store', 'electronics_store', 'grocery_or_supermarket', 'department_store']

# Full list of type columns retained when attractions are counted per city.
attractions_to_keep = ['amusement_park', 'museum', 'park', 'art_gallery', 'aquarium',
                      'zoo', 'library', 'movie_theater', 'natural_feature'] + place_of_worship + shopping

In [None]:
def main_call(attraction_df):
    """Run the attraction pipeline: parse types, one-hot encode, count per city.

    Returns a ``(by_city, all_attractions)`` pair exactly as produced by
    ``attraction_count``.
    """
    with_types = split_types(attraction_df)
    type_flags = dummies(with_types)
    per_city, combined = attraction_count(type_flags, with_types)
    return per_city, combined

def split_types(df):
    """Parse the ``types`` column and add two derived columns in place.

    ``split_types`` holds each ``types`` string evaluated as a Python literal
    and ``split_types_str`` holds those items joined with commas. The input
    frame itself is modified and returned.
    """
    parsed = list(map(ast.literal_eval, df.types))
    df['split_types'] = parsed
    df['split_types_str'] = list(map(','.join, df.split_types))
    return df

def dummies(df):
    """One-hot encode the comma-separated ``split_types_str`` column."""
    return df['split_types_str'].str.get_dummies(sep=',')

def attraction_count(dummies_df, all_attractions_df, keep_types=None):
    """Count attractions of each kept type per (country, city, id).

    Parameters
    ----------
    dummies_df : pandas.DataFrame
        One-hot type indicators, row-aligned with ``all_attractions_df``.
    all_attractions_df : pandas.DataFrame
        Attraction rows; must contain ``country``, ``city`` and ``id``.
    keep_types : sequence of str, optional
        Indicator columns to total. Defaults to the module-level
        ``attractions_to_keep`` list.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The per-city totals indexed by (country, city, id), and the
        attraction rows with the indicator columns appended.
    """
    if keep_types is None:
        keep_types = attractions_to_keep

    group_keys = ['country', 'city', 'id']
    all_attractions_df = pd.concat([all_attractions_df, dummies_df], axis=1)

    # Build a fresh list: the old code called .extend() on the shared
    # attractions_to_keep list, so each call appended the key columns again.
    selected = list(keep_types) + group_keys
    by_city = all_attractions_df[selected].groupby(group_keys).sum()

    return by_city, all_attractions_df

In [None]:
# Per-city attraction counts, plus the attraction rows with type indicators.
city_group, all_attractions = main_call(attractions)

In [None]:
# Display the attraction rows with their type indicator columns.
all_attractions

In [None]:
# Display the per-city attraction counts.
city_group

In [None]:
def attraction_food(attraction_df, food_df):
    """Outer-join two per-city frames on (country, city, id).

    The ``id`` key is converted to a numeric type and the result is indexed
    by (country, city, id) and ordered by ``id``.
    """
    key_cols = ['country', 'city', 'id']

    merged = pd.merge(attraction_df, food_df, on=key_cols, how='outer').reset_index()
    merged['id'] = pd.to_numeric(merged['id'])

    return merged.set_index(key_cols).sort_values('id')

In [None]:
# Combine restaurant and attraction features into one row per city.
# NOTE(review): the arguments are passed in the opposite order to the
# parameter names (attraction_df, food_df). The join is an outer merge, so
# this should only affect column order (restaurant columns first) -- confirm
# before swapping, since later positional slicing depends on column order.
city = attraction_food(food, city_group)

In [None]:
# Display the combined per-city feature matrix.
city

In [None]:
# Filling null values so cosine will work
# NOTE(review): the next expression selects the rows with no value in the
# 1.0 price-level column, but it is not the last statement in the cell, so
# its result is neither displayed nor stored.
city[city[1.0].isnull()]
# Every missing value becomes 0, including avg_price for cities that had no
# matching restaurant rows.
city.fillna(0, inplace=True)

In [None]:
# Pairwise cosine similarity between cities (the rows of `city`), wrapped in
# a DataFrame with default integer labels rather than the city index.
cosine_city = pd.DataFrame(cosine_similarity(city))

cosine_city

In [None]:
# For each city row, the labels of the 10 highest-scoring columns of the
# city-city similarity matrix.
top_10_city = find_similar_n(cosine_city, 10)
top_10_city

### User and city similarity matrices completed

In [None]:
def select_favorite():
    """Look up the city-matrix rows for one respondent's three favourite cities.

    NOTE(review): this looks unfinished and is not called in the visible
    cells. ``k`` is neither a parameter nor assigned in this function, so it
    must already exist as a global; the loop variable ``user`` is never used,
    so the same lookup is repeated once per value in row ``k``; and ``top``
    is never returned, so the function always returns None.
    """
    for user in user_response.iloc[k]:
        # Rows of the global `city` frame whose 'city' index level equals
        # each of the respondent's first three favourites.
        one =  city.iloc[city.index.get_level_values('city') == user_response.iloc[k]['favorite_city_one']]
        two =  city.iloc[city.index.get_level_values('city') == user_response.iloc[k]['favorite_city_two']]
        three =  city.iloc[city.index.get_level_values('city') == user_response.iloc[k]['favorite_city_three']]
        top = pd.concat([one, two, three])
        # Flatten the matched rows into a single one-row frame.
        top = top.stack().to_frame().reset_index(drop=True).T

In [None]:
# Independent copy of the first three responses for experimentation;
# `test` is not referenced again in the visible cells.
test = user_response.iloc[0:3].copy()
test

In [None]:
# Preview the first five cleaned survey responses.
user_response.head()

In [None]:

def build_top_city(city_matrix, user_matrix):
    """Build one row per user holding the features of their top three cities.

    Parameters
    ----------
    city_matrix : pandas.DataFrame
        City features indexed by a MultiIndex that has a ``city`` level.
    user_matrix : pandas.DataFrame
        Survey responses with ``favorite_city_one``, ``favorite_city_two``
        and ``favorite_city_three`` columns.

    Returns
    -------
    pandas.DataFrame
        Row ``k`` describes ``user_matrix.iloc[k]``. The index levels and
        feature columns of each matched city appear with the suffix ``_1``,
        ``_2`` or ``_3`` according to the favourite's rank. A favourite with
        no match in ``city_matrix`` contributes no columns for that user
        (NaN after the rows are combined). An empty ``user_matrix`` gives an
        empty frame.
    """
    favorite_columns = ('favorite_city_one', 'favorite_city_two', 'favorite_city_three')
    city_names = city_matrix.index.get_level_values('city')

    user_rows = []
    for k in range(len(user_matrix)):
        user = user_matrix.iloc[k]

        pieces = []
        for rank, column in enumerate(favorite_columns, start=1):
            # reset_index() returns a new frame, so the filtered slice is
            # never modified in place.
            matches = city_matrix[city_names == user[column]].reset_index()
            matches = matches.add_suffix('_{}'.format(rank))
            # Flatten each matched city into a labelled Series, skipping
            # missing values (as the previous stack()-based code did).
            pieces.extend(row.dropna() for _, row in matches.iterrows())

        if pieces:
            user_rows.append(pd.concat(pieces).to_frame().T)
        else:
            # Keep a placeholder row so output rows stay aligned with users.
            user_rows.append(pd.DataFrame(index=[0]))

    if not user_rows:
        return pd.DataFrame([])

    # Combine once at the end; DataFrame.append was removed in pandas 2.0
    # and growing a frame row by row is quadratic.
    return pd.concat(user_rows, ignore_index=True, sort=False)
    
    
def build_design_matrix(dummy_response, top_cities):
    """Place the encoded responses and the top-city features side by side.

    Rows are matched on index labels by the column-wise concatenation.
    """
    return pd.concat((dummy_response, top_cities), axis=1)

def find_top_city(city_matrix, user_matrix):
    """Look up city-matrix rows for the values in column positions 3-5 of a user row.

    NOTE(review): this looks like an unfinished generalisation of
    build_top_city and is not called in the visible cells. As written:
    ``i`` is never reset, so the inner loop only runs for the first user;
    ``j`` stays '1', so every lookup gets the suffix ``_1``; each lookup
    overwrites ``one``; ``df`` is never used; and an empty ``user_matrix``
    leaves ``one`` unbound at the return. The value returned is therefore
    the match for the first user's column at position 5.
    """
    k = 0  # immediately rebound by the for loop below
    i = 3  # first positional column to read (presumably favorite_city_one -- TODO confirm)
    j = '1'
    df = pd.DataFrame([])
    for k in range(len(user_matrix)):
        while i < 6:
            # Rows of city_matrix whose 'city' index level equals the value
            # at position i of user k's row.
            one =  city_matrix.iloc[city_matrix.index.get_level_values('city') == user_matrix.iloc[k][i]]
            one.reset_index(inplace=True)  
            one = one.add_suffix('_'+j)
            i += 1
        
    return one


    
    


In [None]:
# One row per respondent with the features of their three favourite cities.
top_3 = build_top_city(city, user_response)
top_3

In [None]:
# Encoded survey answers and top-city features side by side.
matrix = build_design_matrix(dummy, top_3)
matrix

In [None]:
# Inspect the columns from position 485 onward.
# NOTE(review): 485 is hard-coded -- presumably where the appended city
# columns begin; confirm against dummy.shape[1].
matrix.iloc[:, 485:]

In [None]:
# TODO: drop the country/city columns from the top-three city matrix before appending it to the design matrix (presumably because they are non-numeric).
#cos = cosine_similarity(matrix)