In [None]:
import pandas as pd
import numpy as np
import sqlite3
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn import preprocessing as pp
from sklearn.metrics.pairwise import cosine_similarity
import ast


In [None]:
def get_df(table_name, db_path='/Users/tristannisbet/Documents/travel_app/places.db'):
    """Read every row of one SQLite table into a DataFrame.

    Parameters
    ----------
    table_name : str
        Table to read. It is interpolated directly into the SQL text, so it
        must come from trusted code, never from user input.
    db_path : str, optional
        Location of the SQLite file. Defaults to the path this notebook was
        originally written against.

    Returns
    -------
    pandas.DataFrame
        Result of ``select * from <table_name>``.

    Raises
    ------
    Exception
        Whatever ``sqlite3.connect`` raised is reported and then re-raised,
        instead of falling through to a query on an unbound connection.
        Errors from the query itself propagate unchanged.
    """
    try:
        conn = sqlite3.connect(db_path)
    except Exception as e:
        print('Error durring connection: ', str(e))
        raise

    try:
        sql = """select * from {}""".format(table_name)
        return pd.read_sql_query(sql, conn)
    finally:
        # Always release the file handle, even when the query fails.
        conn.close()

In [None]:
def createFoodDf():
    """Stack the five restaurant tables into a single DataFrame.

    The tables are read with ``get_df`` and concatenated row-wise in the
    order listed below. Each table's original row index is kept, so index
    values can repeat in the result.
    """
    table_names = [
        'restaurants_one',
        'restaurants_two',
        'restaurants_three',
        'restaurants_four',
        'restaurants',
    ]
    frames = [get_df(name) for name in table_names]
    return pd.concat(frames, axis=0)

In [None]:
def cleaningNullsCity(restaurants_all):
    """Fill missing values and collapse restaurants to one row per city.

    Parameters
    ----------
    restaurants_all : pandas.DataFrame
        Restaurant rows; the code reads the ``id``, ``city`` and
        ``price_level`` columns here, and ``toCityLevel`` reads more.

    Returns
    -------
    pandas.DataFrame
        Output of ``toCityLevel`` without its ``avg_price`` column and with
        missing counts replaced by 0.

    Notes
    -----
    The input frame is no longer modified; all work happens on a copy, so
    the calling cell can be re-run against the same raw data.
    """
    restaurants = restaurants_all.copy()

    restaurants['id'] = pd.to_numeric(restaurants['id'])

    # Missing price levels take the mean price level of their city.
    city_mean_price = restaurants.groupby('city')['price_level'].transform('mean')
    restaurants['price_level'] = restaurants['price_level'].fillna(city_mean_price)

    # Anything still missing -- in ANY column, not only price_level -- becomes 2.0.
    restaurants = restaurants.fillna(2.0)

    # Integer cast truncates fractional city means (e.g. 2.7 -> 2) so that the
    # price levels form a small set of pivot columns in toCityLevel.
    restaurants['price_level'] = restaurants['price_level'].astype(int)

    city_food = toCityLevel(restaurants)
    city_food = city_food.drop(columns=['avg_price']).fillna(0)

    return city_food


In [None]:
def toCityLevel(df):
    """Count restaurants per price level for every city.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``country``, ``city``, ``id``, ``price_level`` and ``name``.

    Returns
    -------
    pandas.DataFrame
        Indexed by (country, city, id), one column per distinct price level
        holding the number of non-null names, plus an ``avg_price`` column
        with the mean price level of the city.
    """
    city_keys = ['country', 'city', 'id']

    # Non-null names in each (city, price_level) bucket.
    bucket_counts = (
        df.groupby(city_keys + ['price_level'])['name']
        .count()
        .to_frame()
    )

    # Spread price levels into columns; every bucket holds exactly one
    # count, so 'first' just picks it up.
    wide_counts = bucket_counts.pivot_table(
        index=city_keys,
        columns='price_level',
        values='name',
        aggfunc='first',
    )

    # Aligned on the shared (country, city, id) index.
    wide_counts['avg_price'] = df.groupby(city_keys)['price_level'].mean()

    return wide_counts

In [None]:
def addNanRowCity(food_df):
    """Append the placeholder city row and label-encode the result.

    Parameters
    ----------
    food_df : pandas.DataFrame
        Per-city price-level counts. Its index is reset IN PLACE, which the
        caller can observe; that side effect is kept because later cells
        use the resulting columns.

    Returns
    -------
    pandas.DataFrame
        Whatever ``labelEncodeCity`` returns for the extended frame.
    """
    food_df.reset_index(inplace=True)

    # Placeholder row: city 'Zx', id 200, zero restaurants at each price level.
    nan_row = {'country' : None, 'city': 'Zx', 'id': 200, 1.0: 0, 2.0: 0, 3.0: 0, 4.0: 0}

    # DataFrame.append was removed in pandas 2.0; concat with a one-row
    # frame is the supported equivalent and also works on older versions.
    food_df = pd.concat([food_df, pd.DataFrame([nan_row])], ignore_index=True)

    food_new = labelEncodeCity(food_df)

    return food_new



# You might need to return both food and food_new. Food has city/country names

def buildLabelEncoder():
    """Fit a LabelEncoder on every city name plus the 'Zx' placeholder.

    Returns
    -------
    sklearn.preprocessing.LabelEncoder
        Fitted on the ``city`` column of the ``cities`` table with one extra
        row (id 200, city 'Zx', country 'None') appended.
    """
    cities = get_df('cities')
    new_row = {'id': 200, 'city': 'Zx', 'country': 'None'}

    # DataFrame.append was removed in pandas 2.0; concat with a one-row
    # frame is the supported equivalent and also works on older versions.
    cities = pd.concat([cities, pd.DataFrame([new_row])], ignore_index=True)

    le = pp.LabelEncoder()
    le.fit(cities.city)

    return le
    
def labelEncodeCity(food_df):
    """Attach encoded city labels, then reduce to the price-level columns.

    A ``label_id`` column is written onto the frame that was passed in,
    using the encoder from ``buildLabelEncoder``. The frame is then handed
    to ``selectColumns`` and that result is returned.
    """
    encoder = buildLabelEncoder()
    food_df['label_id'] = encoder.transform(food_df['city'])
    return selectColumns(food_df)


def selectColumns(food_df):
    """Return the price-level counts keyed by ``label_id``.

    Rows whose ``id`` equals 200 (the placeholder row) are removed; the
    remaining rows keep only the four price-level columns, sorted by and
    indexed on ``label_id``. The input frame is not modified.
    """
    placeholder_index = food_df.loc[food_df['id'] == 200].index
    wanted_columns = ['label_id', 1.0, 2.0, 3.0, 4.0]

    return (
        food_df.drop(index=placeholder_index)[wanted_columns]
        .sort_values('label_id')
        .set_index('label_id')
    )
    

def createFoodUserDf():
    """Return the four food answer columns of the ``survey_response`` table."""
    food_columns = ['food_one', 'food_two', 'food_three', 'food_four']
    return get_df('survey_response')[food_columns]



In [None]:
# cosine_sim_food is the users x cities similarity matrix for the food data
# (the original note records its size as 153x138).
def simScoreFood(food_city, food_user):
    """Cosine similarity between every user's and every city's food profile.

    Parameters
    ----------
    food_city : array-like
        One row per city; numeric feature columns.
    food_user : array-like
        One row per user, with the same number of columns as ``food_city``.

    Returns
    -------
    pandas.DataFrame
        Row i, column j holds the cosine similarity between row i of
        ``food_user`` and row j of ``food_city``.

    Notes
    -----
    The city-by-city and user-by-user matrices that used to be computed here
    were never used or returned, so they are no longer calculated.
    """
    normalized_city = pd.DataFrame(pp.normalize(food_city))
    normalized_user = pd.DataFrame(pp.normalize(food_user))

    cosine_sim_food = pd.DataFrame(cosine_similarity(normalized_user, normalized_city))

    return cosine_sim_food

In [None]:
# Load and stack all restaurant tables, then display the combined frame.
all_price = createFoodDf()
all_price

In [None]:
# Rebinds all_price from the raw restaurant rows to the per-city price-level
# counts; re-run the previous cell first if this cell needs to be repeated.
all_price = cleaningNullsCity(all_price)
all_price

In [None]:
# Price-level counts indexed by encoded city label.
# Note: addNanRowCity also resets the index of all_price in place.
food_city = addNanRowCity(all_price)
food_city

In [None]:
# Food answer columns from the survey_response table.
food_user = createFoodUserDf()
food_user

In [None]:
# Similarity of each survey row (rows) to each city (columns) on food data.
cosine_sim = simScoreFood(food_city, food_user)
cosine_sim

## Attractions

In [None]:

# Load the attractions table and preview five randomly chosen rows.
attractions = get_df('attractions')
attractions.sample(5)   

In [None]:
# Type tags that are later summed into a single 'place_of_worship' column.
place_of_worship = [
    'place_of_worship',
    'hindu_temple',
    'church',
    'mosque',
    'synagogue',
]

# Type tags that are later summed into a single 'shop' column.
shopping = [
    'store',
    'shopping_mall',
    'clothing_store',
    'electronics_store',
    'grocery_or_supermarket',
    'department_store',
]

# Every attraction type column that is counted per city.
attractions_to_keep = [
    'amusement_park',
    'museum',
    'park',
    'art_gallery',
    'aquarium',
    'zoo',
    'library',
    'movie_theater',
    'natural_feature',
    *place_of_worship,
    *shopping,
]

In [None]:
def main_call(attraction_df):
    """Run the attraction pipeline from raw rows to per-city feature tables.

    Steps, in order: ``split_types`` -> ``dummies`` -> ``attraction_count``
    -> ``combineAttractionTypes`` -> ``labelEncodeAttraction`` ->
    ``cleanCityAttraction``.

    Returns
    -------
    tuple
        The pair produced by ``cleanCityAttraction``: the feature-only frame
        first, then the frame that still has country/city/id columns.
    """
    typed_rows = split_types(attraction_df)
    type_flags = dummies(typed_rows)
    counts_by_city, _ = attraction_count(type_flags, typed_rows)
    grouped_types = combineAttractionTypes(counts_by_city)
    encoded = labelEncodeAttraction(grouped_types)
    return cleanCityAttraction(encoded)

def split_types(df):
    """Parse the ``types`` column and add list and comma-joined versions.

    Each ``types`` value is evaluated as a Python literal and stored in
    ``split_types``; ``split_types_str`` holds the same items joined with
    commas. Both columns are added to ``df`` itself, which is returned.
    """
    parsed = list(map(ast.literal_eval, df['types']))
    df['split_types'] = parsed
    df['split_types_str'] = list(map(','.join, df['split_types']))
    return df

def dummies(df):
    """Return one indicator column per tag found in ``split_types_str``.

    The column is split on commas; each distinct tag becomes a 0/1 column.
    """
    joined_tags = df['split_types_str']
    return joined_tags.str.get_dummies(sep=',')

def attraction_count(dummies_df, all_attractions_df):
    """Sum the attraction type indicators for every city.

    Parameters
    ----------
    dummies_df : pandas.DataFrame
        Indicator columns, one per attraction type, row-aligned with
        ``all_attractions_df``.
    all_attractions_df : pandas.DataFrame
        Attraction rows containing ``country``, ``city`` and ``id``.

    Returns
    -------
    tuple
        ``(counts, combined)``: ``counts`` is indexed by (country, city, id)
        with one summed column per entry of ``attractions_to_keep``;
        ``combined`` is the input rows with the indicator columns appended.
    """
    all_attractions_df = pd.concat([all_attractions_df, dummies_df], axis=1)

    group_keys = ['country', 'city', 'id']

    # Build a fresh list. The previous code aliased the module-level
    # attractions_to_keep and extended it, so every call appended the key
    # columns to the global again. Filtering also tolerates a global that an
    # earlier run already polluted.
    type_columns = [col for col in attractions_to_keep if col not in group_keys]

    counts = all_attractions_df[type_columns + group_keys].groupby(group_keys).sum()

    return counts, all_attractions_df

def combineAttractionTypes(city_group):
    """Collapse worship and shopping sub-types into two summary columns.

    Parameters
    ----------
    city_group : pandas.DataFrame
        Per-city counts with one column per attraction type. It is no
        longer modified; previously helper columns were added and renamed
        in place, which broke a second call on the same frame.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns amusement_park, art_gallery, aquarium,
        library, movie_theater, museum, natural_feature, park,
        place_of_worship, shop, zoo (in that order). ``place_of_worship``
        and ``shop`` are row-wise sums of their sub-type columns.
    """
    worship_columns = ['place_of_worship', 'hindu_temple', 'church', 'mosque', 'synagogue']
    shop_columns = ['store', 'shopping_mall', 'clothing_store', 'electronics_store',
                    'grocery_or_supermarket', 'department_store']
    output_columns = ['amusement_park', 'art_gallery', 'aquarium', 'library', 'movie_theater',
                      'museum', 'natural_feature', 'park', 'place_of_worship', 'shop', 'zoo']

    # skipna=False keeps the semantics of chained '+': a missing input
    # yields a missing total rather than being treated as zero.
    worship_total = city_group[worship_columns].sum(axis=1, skipna=False)
    shop_total = city_group[shop_columns].sum(axis=1, skipna=False)

    untouched = [col for col in output_columns if col not in ('place_of_worship', 'shop')]
    city_clean = city_group[untouched].copy()
    city_clean['place_of_worship'] = worship_total
    city_clean['shop'] = shop_total

    return city_clean[output_columns]


def labelEncodeAttraction(city_attraction):
    """Add an encoded ``label_id`` column for each city.

    The frame's index is reset in place so ``city`` is available as a
    column, the encoder from ``buildLabelEncoder`` is applied to it, and the
    same (modified) frame is returned.
    """
    encoder = buildLabelEncoder()
    city_attraction.reset_index(inplace=True)
    encoded_labels = encoder.transform(city_attraction['city'])
    city_attraction['label_id'] = encoded_labels
    return city_attraction

def cleanCityAttraction(city_attraction):
    """Index the frame by ``label_id`` and split off a feature-only copy.

    The input is sorted by and re-indexed on ``label_id`` in place.

    Returns
    -------
    tuple
        ``(features_only, city_attraction)``: the first element drops the
        ``city``, ``id`` and ``country`` columns; the second is the input
        frame itself after the in-place changes.
    """
    city_attraction.sort_values(by='label_id', inplace=True)
    city_attraction.set_index(keys='label_id', inplace=True)

    features_only = city_attraction.drop(['city', 'id', 'country'], axis=1)

    return features_only, city_attraction


In [None]:
def createAttractionUserDf():
    """Return the attraction answer columns of the ``survey_response`` table."""
    attraction_columns = [
        'amusement_park', 'art_gallery', 'aquarium', 'library', 'movie_theater',
        'museum', 'natural_feature', 'park', 'place_of_worship', 'shop', 'zoo',
    ]
    responses = get_df('survey_response')
    return responses[attraction_columns]


def AttractionSimScore(user_attraction, city_attraction):
    """Cosine similarity between every user's and every city's attractions.

    Parameters
    ----------
    user_attraction : array-like
        One row per user; numeric feature columns.
    city_attraction : array-like
        One row per city, with the same number of columns.

    Returns
    -------
    pandas.DataFrame
        Row i, column j holds the cosine similarity between row i of
        ``user_attraction`` and row j of ``city_attraction``.

    Notes
    -----
    The city-by-city matrix that used to be computed here was never used or
    returned, so it is no longer calculated.
    """
    normalized_user_a = pd.DataFrame(pp.normalize(user_attraction))
    normalized_city_a = pd.DataFrame(pp.normalize(city_attraction))

    cosine_sim_attraction = pd.DataFrame(cosine_similarity(normalized_user_a, normalized_city_a))

    return cosine_sim_attraction

In [None]:
# city_attraction: feature-only counts indexed by label_id.
# city_attraction_with_country: same rows, still carrying country/city/id.
# Note: main_call adds columns to the attractions frame it is given.
city_attraction, city_attraction_with_country = main_call(attractions)
# Attraction answer columns from the survey_response table.
user_attraction = createAttractionUserDf()



In [None]:
# Display the feature-only attraction counts per city label.
city_attraction

In [None]:
# Display the attraction counts together with country/city/id.
city_attraction_with_country

In [None]:
# Keyword arguments make the order explicit: the positional call passed the
# city frame as user_attraction and vice versa, which silently produced a
# cities x users matrix instead of users x cities.
cosine_sim_attraction = AttractionSimScore(user_attraction=user_attraction,
                                           city_attraction=city_attraction)

In [None]:
# Display the attraction similarity matrix.
cosine_sim_attraction

## Food and Attraction City

In [None]:
def mergeAttractionFood(attraction_df, food_df):
    """Outer-join per-city attraction data with per-city food data.

    Parameters
    ----------
    attraction_df, food_df : pandas.DataFrame
        Frames sharing the keys ``country``, ``city`` and ``id``.

    Returns
    -------
    pandas.DataFrame
        Indexed by (country, city, id) and sorted by ``id``. As before, the
        merge's positional index survives as an ``index`` column.

    Notes
    -----
    ``id`` is now converted to numeric BEFORE merging whenever it is a
    regular column. Previously the conversion happened only afterwards, so
    a text ``id`` on one side and an integer ``id`` on the other made the
    merge itself fail. Neither input frame is modified.
    """
    def _numeric_id(frame):
        # assign() returns a new frame; frames whose 'id' is an index level
        # are passed through untouched and merged exactly as before.
        if 'id' in frame.columns:
            return frame.assign(id=pd.to_numeric(frame['id']))
        return frame

    city = pd.merge(_numeric_id(attraction_df), _numeric_id(food_df),
                    on = ['country', 'city', 'id'], how = 'outer')
    city.reset_index(inplace=True)
    city['id'] = pd.to_numeric(city['id'])
    city.set_index(['country', 'city', 'id'], inplace=True)
    city.sort_values('id', inplace=True)
    return city

In [None]:
# Re-display the attraction frame that is merged with the food data next.
city_attraction_with_country

In [None]:
# Outer-join attraction and food data; result is indexed by (country, city, id).
city = mergeAttractionFood(city_attraction_with_country, all_price)

In [None]:

# `city_group` only ever existed as a local inside main_call, so on a fresh
# kernel the old line raised NameError. The merged `city` frame carries the
# 'country' index level this lookup needs.
city.xs('Germany', level='country', drop_level=False)