In [None]:
import pandas as pd
import numpy as np
import sqlite3
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn import preprocessing as pp
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
def get_df(table_name, db_path='/Users/tristannisbet/Documents/travel_app/places.db'):
    """Load every row of one SQLite table into a DataFrame.

    Parameters
    ----------
    table_name : str
        Name of the table to read. It is quoted as an SQL identifier, so it
        must be a bare table name (not ``schema.table``).
    db_path : str, optional
        Path of the SQLite database file. Defaults to the places database
        this notebook was written against.

    Returns
    -------
    pandas.DataFrame
        Result of ``select * from <table_name>``.

    Raises
    ------
    sqlite3.Error
        If the database cannot be opened.
    Exception
        Whatever ``pd.read_sql_query`` raises when the query fails (for
        example when the table does not exist).
    """
    # Identifiers cannot be bound as query parameters, so quote the name
    # (doubling any embedded quote) instead of pasting it in raw.
    quoted_name = '"{}"'.format(str(table_name).replace('"', '""'))
    sql = """select * from {}""".format(quoted_name)

    # Connection errors now propagate. Previously they were printed and the
    # function then crashed with a NameError on the unbound `conn`.
    conn = sqlite3.connect(db_path)
    try:
        return pd.read_sql_query(sql, conn)
    finally:
        # Release the database handle even when the query fails.
        conn.close()

In [None]:
# Read each restaurant table from the places database, in the same order as
# the names on the left-hand side.
one, two, three, four, top_rest = (
    get_df(table)
    for table in (
        'restaurants_one',
        'restaurants_two',
        'restaurants_three',
        'restaurants_four',
        'restaurants',
    )
)


In [None]:
# Stack the five restaurant tables on top of each other (row-wise) and print
# a column / dtype summary of the result.
all_price = pd.concat(
    objs=[one, two, three, four, top_rest],
    axis='index',
)
all_price.info()

In [None]:
# Convert the id column to a numeric dtype.
all_price['id'] = pd.to_numeric(all_price['id'])

In [None]:
def to_city(df):
    """Count restaurants per price level for every city.

    Returns a frame indexed by (country, city, id) with one column per
    ``price_level`` value, holding the number of non-null ``name`` entries at
    that level, plus an ``avg_price`` column with the mean ``price_level`` of
    the same (country, city, id) group.
    """
    city_keys = ['country', 'city', 'id']

    # Number of names for each (city, price level) pair.
    counts = (
        df.groupby(city_keys + ['price_level'])['name']
        .count()
        .to_frame()
    )

    # Spread the price levels into columns; every cell has exactly one count,
    # so 'first' simply picks it up.
    wide = counts.pivot_table(
        values='name',
        index=city_keys,
        columns='price_level',
        aggfunc='first',
    )

    mean_price = df.groupby(city_keys)['price_level'].mean()
    wide['avg_price'] = mean_price
    return wide

In [None]:
# Per-city restaurant counts by price level (plus avg_price), built from the
# combined restaurant tables.
food = to_city(all_price)
food

In [None]:
# Turn the (country, city, id) index into ordinary columns and discard
# avg_price. The frame is rebuilt from to_city(all_price) instead of being
# mutated in place so the cell can be re-run: the in-place version raised a
# KeyError on a second run because 'avg_price' had already been dropped.
food = to_city(all_price).reset_index().drop(columns=['avg_price'])
food

In [None]:
# Give every distinct city name an integer label and store it next to the
# counts. NOTE(review): rows that share a city name get the same label_id.
le = pp.LabelEncoder()
le.fit(food['city'])
food['label_id'] = le.transform(food['city'])
food

In [None]:
# Reverse the encoding: map label ids back to city names.
# list(le.inverse_transform(food.label_id))

In [None]:
# Keep only the city label and the four price-level count columns, as an
# independent copy of `food`.
food_new = food.loc[:, ['label_id', 1.0, 2.0, 3.0, 4.0]].copy()
food_new

In [None]:
# Order the rows by label_id and make label_id the index.
food_new = food_new.sort_values('label_id').set_index('label_id')

In [None]:
# Missing price-level counts become 0.
food_new = food_new.fillna(0)

In [None]:
# Normalize each city's price-level counts row by row (sklearn's normalize
# uses the L2 norm by default). Wrapping the array in a DataFrame gives a
# default positional index, so the label_id index of food_new is not carried
# over; rows stay in food_new's order (sorted by label_id).
normalized = pp.normalize(food_new)
normalized_city = pd.DataFrame(normalized)
normalized_city

### Survey / user food data

In [None]:
# Load the food-related survey answers, using the first CSV column as index.
# NOTE(review): absolute, machine-specific path — the notebook only runs where
# this file exists.
survey_food = pd.read_csv('/Users/tristannisbet/Documents/SM/survey_food_only.csv', index_col=0)
survey_food

In [None]:
# Map the frequency answers to ordinal scores: 'Never' -> 0 ... 'Always' -> 4.
replace_map = {
    answer: score
    for score, answer in enumerate(
        ('Never', 'Rarely', 'Sometimes', 'Often', 'Always')
    )
}

df_food_replace = survey_food.replace(to_replace=replace_map)

In [None]:
# Remove the demographic columns so only the food answers remain.
df_food_replace = df_food_replace.drop(columns=['nationality', 'age', 'gender'])

In [None]:
# Row-normalize the users' food scores; the resulting frame has a default
# positional index, one row per survey respondent in df_food_replace order.
normalized_user = pd.DataFrame(pp.normalize(df_food_replace))
normalized_user

In [None]:
# User-by-city food similarity: rows follow normalized_user, columns follow
# the row order of normalized_city.
cosine_sim = pd.DataFrame(cosine_similarity(normalized_user, normalized_city))
cosine_sim

In [None]:
def find_similar_n(df, n):
    """Return, for every row, the labels of its ``n`` highest-scoring columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Score matrix; each row is ranked independently.
    n : int
        How many top columns to keep. Values larger than the number of
        columns are clamped to it (previously this raised a length mismatch).

    Returns
    -------
    pandas.DataFrame
        Same row index as ``df``, with columns ``top1`` .. ``topN`` holding
        the column labels ordered from highest to lowest score.
    """
    n = min(n, df.shape[1])
    rank_names = ['top{}'.format(i) for i in range(1, n + 1)]

    def _top_labels(row):
        # Column labels of the n largest values in this row, best first.
        best = row.sort_values(ascending=False).iloc[:n].index
        return pd.Series(best, index=rank_names)

    return df.apply(_top_labels, axis=1)

In [None]:
# For every user, the 10 cosine_sim column positions with the highest
# food similarity (columns top1 .. top10).
top_10_city = find_similar_n(cosine_sim,10)
top_10_city

In [None]:
# First five recommendations for the first user, as a plain list.
user_1 = top_10_city.iloc[0, :5].to_numpy().tolist()
user_1

In [None]:
# Print the city name(s) in `food` whose label_id equals each recommendation.
for city in user_1:
    city2 = food.loc[food['label_id'] == city]
    print(city2['city'])

# Attraction Similarity

In [None]:
# Load the per-city attraction data, using the first CSV column as index.
# NOTE(review): absolute, machine-specific path.
city_attraction = pd.read_csv('/Users/tristannisbet/Documents/SM/city_attraction_only.csv', index_col=0)
city_attraction

In [None]:
# Encode the attraction table's city names as integer labels. This rebinds
# `le`, so the encoder fitted on food.city is no longer reachable by name.
le = pp.LabelEncoder()
le.fit(city_attraction['city'])
city_attraction['label_id'] = le.transform(city_attraction['city'])
city_attraction

In [None]:
# Sort the attraction table by label, then derive a features-only frame that
# is indexed by label_id and has the identifying columns removed.
city_attraction = city_attraction.sort_values('label_id')
city_attraction_clean = (
    city_attraction
    .set_index('label_id')
    .drop(columns=['city', 'id'])
)
city_attraction_clean

In [None]:
# Load the raw survey responses. NOTE(review): absolute, machine-specific path.
df = pd.read_csv('/Users/tristannisbet/Documents/SM/survey_responses.csv')

In [None]:
# Rename the verbose survey question headers to short snake_case names, in
# place. The keys have to match the CSV headers character for character
# (several end in a space or contain runs of spaces); by default pandas leaves
# any column that is not listed here unchanged.
df.rename(columns = {'What country are you from? ': 'nationality', 'Age Range': 'age', 'Gender': 'gender',
                    '1. Choose your top favorite 3-5 cities you have traveled to that are on this list.  - Favorite City #1': 'favorite_city_one',
                    '2. Favorite city #2': 'favorite_city_two', '3. Favorite city #3': 'favorite_city_three',
                    '4. Favorite city #4': 'favorite_city_four', '5. Favorite city #5': 'favorite_city_five',
                    "6. If there's a city you have been and loved that is not on this list, add it below. ": 'extra_favorite',
                    "7. What cities on this list have you been to and not enjoyed?   - Least favorite city #1": 'least_favorite_one',
                    '8. Least favorite city #2': 'least_favorite_two', 
                    "9. If there's a city you have been to and haven't liked that is not on this list, add it below": 'extra_least_favorite',
                    "What price range of restaurant do you eat at when you travel? [Price level: 1 (Fast/Cheap Eats)]": 'food_one',
                    "What price range of restaurant do you eat at when you travel? [Price level: 2 (Casual Dining)]": 'food_two',
                    "What price range of restaurant do you eat at when you travel? [Price level: 3 (Upscale Dining)]": 'food_three',
                    "What price range of restaurant do you eat at when you travel? [Price level: 4 (Fine Dining/High End)]": 'food_four',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Art Gallery]": 'art_gallery',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Library]": 'library',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Museum ]": 'museum',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Aquarium]": 'aquarium',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Amusement Park ]": 'amusement_park',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Zoo]": 'zoo',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Movie Theater]": 'movie_theater',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Mall / Souvenir shop ]": 'store',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Park ]": 'park',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Natural Feature / Beach]": 'natural_feature',
                    "From the following types of tourist attractions, which ones are you likely to go do while traveling? [Place of Worship (Church/Temple)]": 'place_of_worship'}, inplace=True )

In [None]:
# Copy the demographic columns and the attraction answers out of the survey.
user_attraction = df.loc[:, [
    'nationality', 'age', 'gender',
    'amusement_park', 'museum', 'park', 'art_gallery', 'aquarium', 'zoo',
    'library', 'movie_theater', 'natural_feature', 'place_of_worship', 'store',
]].copy()
user_attraction

In [None]:
# Keep only the attraction answers.
user_attraction = user_attraction.drop(columns=['nationality', 'age', 'gender'])

In [None]:
# Convert the 1-4 answer labels to integers. This reuses the name
# `replace_map`, replacing the food-frequency mapping defined earlier.
replace_map = {
    '1 ( Would NOT go)': 1,
    '2': 2,
    '3': 3,
    '4 (Definitely would go)': 4,
}

user_attraction = user_attraction.replace(to_replace=replace_map)


In [None]:
# Inspect the attraction answers after the label-to-integer replacement.
user_attraction

In [None]:
# Inspect the city attraction features that the users are compared against.
city_attraction_clean

In [None]:
# Row-normalize the users' attraction scores (default positional index).
normalized_user_a = pd.DataFrame(pp.normalize(user_attraction))
normalized_user_a

In [None]:
# Row-normalize the cities' attraction features. The label_id index is not
# carried over; rows stay in city_attraction_clean's (label_id-sorted) order.
normalized_city_a = pd.DataFrame(pp.normalize(city_attraction_clean))
normalized_city_a

In [None]:
# User-by-city attraction similarity: rows follow normalized_user_a, columns
# follow the row order of normalized_city_a.
cosine_sim_a = pd.DataFrame(cosine_similarity(normalized_user_a, normalized_city_a))
cosine_sim_a

In [None]:
# For every user, the 10 cosine_sim_a column positions with the highest
# attraction similarity (columns top1 .. top10).
top_city_a = find_similar_n(cosine_sim_a,10)
top_city_a

In [None]:
# First five attraction-based recommendations for the first user; this
# overwrites the food-based `user_1` list.
user_1 = top_city_a.iloc[0, :5].to_numpy().tolist()
user_1

In [None]:
# Decode the attraction-based recommendations. They are column positions of
# cosine_sim_a, whose columns follow city_attraction's label_id order, so the
# names must be looked up in city_attraction. The previous lookup in `food`
# used labels from a separate LabelEncoder fit and could name the wrong city.
# NOTE(review): position == label_id assumes one row per label — confirm.
for city in user_1:
    city2 = city_attraction[city_attraction.label_id == city]
    print(city2.city)

In [None]:
# Inspect the attraction-based top-10 table.
top_city_a

In [None]:
# Tag the food ranking columns (top1 -> top1food, ...). Re-running this cell
# appends the suffix again.
top_10_city = top_10_city.add_suffix('food') 

In [None]:
# Tag the attraction ranking columns (top1 -> top1a, ...). Re-running this
# cell appends the suffix again.
top_city_a = top_city_a.add_suffix('a') 

In [None]:
# Inspect the attraction ranking after the column suffix was added.
top_city_a

In [None]:
# Spot-check which row(s) of `food` carry label_id 100.
food[food.label_id == 100]

In [None]:
# Display `food` ordered by label_id (returns a sorted copy; `food` itself is
# not modified).
food.sort_values('label_id')

In [None]:
# Inspect the attraction table, including its city and label_id columns.
city_attraction

In [None]:
# Inspect the survey responses with their renamed columns.
df

In [None]:
# Copy the survey columns at positions 4 to 8.
# NOTE(review): presumably the five favorite_city_* columns — confirm against
# the CSV layout; selecting them by name would not depend on column order.
fav_city = df.iloc[:, 4:9].copy()


In [None]:
# Inspect the favourite-city columns.
fav_city

In [None]:
# Show one randomly chosen row; no random_state is passed, so the row can
# differ between runs.
fav_city.sample()

In [None]:
# Favourite-city answers of the respondent at row position 29 (the same row
# number that food_sim and attraction_sim read from the similarity matrices).
user_29 = fav_city.iloc[29]
user_29

In [None]:
# Defaults to a single user (row 29); pass user_idx to score another one.
def food_sim(city, user_idx=29, city_table=None, sim_matrix=None):
    """Look up one user's food similarity score for each named city.

    Parameters
    ----------
    city : iterable of str
        City names to score, e.g. one user's favourite cities.
    user_idx : int, optional
        Row position of the user in ``sim_matrix``. Defaults to 29.
    city_table : pandas.DataFrame, optional
        Frame with ``city`` and ``label_id`` columns. Defaults to the
        notebook-level ``food`` frame.
    sim_matrix : pandas.DataFrame, optional
        User-by-city similarity matrix. Defaults to the notebook-level
        ``cosine_sim`` frame.

    Returns
    -------
    list
        One score per input city, in input order. Cities that are not found
        in ``city_table`` (including missing/NaN answers) yield ``nan``
        instead of raising.
    """
    if city_table is None:
        city_table = food
    if sim_matrix is None:
        sim_matrix = cosine_sim

    sim_list = []
    for c in city:
        labels = city_table.loc[city_table.city == c, 'label_id']
        if labels.empty:
            # Unknown or unanswered city: keep the output aligned with the input.
            sim_list.append(float('nan'))
            continue
        # The label is used as a column *position* in the matrix.
        # NOTE(review): assumes matrix columns are in label_id order with one
        # column per label — confirm.
        sim_list.append(sim_matrix.iloc[user_idx, labels.iloc[0]])
    # The original returned the undefined name `sim_list_f` (NameError).
    return sim_list

def attraction_sim(city, user_idx=29, city_table=None, sim_matrix=None):
    """Look up one user's attraction similarity score for each named city.

    Parameters
    ----------
    city : iterable of str
        City names to score, e.g. one user's favourite cities.
    user_idx : int, optional
        Row position of the user in ``sim_matrix``. Defaults to 29.
    city_table : pandas.DataFrame, optional
        Frame with ``city`` and ``label_id`` columns. Defaults to the
        notebook-level ``city_attraction`` frame.
    sim_matrix : pandas.DataFrame, optional
        User-by-city similarity matrix. Defaults to the notebook-level
        ``cosine_sim_a`` frame.

    Returns
    -------
    list
        One score per input city, in input order. Cities that are not found
        in ``city_table`` (including missing/NaN answers) yield ``nan``
        instead of raising.
    """
    if city_table is None:
        city_table = city_attraction
    if sim_matrix is None:
        sim_matrix = cosine_sim_a

    sim_list = []
    for c in city:
        labels = city_table.loc[city_table.city == c, 'label_id']
        if labels.empty:
            # Unknown or unanswered city: keep the output aligned with the input.
            sim_list.append(float('nan'))
            continue
        # The label is used as a column *position* in the matrix.
        # NOTE(review): assumes matrix columns are in label_id order with one
        # column per label — confirm.
        sim_list.append(sim_matrix.iloc[user_idx, labels.iloc[0]])
    # The original returned the undefined name `sim_list_a` (NameError).
    return sim_list


# TODO: build one such row per user and collect them in a single dataframe.
# TODO: maybe keep all similarity scores, followed by the top 5 cities.
# TODO: still need to weight higher-ranked favourite cities more strongly.
def attraction_food_sim(attraction_sim, food_sim):
    """Add attraction and food similarity scores city by city.

    Parameters
    ----------
    attraction_sim : sequence of float
        Attraction similarity scores, one per city.
    food_sim : sequence of float
        Food similarity scores for the same cities, in the same order.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with one column per city holding the summed score
        (the same shape the notebook builds by hand from the two lists). If
        the inputs differ in length, the extra items of the longer one are
        ignored.
    """
    # Plain pairwise addition; this avoids relying on `operator.add`, which
    # the notebook only imports in a later cell. The original body computed
    # the sums but never returned them.
    combined = [a_score + f_score for a_score, f_score in zip(attraction_sim, food_sim)]
    return pd.DataFrame(combined).T

In [None]:
# Food similarity score for each of user_29's favourite cities.
ok = food_sim(user_29)

In [None]:
# Attraction similarity score for each of user_29's favourite cities.
oka = attraction_sim(user_29)

In [None]:
from operator import add

# Sum the food and attraction scores pairwise, city by city.
bla = [add(food_score, attraction_score) for food_score, attraction_score in zip(ok, oka)]

In [None]:
# Lay the combined scores out as a single row with one column per city.
dff = pd.DataFrame(bla).T

In [None]:
# Inspect the combined score row.
dff

In [None]:
# Debug check: the type of the object returned by attraction_sim.
type(oka)

In [None]:
# Inspect the attraction similarity scores for user_29.
oka

In [None]:
# Spot-check a single food similarity value: row position 29, column
# position 71 of cosine_sim.
cosine_sim.iloc[29, 71]