In [24]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Model
import sys

In [96]:
class EmbModel(tf.keras.Model):
    def __init__(self, useridlength, category_length):
        super(EmbModel, self).__init__()
        self.d_steps = 1
        self.useridlength = useridlength
        self.category_length = category_length
        self.model = self.init_model()
        print(self.useridlength)
        
    def call(self, inputs):
        return
    
    def init_model(self):
        poi_latitude_input = keras.layers.Input(shape=(1,), name='poi_latitude')
        poi_longitude_input = keras.layers.Input(shape=(1,), name='poi_longitude')
        poi_concat_input = tf.keras.layers.Concatenate(axis=-1)([poi_latitude_input, poi_longitude_input])
        #input_length:  #This is the length of input sequences, as you would define for any input layer of a Keras model. 
                        #For example, if all of your input documents are comprised of 1000 words, this would be 1000
        #input_dim: 
                        #This is the size of the vocabulary in the text data. 
                        #For example, if your data is integer encoded to values between 0-10, then the size of the vocabulary would be 11 words.
        poi_dense = keras.layers.Dense(128)(poi_concat_input)
        poi_reshape = keras.layers.Reshape((1, 128))(poi_dense)
        
        category_input = keras.layers.Input(shape=(1), name='category_input')
        category_emb = keras.layers.Embedding(self.category_length, 128)(category_input)    
        category_concat = tf.keras.layers.Concatenate(axis=-1)([category_emb, poi_reshape])
    
        user_input = keras.layers.Input(shape=(1,), name='user_id')
        user_emb = keras.layers.Embedding(self.useridlength, 256)(user_input)
        #user_reshape = layers.Reshape((1, 256))(user_emb)
                                    
        dot = keras.layers.Dot(axes=(2))([category_concat, user_emb])
            
        model = Model([category_input, poi_latitude_input, poi_longitude_input, user_input], dot)
        model.summary()
        return model
    
    def compile_model(self, optimizer):
        super(EmbModel, self).compile(run_eagerly=True)
        self.optimizer = optimizer
        
    def train_step(self, data):
        sample_weight = None
        real_data = data
        user_data = real_data[0]
        lat_data = real_data[1]
        long_data = real_data[2]
        labels = real_data[3]
        cat_data = real_data[4]
        

        for i in range(self.d_steps):
            with tf.GradientTape() as tape:
                
                #print(latlong_data[0])
                #print(latlong_data[1])
                #print(user_data)
                
                dotproduct = self.model([cat_data, lat_data, long_data, user_data])
                print(dotproduct)
                #print(dotproduct)
                # Loss function = ||S-GroundTruth|| 
                loss = tf.math.abs(tf.subtract(tf.cast(dotproduct, tf.float64), labels))
                #print(loss)
            d_gradient = tape.gradient(loss, self.model.trainable_variables)
            self.optimizer.apply_gradients(zip(d_gradient, self.model.trainable_variables))
        return {'loss': loss}
    
    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.out_units)
    
    def predict_step(self, data):
        sample_weight = None
        cat_data = real_data[0]
        lat_data = real_data[1]
        long_data = real_data[2]
        user_data = real_data[3]
        return self.model([cat_data, lat_data, long_data, user_data])

In [98]:
print(dataset)
model = EmbModel(len(users), category_length)

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.0, beta_2=0.99, epsilon=1e-8)

model.compile(
    optimizer
)

#Train_data, [dataset.user_id, dataset.poi_id]. Label: ground_truth
model.fit(dataset, epochs = 25, batch_size=27)

tf.Tensor(
[[100.        55.62852   12.647297   0.       147.      ]
 [139.        55.703993  12.538325   0.        89.      ]
 [ 80.        55.674379  12.55062    0.       224.      ]
 ...
 [ 19.        55.677101  12.57536    0.       114.      ]
 [ 16.        55.629631  12.64919    0.        10.      ]
 [ 31.        55.677083  12.58322    0.       175.      ]], shape=(26239, 5), dtype=float64)
Model: "functional_43"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
poi_latitude (InputLayer)       [(None, 1)]          0                                            
__________________________________________________________________________________________________
poi_longitude (InputLayer)      [(None, 1)]          0                                            
___________________________________________________________________________________

InvalidArgumentError:  indices[0,0] = 515 is not in [0, 279)
	 [[node functional_43/embedding_42/embedding_lookup (defined at <ipython-input-96-54b6d30e8c01>:60) ]] [Op:__inference_train_function_20373]

Errors may have originated from an input operation.
Input Source operations connected to node functional_43/embedding_42/embedding_lookup:
 functional_43/embedding_42/embedding_lookup/20190 (defined at C:\Users\lasse\anaconda3\lib\contextlib.py:113)

Function call stack:
train_function


In [26]:
def one_hot_encode(ground_truth, lst):
    result = []
    for category in lst:
        oh_encoding = np.zeros(len(ground_truth))
        if category in ground_truth:
            print(category)
            index = np.where(ground_truth == category)[0][0]
            
            #Get index og category, and insert 1 into the vector.
            result.append(index)
    return result

In [30]:
print("Loading checkins")
checkin_cols = ['user_id', 'poi_id', 'timestamp', 'timezone']
checkins = pd.read_csv(r'C:\Users\lasse\Desktop\RecommenderDL\datasets\Den_checkins.csv', sep=',', names=checkin_cols, encoding='latin-1').dropna(axis=1)

print("Loading POIs")
venue_cols = ['poi_id', 'latitude', 'longitude', 'category', 'country_code']
pois = pd.read_csv(r'C:\Users\lasse\Desktop\RecommenderDL\datasets\Den_pois.csv', sep=',', names=venue_cols, encoding='latin-1')

c = pd.DataFrame(checkins, columns=['user_id', 'poi_id'])
p = pd.DataFrame(pois, columns=['poi_id', 'latitude', 'longitude', 'category'])

cp = p.merge(c, on='poi_id')

#One checkin for each user
users = checkins.copy()
users.drop_duplicates(subset="user_id", keep = 'first', inplace = True)
print("Checkins: ", len(checkins))
print("Users: ", len(users))
len_checkins = len(checkins)
len_users = len(users)

#The rest of the checkins and categories
checkins_rest = users.merge(checkins, how = 'outer' ,indicator=True).loc[lambda x : x['_merge']=='right_only']
print("Gotten: ", len(checkins_rest))

#One of each category in checkins_rest
categories1 = pd.DataFrame(pois, columns=['poi_id', 'category'])
categories1 = checkins_rest.merge(categories1, on='poi_id')
users_categories1 = categories1.copy()
users_categories1.drop_duplicates(subset="category", keep = 'first', inplace = True)
print("Unique categories in checkins_rest: ", len(users_categories1))

#FINISHING NOTE: Vi vil have alle colplement af checkins_rest, når vi sammenligner med categories_cat_no_cat, så har vi de 3 gange checkins vi har brug for. Så kan vi lave train/test på det store af dem.
categories_cat_no_cat = pd.DataFrame(users_categories1, columns=['user_id', 'poi_id', 'timestamp', 'timezone', 'category'])
checkins_rest_no_merge = pd.DataFrame(checkins_rest, columns=['user_id', 'poi_id', 'timestamp', 'timezone'])
poisandcategories = pd.DataFrame(pois, columns=['poi_id', 'category'])
checkins_rest_no_merge = checkins_rest_no_merge.merge(poisandcategories, on='poi_id')
print("Total categories: ", len(categories_cat_no_cat))

test = categories_cat_no_cat.merge(checkins_rest_no_merge, how = 'outer' ,indicator=True).loc[lambda x : x['_merge']=='right_only']
#test = pd.DataFrame(checkins_rest, columns=['user_id', 'poi_id', 'timestamp', 'timezone'])
#cattest = pd.DataFrame(pois, columns=['poi_id', 'category'])
#test.merge(cattest, on='poi_id')
#checkins_rest = cattest.merge(test, how = 'outer' ,indicator=True).loc[lambda x : x['_merge']=='right_only']

restset = pd.DataFrame(test, columns=['user_id', 'poi_id', 'timestamp', 'timezone'])
userset = users
categoryset = pd.DataFrame(users_categories1, columns=['user_id', 'poi_id', 'timestamp', 'timezone'])

print("Overview:")
print(len(userset))
print(len(categoryset))
print(len(restset))

encoding = pd.DataFrame(userset, columns=['user_id'])
#print(encoding)
encoding_array = {}
temp = 0
for user in encoding.iterrows():
    user = user[0]
    value = encoding._get_value(user, 'user_id')
    encoding_array[value] = temp
    temp += 1

Loading checkins
Loading POIs
Checkins:  10473
Users:  963
Gotten:  9510
Unique categories in checkins_rest:  279
Total categories:  279
Overview:
963
279
9228


In [53]:
###################################
###################################
####### Iteration 1: User #########
###################################
###################################

print("Step 1")
checkin_data = categoryset.merge(pois, on='poi_id')
#checkin_data.drop_duplicates(subset="user_id", keep = 'first', inplace = True)

df = checkin_data.set_index('user_id').poi_id.str.get_dummies(',')
df = df.groupby('user_id').max()

print("Step 2")
checkin_data_no_duplicates = checkin_data.copy()
checkin_data_no_duplicates.drop_duplicates(subset ="poi_id", keep = 'first', inplace = True)
checkin_data_no_duplicates = pd.DataFrame(checkin_data_no_duplicates, columns = ['poi_id', 'category'])

#Extract categorical data
print("Step 3")
categories = pd.DataFrame(checkin_data, columns=['category'])
categories.drop_duplicates(subset ="category", keep = 'first', inplace = True)
category_length = len(categories)
categories_numpy = categories.to_numpy()


#Extracting all of the users and the pois

print("Step 3.5")
listofusers = pd.DataFrame(checkin_data, columns= ['user_id']).groupby('user_id').max().sample(frac=1)
listofpois = pd.DataFrame(checkin_data, columns= ['poi_id', 'latitude', 'longitude']).groupby('poi_id').max().sample(frac=1)
userarray = listofusers.index.to_numpy()
poiarray = listofpois.index.to_numpy()
userdataframe = pd.DataFrame(userarray, columns = ['Users'])
poidataframe = pd.DataFrame(poiarray, columns = ['Poi'])
dot = userdataframe.merge(poidataframe, how='cross')

print("Step 4")

rows_list = []
for i in range(len(dot)):
    temp = dot.loc[i, "Poi"]
    latitude = listofpois.loc[temp]['latitude']
    longitude = listofpois.loc[temp]['longitude']
    dict1 = {'latitude':latitude, 'longitude':longitude}
    rows_list.append(dict1)
    #latitude = poiarray[i]
latlong = pd.DataFrame(rows_list)

#Creating dataset
print("Step 5")
userdot = pd.DataFrame(dot, columns= ['Users'])
latlong['latitude'] = pd.to_numeric(latlong['latitude'])
latlong['longitude'] = pd.to_numeric(latlong['longitude'])
dataset = pd.concat([userdot, latlong], axis=1)

print("Step 6")
rows_list = []
category_list = []
groundtruth = 0
for i in range(len(dot)):
    temp = cp.loc[(cp['poi_id'] == dot.loc[i, "Poi"]) & (cp['user_id'] == dot.loc[i, "Users"])]
    if temp.empty:
        groundtruth = 0
    else:
        groundtruth = 1
    #temp = df[dot.loc[i, "Poi"]][dot.loc[i, "Users"]]
    #temp = df[dot.loc[i, "Poi"]][dot.loc[i, "Users"]]
    dict1 = {'ground_truth':float(groundtruth)}
    rows_list.append(dict1)
    #Extract category from the list
    category = checkin_data_no_duplicates.loc[checkin_data_no_duplicates['poi_id'] == dot.loc[i, "Poi"]]
    cat = category['category']
    index = np.where(categories_numpy == [cat])[0][0]
    category_list.append(index)
#category_label = 
groundtruth = pd.DataFrame(rows_list)
#result = pd.concat([dot, groundtruth], axis=1)

print("Step 7")
datasetst = pd.concat([dataset, groundtruth], axis=1)
categories = pd.DataFrame(category_list, columns=['category'])
datasetstst = pd.concat([datasetst, categories], axis=1)

Step 1
Step 2
Step 3
Step 3.5
Step 4
Step 5
Step 6
Step 7


In [104]:
np.save(r"C:\Users\lasse\Desktop\RecommenderDL\datasets\categories_numpy.npy", categories_numpy)
#d2=np.load(r"C:\Users\lasse\Desktop\RecommenderDL\datasets\d1.npy", allow_pickle=True)

In [58]:
print(datasetstst)

        Users   latitude  longitude  ground_truth  category
0      159853  55.693507  12.541721           0.0        30
1      159853  55.698009  12.474439           0.0       272
2      159853  55.628672  12.644426           0.0       219
3      159853  55.699679  12.536593           0.0       170
4      159853  55.738396  12.571651           0.0        82
...       ...        ...        ...           ...       ...
27616  164326  55.646972  12.541492           0.0       119
27617  164326  55.672940  12.564495           0.0       128
27618  164326  55.680127  12.560722           0.0        98
27619  164326  55.685796  12.568576           0.0       155
27620  164326  55.695400  12.609100           0.0       267

[27621 rows x 5 columns]


In [56]:
for index, row in datasetstst.iterrows():
    long = float(datasetstst.loc[index, 'longitude'])
    lat = float(datasetstst.loc[index, 'latitude'])
    user = float(datasetstst.loc[index, 'Users'])
    cat = float(datasetstst.loc[index, 'category'])
    groundtruth = datasetstst.loc[index, 'ground_truth']
    
    if groundtruth == 0:
        test = cp.loc[(cp['user_id'] == user) & (cp['latitude'] == lat) & (cp['longitude'] == long)]
        boolean = test.empty
        if not boolean:
            print("ERROR \" == 0\"")
            print(index)
            print(user)
            print(lat)
            print(long)
            print(groundtruth)
            
    elif groundtruth == 1:
        test = cp.loc[(cp['user_id'] == user) & (cp['latitude'] == lat) & (cp['longitude'] == long)]
        boolean = test.empty
        if boolean:
            print(test)
            print("ERROR \" == 1\"")
            print(index)
            print(user)
            print(lat)
            print(long)
            print(groundtruth)
    
    

ERROR " == 0"
18456
71191.0
55.737798
12.522954
0.0


In [57]:
print("yo")

yo


In [55]:
    temp = cp.loc[(cp['poi_id'] == dot.loc[i, "Poi"]) & (cp['user_id'] == dot.loc[i, "Users"])]
    if temp.empty:
        groundtruth = 0
    else:
        groundtruth = 1
    #temp = df[dot.loc[i, "Poi"]][dot.loc[i, "Users"]]

        Users   latitude  longitude  ground_truth  category
0       12123  55.681057  12.524736           0.0       152
1       12123  55.678870  12.579392           0.0       263
2       12123  55.593808  12.640104           0.0       275
3       12123  55.679661  12.582324           0.0       109
4       12123  55.662761  12.601849           0.0       227
...       ...        ...        ...           ...       ...
27616  230978  55.685796  12.568576           0.0       155
27617  230978  55.719278  12.551812           0.0        60
27618  230978  55.646972  12.541492           0.0       119
27619  230978  55.679399  12.572565           0.0        46
27620  230978  55.684848  12.538001           0.0       189

[27621 rows x 5 columns]


In [86]:


print("Step 8")
dataset_numpy = datasetstst.to_numpy()

print("Step 9")
x_train_df = pd.DataFrame(dataset_numpy, columns=['Users', 'latitude', 'longitude', 'ground_truth', 'category'])

for index, row in x_train_df.iterrows():
    usr = x_train_df.loc[index, 'Users']
    x_train_df.xs(index)['Users']=encoding_array.get(usr)

print("Step 10")
dataset = tf.convert_to_tensor(
    x_train_df, dtype=None, dtype_hint=None, name=None)

Step 8
Step 9
Step 10


In [103]:
print(x_train_df)

           User   Latitude  Longitude      0
0       80758.0  55.679661  12.582324  109.0
1       80758.0  55.731908  12.575850   84.0
2       80758.0  55.681057  12.524736  152.0
3       80758.0  55.731910  12.483522    1.0
4       80758.0  55.673021  12.554590  136.0
...         ...        ...        ...    ...
27616  141345.0  55.752348  12.571790  172.0
27617  141345.0  55.670639  12.541494  237.0
27618  141345.0  55.788225  12.530318   83.0
27619  141345.0  55.626090  12.478889  228.0
27620  141345.0  55.668386  12.551174   75.0

[27621 rows x 4 columns]


In [7]:
###################################
###################################
####### Iteration 1: User #########
###################################
###################################

print("Step 1")
checkin_data = restset.merge(pois, on='poi_id')
#checkin_data.drop_duplicates(subset="user_id", keep = 'first', inplace = True)
df = checkin_data.set_index('user_id').poi_id.str.get_dummies(',')
df = df.groupby('user_id').max()

print("Step 2")
checkin_data_no_duplicates = checkin_data.copy()
checkin_data_no_duplicates.drop_duplicates(subset ="poi_id", keep = 'first', inplace = True)
checkin_data_no_duplicates = pd.DataFrame(checkin_data_no_duplicates, columns = ['poi_id', 'category'])

#Extract categorical data
print("Step 3")
categories = pd.DataFrame(checkin_data, columns=['category'])
categories.drop_duplicates(subset ="category", keep = 'first', inplace = True)
category_length = len(categories)
categories_numpy = categories.to_numpy()


#Extracting all of the users and the pois
print("Step 3.5")
listofusers = pd.DataFrame(checkin_data, columns= ['user_id']).groupby('user_id').max().sample(frac=1)
listofpois = pd.DataFrame(checkin_data, columns= ['poi_id', 'latitude', 'longitude']).groupby('poi_id').max().sample(frac=1)
userarray = listofusers.index.to_numpy()
poiarray = listofpois.index.to_numpy()
userdataframe = pd.DataFrame(userarray, columns = ['Users'])
poidataframe = pd.DataFrame(poiarray, columns = ['Poi'])
dot = userdataframe.merge(poidataframe, how='cross')

print("Step 4")
rows_list = []
for i in range(len(dot)):
    temp = dot.loc[i, "Poi"]
    latitude = listofpois.loc[temp]['latitude']
    longitude = listofpois.loc[temp]['longitude']
    dict1 = {'latitude':latitude, 'longitude':longitude}
    rows_list.append(dict1)
    #latitude = poiarray[i]
latlong = pd.DataFrame(rows_list)

#Creating dataset
print("Step 5")
userdot = pd.DataFrame(dot, columns= ['Users'])
latlong['latitude'] = pd.to_numeric(latlong['latitude'])
latlong['longitude'] = pd.to_numeric(latlong['longitude'])
dataset = pd.concat([userdot, latlong], axis=1)

#Extracting ground_truth from incidence matrix
print("Step 6")
rows_list = []
category_list = []
for i in range(len(dot)):
    temp = df[dot.loc[i, "Poi"]][dot.loc[i, "Users"]]
    dict1 = {'ground_truth':float(temp)}
    rows_list.append(dict1)
    #Extract category from the list
    category = checkin_data_no_duplicates.loc[checkin_data_no_duplicates['poi_id'] == dot.loc[i, "Poi"]]
    cat = category['category']
    index = np.where(categories_numpy == [cat])[0][0]
    category_list.append(index)
#category_label = 
groundtruth = pd.DataFrame(rows_list)
#result = pd.concat([dot, groundtruth], axis=1)

print("Step 7")
categories = pd.DataFrame(category_list, columns=['category'])
datasetst = pd.concat([dataset, categories], axis=1)

print("Step 8")
dataset_numpy = datasetst.to_numpy()
labels_numpy = groundtruth.to_numpy()
categories_numpy = categories.to_numpy()

#x_train, x_test, y_train, y_test = train_test_split(dataset_numpy, labels_numpy, test_size=0.05, random_state=0)

print("Step 9")
x_train_df = pd.DataFrame(dataset_numpy, columns=['User','Latitude','Longitude', '0'])
#x_test_df = pd.DataFrame(x_test, columns=['User','Latitude','Longitude', '0'])
y_train_df = pd.DataFrame(labels_numpy)
#y_test_df = pd.DataFrame(y_test)

#Dataset with Users
dataset1_df = pd.DataFrame(x_train_df['User'])

index = 0
for user in dataset1_df.iterrows():
    user = user[0]
    value = dataset1_df._get_value(user, 'User')
    dataset1_df.xs(user)['User']=encoding_array.get(value)
    #encoding.at[index,'user_id']=encoding_array.get(user)
    index += 1
    
#Dataset with Poi's
dataset2_df = pd.DataFrame(x_train_df[['Latitude']])
dataset3_df = pd.DataFrame(x_train_df[['Longitude']])

dataset4_df = pd.DataFrame(x_train_df[['0']])

print("Step 10")
dataset1 = tf.convert_to_tensor(
    dataset1_df, dtype=None, dtype_hint=None, name=None)
dataset2 = tf.convert_to_tensor(
    dataset2_df, dtype=None, dtype_hint=None, name=None)
dataset3 = tf.convert_to_tensor(
    dataset3_df, dtype=None, dtype_hint=None, name=None)
dataset4 = tf.convert_to_tensor(
    dataset4_df, dtype='int64', dtype_hint=None, name=None)
labels = tf.convert_to_tensor(
    y_train_df, dtype=None, dtype_hint=None, name=None)

Step 1


NameError: name 'restset' is not defined

In [106]:
print(x_test_df)

NameError: name 'x_test_df' is not defined

In [8]:
encoding = pd.DataFrame(users, columns=['user_id'])
#print(encoding)
encoding_array = {}
temp = 0
for user in encoding.iterrows():
    user = user[0]
    value = encoding._get_value(user, 'user_id')
    encoding_array[value] = temp
    temp += 1

print(encoding_array)    
    
# Iteration 1: User

print("Step 1")
checkin_data = users.merge(pois, on='poi_id')
df = checkin_data.set_index('user_id').poi_id.str.get_dummies(',')
df = df.groupby('user_id').max()

print("Step 2")
checkin_data_no_duplicates = checkin_data.copy()
checkin_data_no_duplicates.drop_duplicates(subset ="poi_id", keep = 'first', inplace = True)
checkin_data_no_duplicates = pd.DataFrame(checkin_data_no_duplicates, columns = ['poi_id', 'category'])

#Extract categorical data
print("Step 3")
categories = pd.DataFrame(checkin_data, columns=['category'])
categories.drop_duplicates(subset ="category", keep = 'first', inplace = True)
category_length = len(categories)
categories_numpy = categories.to_numpy()


#Extracting all of the users and the pois
print("Step 3.5")
listofusers = pd.DataFrame(checkin_data, columns= ['user_id']).groupby('user_id').max().sample(frac=1)
listofpois = pd.DataFrame(checkin_data, columns= ['poi_id', 'latitude', 'longitude']).groupby('poi_id').max().sample(frac=1)
userarray = listofusers.index.to_numpy()
poiarray = listofpois.index.to_numpy()
userdataframe = pd.DataFrame(userarray, columns = ['Users'])
poidataframe = pd.DataFrame(poiarray, columns = ['Poi'])
dot = userdataframe.merge(poidataframe, how='cross')

print("Step 4")
rows_list = []
for i in range(len(dot)):
    temp = dot.loc[i, "Poi"]
    latitude = listofpois.loc[temp]['latitude']
    longitude = listofpois.loc[temp]['longitude']
    dict1 = {'latitude':latitude, 'longitude':longitude}
    rows_list.append(dict1)
    #latitude = poiarray[i]
latlong = pd.DataFrame(rows_list)

#Creating dataset
print("Step 5")
userdot = pd.DataFrame(dot, columns= ['Users'])
latlong['latitude'] = pd.to_numeric(latlong['latitude'])
latlong['longitude'] = pd.to_numeric(latlong['longitude'])
dataset = pd.concat([userdot, latlong], axis=1)

#Extracting ground_truth from incidence matrix
print("Step 6")
rows_list = []
category_list = []
for i in range(len(dot)):
    temp = df[dot.loc[i, "Poi"]][dot.loc[i, "Users"]]
    dict1 = {'ground_truth':float(temp)}
    rows_list.append(dict1)
    #Extract category from the list
    category = checkin_data_no_duplicates.loc[checkin_data_no_duplicates['poi_id'] == dot.loc[i, "Poi"]]
    cat = category['category']
    index = np.where(categories_numpy == [cat])[0][0]
    category_list.append(index)
#category_label = 
groundtruth = pd.DataFrame(rows_list)
#result = pd.concat([dot, groundtruth], axis=1)

print("Step 7")
categories = pd.DataFrame(category_list, columns=['category'])
datasetst = pd.concat([dataset, categories], axis=1)

print("Step 8")
dataset_numpy = datasetst.to_numpy()
labels_numpy = groundtruth.to_numpy()
categories_numpy = categories.to_numpy()

#x_train, x_test, y_train, y_test = train_test_split(dataset_numpy, labels_numpy, test_size=0.05, random_state=0)

print("Step 9")
x_train_df = pd.DataFrame(dataset_numpy, columns=['User','Latitude','Longitude', '0'])
#x_test_df = pd.DataFrame(x_test, columns=['User','Latitude','Longitude', '0'])
y_train_df = pd.DataFrame(labels_numpy)
#y_test_df = pd.DataFrame(y_test)

#Dataset with Users
dataset1_df = pd.DataFrame(x_train_df['User'])

index = 0
for user in dataset1_df.iterrows():
    user = user[0]
    value = dataset1_df._get_value(user, 'User')
    dataset1_df.xs(user)['User']=encoding_array.get(value)
    #encoding.at[index,'user_id']=encoding_array.get(user)
    index += 1
    
#Dataset with Poi's
dataset2_df = pd.DataFrame(x_train_df[['Latitude']])
dataset3_df = pd.DataFrame(x_train_df[['Longitude']])

dataset4_df = pd.DataFrame(x_train_df[['0']])

print("Step 10")
dataset1 = tf.convert_to_tensor(
    dataset1_df, dtype=None, dtype_hint=None, name=None)
dataset2 = tf.convert_to_tensor(
    dataset2_df, dtype=None, dtype_hint=None, name=None)
dataset3 = tf.convert_to_tensor(
    dataset3_df, dtype=None, dtype_hint=None, name=None)
dataset4 = tf.convert_to_tensor(
    dataset4_df, dtype='int64', dtype_hint=None, name=None)
labels = tf.convert_to_tensor(
    y_train_df, dtype=None, dtype_hint=None, name=None)

{233919: 0, 190585: 1, 24779: 2, 30835: 3, 3884: 4, 18507: 5, 125878: 6, 79872: 7, 112400: 8, 16753: 9, 198380: 10, 3166: 11, 248459: 12, 166003: 13, 65942: 14, 85610: 15, 212753: 16, 11129: 17, 134643: 18, 98834: 19, 228886: 20, 9912: 21, 141345: 22, 36884: 23, 240687: 24, 259108: 25, 80758: 26, 132466: 27, 23885: 28, 81032: 29, 44648: 30, 24890: 31, 174210: 32, 178954: 33, 201854: 34, 75696: 35, 167913: 36, 93131: 37, 196441: 38, 3354: 39, 245399: 40, 139066: 41, 204627: 42, 110075: 43, 259848: 44, 99909: 45, 12829: 46, 162192: 47, 41675: 48, 43277: 49, 79388: 50, 3457: 51, 221413: 52, 145644: 53, 88502: 54, 87745: 55, 123798: 56, 201860: 57, 34500: 58, 64920: 59, 74284: 60, 215891: 61, 76403: 62, 81724: 63, 6085: 64, 234549: 65, 1643: 66, 150809: 67, 113696: 68, 57410: 69, 132704: 70, 183987: 71, 256254: 72, 26478: 73, 164480: 74, 5523: 75, 148702: 76, 230601: 77, 103351: 78, 129961: 79, 56432: 80, 152507: 81, 54512: 82, 186735: 83, 80978: 84, 50064: 85, 120094: 86, 86918: 87, 82541

## dataset2_df[0].shape

In [None]:
print(y_train_df)

In [72]:
model = EmbModel(len(users), category_length)

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.0, beta_2=0.99, epsilon=1e-8)

model.compile(
    optimizer
)

#Train_data, [dataset.user_id, dataset.poi_id]. Label: ground_truth
model.fit([dataset4, dataset2, dataset3, dataset1], labels, epochs = 25, batch_size=27)

Model: "functional_15"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
poi_latitude (InputLayer)       [(None, 1)]          0                                            
__________________________________________________________________________________________________
poi_longitude (InputLayer)      [(None, 1)]          0                                            
__________________________________________________________________________________________________
concatenate_14 (Concatenate)    (None, 2)            0           poi_latitude[0][0]               
                                                                 poi_longitude[0][0]              
__________________________________________________________________________________________________
category_input (InputLayer)     [(None, 1)]          0                                

StagingError: in user code:

    C:\Users\lasse\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    C:\Users\lasse\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\lasse\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\lasse\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\lasse\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\lasse\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step  **
        outputs = model.train_step(data)
    <ipython-input-71-87c85096a80c>:52 train_step
        long_data = real_data[2]

    IndexError: tuple index out of range


In [81]:
model.save(r'C:\Users\lasse\Desktop\RecommenderDL\models\model2')



ValueError: Model <__main__.EmbModel object at 0x0000028064F13940> cannot be saved because the input shapes have not been set. Usually, input shapes are automatically determined from calling `.fit()` or `.predict()`. To manually set the shapes, call `model.build(input_shape)`.

In [16]:
print(model.input_shape)

AttributeError: The layer has never been called and thus has no defined input shape.