Skip to content

Commit

Permalink
Merge pull request #29 from DushanSenadheera/dev
Browse files Browse the repository at this point in the history
update api
  • Loading branch information
DushanSenadheera committed Apr 25, 2024
2 parents 26cf159 + 9451843 commit 47f79c5
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 44 deletions.
22 changes: 11 additions & 11 deletions model/data/southern.csv
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
Title,Reviews,Rating,Category,Location,Duration,Budget
Galle Fort,3516,4.5,point of interests,Galle,3,0
Bentota Beach,2305,4.5,beaches,Bentota,1,0
Old Town of Galle and its Fortifications,3511,4.5,point of interests,Galle,1,0
Mirissa Beach,3095,4.5,beaches,Mirissa,1,0
Sea Turtle Farm Galle Mahamodara,1076,4.5,nature & wildlife,Galle,2,30
Hikkaduwa Beach,2383,4,beaches,Hikkaduwa,3,0
Bundala National Park,595,4.5,nature & wildlife,Weligatta,4,40
Kalametiya Lagoon Bird Sanctuary & Wetland Park,380,5,nature & wildlife,Tangalle,3,30
Sinharaja Forest Reserve,612,4.5,nature & wildlife,Deniyaya,4,30
Jungle Beach,2824,3.5,beaches,Unawatuna,1,0
Title,Reviews,Rating,Category,Location,Duration,Budget,lati,long
Galle Fort,3516,4.5,point of interests,Galle,3,0,6.0329,80.2168
Bentota Beach,2305,4.5,beaches,Bentota,1,0,6.4189,80.006
Old Town of Galle and its Fortifications,3511,4.5,point of interests,Galle,1,0,6.0329,80.2168
Mirissa Beach,3095,4.5,beaches,Mirissa,1,0,5.9483,80.4716
Sea Turtle Farm Galle Mahamodara,1076,4.5,nature & wildlife,Galle,2,30,6.0329,80.2168
Hikkaduwa Beach,2383,4,beaches,Hikkaduwa,3,0,6.1395,80.1063
Bundala National Park,595,4.5,nature & wildlife,Weligatta,4,40,6.1969,81.2409
Kalametiya Lagoon Bird Sanctuary & Wetland Park,380,5,nature & wildlife,Tangalle,3,30,6.0243,80.7941
Sinharaja Forest Reserve,612,4.5,nature & wildlife,Deniyaya,4,30,6.3425,80.5597
Jungle Beach,2824,3.5,beaches,Unawatuna,1,0,6.0174,80.2489
98 changes: 75 additions & 23 deletions model/src/location.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,33 @@
#import libraries
import pandas as pd
import json
import sys
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel
from math import radians, cos, sin, asin, sqrt

#load the dataset
# Assuming df is your DataFrame and it has been defined and loaded with data
df = pd.read_csv('../model/data/southern.csv')

# Combine the 'Location', 'Category', 'Budget', and 'Duration' columns into a single 'Features' column
df['Features'] = df['Location'] + ' ' + df['Category'] + ' ' + df['Budget'].astype(str) + ' ' + df['Duration'].astype(str)

# Calculate TF-IDF matrix
tfidf = TfidfVectorizer(stop_words='english')
df['Features'] = df['Features'].fillna('')
tfidf_matrix = tfidf.fit_transform(df['Features'])

# Calculate cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
# Function to calculate the distance between two points using their coordinates
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in kilometers between two points.

    Note the argument order: longitude comes before latitude for each point.

    Args:
        lon1: Longitude of the first point, in decimal degrees.
        lat1: Latitude of the first point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.

    Returns:
        The distance along the surface of a sphere of radius 6371 km.
    """
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points; asin() raises ValueError above 1, so clamp.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers
    return c * r

# Function to recommend locations based on similarity score
def recommend_locations(user_input_location, user_input_categories, user_input_budget, user_input_duration, cosine_sim=cosine_sim):
def recommend_locations(user_input_location, user_input_categories, user_input_budget, user_input_days):
user_input_duration = user_input_days * 8 # Calculate total duration based on number of days

# Create a new dataframe to store the user's input
user_df = pd.DataFrame([[user_input_location, ' '.join(user_input_categories), user_input_budget, user_input_duration]], columns=['Location', 'Category', 'Budget', 'Duration'])
user_df['Features'] = user_df['Location'] + ' ' + user_df['Category'] + ' ' + user_df['Budget'].astype(str) + ' ' + user_df['Duration'].astype(str)
Expand All @@ -30,26 +36,72 @@ def recommend_locations(user_input_location, user_input_categories, user_input_b
# Calculate the cosine similarity between the user's input and the locations in the dataframe
cosine_sim_user = linear_kernel(user_tfidf, tfidf_matrix)

# Get the top 10 most similar locations
# Get the most similar locations
sim_scores = list(enumerate(cosine_sim_user[0]))
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
sim_scores = sim_scores[0:10]
location_indices = [i[0] for i in sim_scores]
recommended_locations = df.iloc[location_indices]
return recommended_locations

# Filter locations based on duration, location, categories, and budget
location_indices = [i[0] for i in sim_scores if df.iloc[i[0]]['Duration'] <= user_input_duration and df.iloc[i[0]]['Location'] == user_input_location and any(category in df.iloc[i[0]]['Category'] for category in user_input_categories) and df.iloc[i[0]]['Budget'] <= user_input_budget]

# If there are not enough locations in the input location, get other locations
if len(location_indices) < user_input_duration / 8:
remaining_duration = user_input_duration - sum(df.iloc[i]['Duration'] for i in location_indices)
other_location_indices = [i[0] for i in sim_scores if df.iloc[i[0]]['Duration'] <= remaining_duration and df.iloc[i[0]]['Location'] != user_input_location and any(category in df.iloc[i[0]]['Category'] for category in user_input_categories) and df.iloc[i[0]]['Budget'] <= user_input_budget]
for index in other_location_indices:
location_duration = df.iloc[index]['Duration']
if remaining_duration - location_duration >= 0:
location_indices.append(index)
remaining_duration -= location_duration
else:
break

# Get the coordinates of the input location
input_location_coordinates = df[df['Location'] == user_input_location][['lati', 'long']].values[0]

# Calculate the distance from the input location to each location in the DataFrame
df['Distance'] = df.apply(lambda row: haversine(input_location_coordinates[1], input_location_coordinates[0], row['long'], row['lati']), axis=1)

# Sort the locations by distance
location_indices = sorted(location_indices, key=lambda x: df.iloc[x]['Distance'])

# Split locations into days based on a maximum of 8 hours per day
recommended_locations_per_day = []
current_day_duration = 0
current_day_locations = []
for index in location_indices:
location_duration = df.iloc[index]['Duration']
if current_day_duration + location_duration > 8:
if current_day_locations:
recommended_locations_per_day.append(df.iloc[current_day_locations])
current_day_duration = location_duration
current_day_locations = [index]
else:
current_day_locations.append(index)
current_day_duration += location_duration
if len(recommended_locations_per_day) == user_input_days:
break
if current_day_locations and len(recommended_locations_per_day) < user_input_days:
recommended_locations_per_day.append(df.iloc[current_day_locations])

return recommended_locations_per_day

# Get user input
user_input_location = "Tangalle"
user_input_category = ["point of interests", "beaches", "nature & wildlife"]
user_input_budget = 1100
user_input_duration = 7
user_input_budget = 1000
user_input_days = 3 # Number of days

# Recommend locations based on the user's input
recommendations = recommend_locations(user_input_location, user_input_category, user_input_budget, user_input_duration)

# print the results as a JSON string
print(json.dumps(
{
"location": recommendations.to_dict('records')
}
))
recommendations = recommend_locations(user_input_location, user_input_category, user_input_budget, user_input_days)

recommendations_list = []
for i, day in enumerate(recommendations):
recommendations_list.append({
"Day": i + 1,
"Locations": day.to_dict('records')
})

# Print the list as a JSON array
print(json.dumps(recommendations_list))


7 changes: 1 addition & 6 deletions model/src/util/recommendation system.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@
"#import libraries\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import sys\n",
"import json\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.metrics.pairwise import linear_kernel\n",
"from sklearn.metrics.pairwise import cosine_similarity"
"import seaborn as sns"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion server/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ app.use('/', stayRoute)
app.use('/', locationRoute)

app.listen(process.env.PORT, () => {
console.log(`app is listening on port ${process.env.PORT}!`)
console.log(`server is listening on port ${process.env.PORT}!`)
})

module.exports = app;
3 changes: 1 addition & 2 deletions server/routes/locationRoute.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ router.get('/api/location/', (req, res) => {
python.on('close', (code) => {
console.log(`child process close all stdio with code ${code}`);
// send data to browser
res.send(dataToSend)

res.send(dataToSend);
});
})

Expand Down
1 change: 0 additions & 1 deletion server/routes/stayRoute.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ router.get('/api/stay/', (req, res) => {
console.log(`child process close all stdio with code ${code}`);
// send data to browser
res.send(dataToSend)

});
})

Expand Down

0 comments on commit 47f79c5

Please sign in to comment.