<a href="https://colab.research.google.com/github/Kiron-Ang/DSC/blob/main/vacation_recommender_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Vacation Recommender System
### Kiron Ang, November 2024

In [1]:
print("Printing version numbers. . .")
!python -V

!pip install -U polars > output.txt
import polars
print("polars", polars.__version__)

!pip install -U scikit-learn > output.txt
import sklearn
print("scikit-learn", sklearn.__version__)

import ipywidgets
print("ipywidgets", ipywidgets.__version__)

import IPython
print("IPython", IPython.__version__)

Printing version numbers. . .
Python 3.10.12
polars 1.12.0
scikit-learn 1.5.2
ipywidgets 7.7.1
IPython 7.34.0


In [2]:
# data.un.org
tourism = polars.read_csv("https://data.un.org/_Docs/SYB/CSV/SYB66_176_202310_Tourist-Visitors%20Arrival%20and%20Expenditure.csv", encoding = "latin-1", skip_rows = 1)
gdp = polars.read_csv("https://data.un.org/_Docs/SYB/CSV/SYB66_230_202310_GDP%20and%20GDP%20Per%20Capita.csv", encoding = "latin-1", skip_rows = 1)
crime = polars.read_csv("https://data.un.org/_Docs/SYB/CSV/SYB66_328_202310_Intentional%20homicides%20and%20other%20crimes.csv", encoding = "latin-1", skip_rows = 1, infer_schema = False)

tourism = tourism.filter(tourism["Year"] == 2021)
gdp = gdp.filter(gdp["Year"] == 2021)
crime = crime.filter(crime["Year"] == "2021")

tourism = tourism.filter(tourism["Series"] == "Tourist/visitor arrivals (thousands)")
gdp = gdp.filter(gdp["Series"] == "GDP per capita (US dollars)")
crime = crime.filter(crime["Series"] == "Assault rate per 100,000 population")

tourism = tourism.rename({"": "country"})
gdp = gdp.rename({"": "country"})
crime = crime.rename({"": "country"})

tourism = tourism.drop("Region/Country/Area", "Year", "Series", "Tourism arrivals series type", "Tourism arrivals series type footnote", "Footnotes", "Source")
gdp = gdp.drop("Region/Country/Area", "Year", "Series", "Footnotes", "Source")
crime = crime.drop("Region/Country/Area", "Year", "Series", "Footnotes", "Source")

tourism = tourism.rename({"Value": "tourist_arrivals_thousands"})
gdp = gdp.rename({"Value": "gdp_per_capita"})
crime = crime.rename({"Value": "assault_rate_per_100000"})

two = tourism.join(gdp, on = "country")
all = two.join(crime, on = "country")

all = all.with_columns([
    polars.col("tourist_arrivals_thousands").str.replace(",", "").cast(polars.Float64),
    polars.col("gdp_per_capita").str.replace(",", "").cast(polars.Float64),
    polars.col("assault_rate_per_100000").str.replace(",", "").cast(polars.Float64),
])

all

country,tourist_arrivals_thousands,gdp_per_capita,assault_rate_per_100000
str,f64,f64,f64
"""Albania""",5515.0,6396.0,5.7
"""Algeria""",125.0,3700.0,22.7
"""Argentina""",297.0,10761.0,340.6
"""Australia""",246.0,66916.0,289.0
"""Austria""",12728.0,53840.0,40.5
…,…,…,…
"""Switzerland""",4390.0,93525.0,7.5
"""Thailand""",428.0,7067.0,13.2
"""United Arab Emirates""",11479.0,43295.0,1.5
"""United States of America""",22100.0,69185.0,280.1


In [3]:
countries = all["country"].to_list()
months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

country_dropdown = ipywidgets.Dropdown(options = countries, description = "Country:")
month_dropdown = ipywidgets.Dropdown(options = months, description = "Month:")

past_vacations = {}

def submit_survey(month, country):
    if month in past_vacations:
        past_vacations[month].append(country)
    else:
        past_vacations[month] = [country]
    print(f"Adding {month} trip to {country}. . .")
    print(f"Vacations: {past_vacations}")

def on_submit(button):
    submit_survey(month_dropdown.value, country_dropdown.value)

submit_button = ipywidgets.Button(description = "Submit")
submit_button.on_click(on_submit)

print("Please use the form below to enter information")
print("about previous vacations that you enjoyed.")
print("Select the month that you traveled, along with")
print("the country that you visited. If your trip was")
print("longer than a month, then put down the month")
print("that you enjoyed the most. Fill out the form as")
print("many times as you need to. If you visited a")
print("country several times, please fill out the form")
print("for each time you visited.")
print("")

IPython.display.display(month_dropdown, country_dropdown, submit_button)

Please use the form below to enter information
about previous vacations that you enjoyed.
Select the month that you traveled, along with
the country that you visited. If your trip was
longer than a month, then put down the month
that you enjoyed the most. Fill out the form as
many times as you need to. If you visited a
country several times, please fill out the form
for each time you visited.



Dropdown(description='Month:', options=('January', 'February', 'March', 'April', 'May', 'June', 'July', 'Augus…

Dropdown(description='Country:', options=('Albania', 'Algeria', 'Argentina', 'Australia', 'Austria', 'Bahamas'…

Button(description='Submit', style=ButtonStyle())

Adding October trip to Albania. . .
Vacations: {'October': ['Albania']}
Adding June trip to Canada. . .
Vacations: {'October': ['Albania'], 'June': ['Canada']}
Adding July trip to United States of America. . .
Vacations: {'October': ['Albania'], 'June': ['Canada'], 'July': ['United States of America']}
Adding July trip to United States of America. . .
Vacations: {'October': ['Albania'], 'June': ['Canada'], 'July': ['United States of America', 'United States of America']}
Adding July trip to United States of America. . .
Vacations: {'October': ['Albania'], 'June': ['Canada'], 'July': ['United States of America', 'United States of America', 'United States of America']}
Adding July trip to United States of America. . .
Vacations: {'October': ['Albania'], 'June': ['Canada'], 'July': ['United States of America', 'United States of America', 'United States of America', 'United States of America']}
Adding July trip to United States of America. . .
Vacations: {'October': ['Albania'], 'June': ['

In [4]:
import sklearn.metrics.pairwise
cosine_similarity = polars.DataFrame(sklearn.metrics.pairwise.cosine_similarity(all[:, 1:]))

new_names_dictionary = {}
default_names = cosine_similarity.columns
index = 0
for country in all["country"]:
  new_names_dictionary[default_names[index]] = country
  index += 1

cosine_similarity = cosine_similarity.rename(new_names_dictionary)
cosine_similarity = cosine_similarity.insert_column(0, all["country"])
cosine_similarity

country,Albania,Algeria,Argentina,Australia,Austria,Bahamas,Barbados,Belgium,Belize,Bosnia and Herzegovina,Bulgaria,Canada,Chile,"China, Hong Kong SAR","China, Macao SAR",Colombia,Costa Rica,Croatia,Cyprus,Denmark,Dominica,Dominican Republic,Ecuador,El Salvador,Estonia,Eswatini,Finland,France,Germany,Ghana,Greece,Grenada,Guatemala,Guyana,Honduras,Iceland,…,Jamaica,Japan,Jordan,Latvia,Liechtenstein,Lithuania,Luxembourg,Malaysia,Malta,Mauritius,Mongolia,Montenegro,Morocco,Namibia,Netherlands (Kingdom of the),New Zealand,Norway,Oman,Panama,Paraguay,Peru,Poland,Portugal,Qatar,Republic of Korea,Romania,Saint Kitts and Nevis,Saint Lucia,Serbia,Singapore,Slovenia,Sweden,Switzerland,Thailand,United Arab Emirates,United States of America,Uzbekistan
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""Albania""",1.0,0.778946,0.774701,0.75973,0.88726,0.777817,0.762681,0.797098,0.779211,0.801254,0.865154,0.794336,0.764913,0.758518,0.809856,0.931121,0.823037,0.988494,0.795093,0.901585,0.758613,0.983988,0.825393,0.900318,0.775866,0.781586,0.767066,0.992925,0.88393,0.895195,0.995867,0.749185,0.831313,0.76753,0.853815,0.763894,…,0.906112,0.761376,0.968596,0.771822,0.757609,0.782684,0.761018,0.765224,0.775808,0.770427,0.762096,0.855045,0.998082,0.785598,0.823059,0.759949,0.767741,0.772652,0.78423,0.767524,0.799329,0.978001,0.896211,0.76328,0.775115,0.961369,0.757909,0.76844,0.818977,0.760506,0.796827,0.788535,0.787125,0.795431,0.899402,0.920128,0.999378
"""Algeria""",0.778946,1.0,0.999656,0.999546,0.980375,0.999758,0.999625,0.999558,0.999416,0.999334,0.988396,0.999687,0.999753,0.999477,0.998686,0.953385,0.997265,0.864834,0.999642,0.973555,0.999493,0.878243,0.996966,0.974024,0.99997,0.996577,0.999809,0.699174,0.981771,0.974439,0.832678,0.985285,0.995879,0.999831,0.991516,0.999704,…,0.970875,0.999603,0.910401,0.999925,0.99943,0.999965,0.99959,0.999754,0.999974,0.999902,0.999643,0.991208,0.742171,0.997379,0.997264,0.999479,0.999827,0.999931,0.999963,0.999826,0.999438,0.892616,0.976282,0.999678,0.999971,0.921458,0.999358,0.997715,0.997776,0.999564,0.99957,0.999867,0.999895,0.999634,0.974687,0.96231,0.756359
"""Argentina""",0.774701,0.999656,1.0,0.999341,0.978686,0.999982,0.999698,0.999124,0.999934,0.998691,0.987046,0.999128,0.999479,0.999204,0.997872,0.951831,0.996593,0.861324,0.998991,0.971669,0.999389,0.874918,0.996136,0.972687,0.999505,0.998274,0.999438,0.694622,0.980178,0.974194,0.828854,0.989418,0.995464,0.999697,0.990446,0.999362,…,0.969464,0.999283,0.907424,0.999529,0.999159,0.999432,0.999279,0.999415,0.999529,0.999545,0.999369,0.989946,0.738326,0.998755,0.996321,0.999648,0.999444,0.999493,0.999599,0.999474,0.998763,0.889402,0.974463,0.99933,0.999548,0.9186,0.999592,0.999143,0.997077,0.999245,0.998939,0.99929,0.999316,0.999016,0.97282,0.960247,0.751977
"""Australia""",0.75973,0.999546,0.999341,1.0,0.974009,0.999305,0.999917,0.998208,0.998859,0.99779,0.983382,0.998487,0.999968,0.999993,0.996712,0.943858,0.994584,0.849342,0.998411,0.96625,0.999992,0.86346,0.994171,0.966756,0.999674,0.995705,0.999929,0.677336,0.975612,0.967349,0.815646,0.985377,0.992723,0.99991,0.98715,0.999972,…,0.963212,0.999989,0.897547,0.999819,0.999989,0.999341,0.999991,0.999959,0.99968,0.99986,0.999992,0.986785,0.721638,0.996366,0.994603,0.999891,0.999915,0.999789,0.999251,0.999923,0.997994,0.878655,0.969336,0.999976,0.999708,0.909362,0.999859,0.997547,0.995315,0.99999,0.99825,0.998958,0.999057,0.998384,0.967529,0.953694,0.736338
"""Austria""",0.88726,0.980375,0.978686,0.974009,1.0,0.979754,0.974946,0.985739,0.979892,0.986907,0.998943,0.984991,0.975793,0.97359,0.989147,0.993811,0.9922,0.946823,0.985208,0.999486,0.973605,0.955263,0.992741,0.99935,0.979409,0.978359,0.976523,0.826326,0.999972,0.997882,0.925488,0.959525,0.993727,0.976647,0.997652,0.975449,…,0.998853,0.974584,0.974087,0.978099,0.973271,0.981547,0.974461,0.975901,0.97939,0.977641,0.974832,0.99784,0.859505,0.980254,0.992248,0.973969,0.976749,0.978371,0.982003,0.976676,0.986387,0.963963,0.999804,0.975239,0.979168,0.979954,0.973232,0.975128,0.991311,0.974283,0.985695,0.983312,0.982892,0.985303,0.999634,0.997029,0.870447
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Switzerland""",0.787125,0.999895,0.999316,0.999057,0.982892,0.999503,0.999125,0.999819,0.999123,0.999725,0.990305,0.999924,0.999374,0.998983,0.999287,0.957085,0.998099,0.871364,0.999915,0.976486,0.998961,0.884449,0.997886,0.976797,0.999837,0.996094,0.999492,0.708466,0.984184,0.976853,0.839892,0.983793,0.996828,0.999467,0.993102,0.999323,…,0.973818,0.999172,0.915765,0.999701,0.998919,0.999974,0.999149,0.999396,0.999835,0.999643,0.999214,0.992866,0.750761,0.997034,0.998162,0.998908,0.999525,0.999733,0.999963,0.999514,0.999798,0.898473,0.979057,0.999287,0.999814,0.926497,0.998735,0.997033,0.998534,0.999116,0.999872,0.999997,1.0,0.999906,0.977556,0.965793,0.764891
"""Thailand""",0.795431,0.999634,0.999016,0.998384,0.985303,0.999262,0.998537,0.999965,0.998941,0.999952,0.992104,0.999997,0.998808,0.998278,0.999706,0.960965,0.998849,0.877948,0.999999,0.979323,0.998271,0.890709,0.998682,0.979638,0.999498,0.996102,0.998966,0.718,0.986502,0.979653,0.847186,0.983455,0.997806,0.998968,0.994613,0.99873,…,0.976839,0.998526,0.921138,0.999277,0.998196,0.999783,0.998496,0.998832,0.999496,0.999192,0.998587,0.994396,0.759703,0.997094,0.998892,0.998268,0.999013,0.999325,0.999819,0.998999,0.999979,0.904353,0.981731,0.998681,0.999461,0.931522,0.998056,0.996707,0.999182,0.998452,0.999997,0.999936,0.999906,1.0,0.980326,0.969232,0.773572
"""United Arab Emirates""",0.899402,0.974687,0.97282,0.967529,0.999634,0.974017,0.968582,0.980831,0.974221,0.982186,0.997336,0.979964,0.969524,0.967063,0.984815,0.996319,0.988473,0.955172,0.980217,0.999987,0.967079,0.962906,0.989127,0.999769,0.973595,0.972873,0.970344,0.841238,0.999412,0.998049,0.935386,0.953067,0.990395,0.970483,0.995444,0.969141,…,0.999637,0.968174,0.979843,0.972116,0.966708,0.976021,0.968036,0.969647,0.973573,0.971599,0.968449,0.995699,0.872969,0.974913,0.988527,0.967494,0.970598,0.972423,0.976539,0.970516,0.981581,0.9708,0.999974,0.968906,0.973322,0.984981,0.966675,0.969053,0.987394,0.967838,0.980779,0.978035,0.977556,0.980326,1.0,0.998742,0.883432
"""United States of America""",0.920128,0.96231,0.960247,0.953694,0.997029,0.961664,0.955019,0.969905,0.962051,0.971569,0.99245,0.968789,0.956071,0.953126,0.974896,0.999186,0.979722,0.968775,0.969091,0.998982,0.953176,0.975195,0.980558,0.99872,0.960958,0.961296,0.95704,0.867239,0.996456,0.996746,0.951891,0.940169,0.982442,0.957253,0.98948,0.955599,…,0.999266,0.954444,0.988603,0.959175,0.952702,0.963911,0.954281,0.956207,0.960935,0.958556,0.954781,0.989823,0.896323,0.963539,0.979734,0.953743,0.957344,0.959539,0.964578,0.957249,0.9708,0.981575,0.998354,0.955317,0.960632,0.992374,0.952782,0.95631,0.978281,0.954043,0.969797,0.966387,0.965793,0.969232,0.998742,1.0,0.905753


In [17]:
past_countries = list(set([country for month in past_vacations.values() for country in month]))
similar_countries = []

for country in past_countries:
  most_similar = cosine_similarity.sort(by = country, descending = True).select(["country", country]).slice(1, 3)
  for similar_country in most_similar["country"]:
    similar_countries.append(similar_country)

similar_countries = list(set(similar_countries))
similar_countries

['Italy',
 'Thailand',
 'Uzbekistan',
 'Indonesia',
 'Morocco',
 'Cyprus',
 'Colombia',
 'Jamaica',
 'Slovenia']

In [20]:
past_months = {key: len(value) for key, value in past_vacations.items()}
most_frequent_month = max(past_months, key = past_months.get)
most_frequent_month

'July'

In [21]:
print(f"For your next vacation, you should travel in {most_frequent_month}")
print("to one of the countries below:")

for country in similar_countries:
  print(f"• {country}")

For your next vacation, you should travel in July
to one of the countries below:
• Italy
• Thailand
• Uzbekistan
• Indonesia
• Morocco
• Cyprus
• Colombia
• Jamaica
• Slovenia
