In [1]:
# Mount Google Drive (Colab only) so that files under /content/drive/MyDrive can be
# read and written by the later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from io import StringIO
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.distance import geodesic

In [3]:
def get_capital_links(url):
    """Scrape country, capital and an article link from the first ``wikitable`` at ``url``.

    For every table row after the first one, the country name is read from the
    second ``<td>`` cell (index 1) and the capital from the fifth (index 4).
    The link is the first anchor carrying an ``href`` in the capital cell; when
    the capital cell has none, the country cell's first such anchor is used.

    Rows with fewer than five ``<td>`` cells (e.g. header or section rows) and
    rows without any usable link are skipped instead of raising.

    Args:
        url: Address of the page containing the table.

    Returns:
        A list of ``[country, capital, href]`` lists; ``href`` is returned
        exactly as written in the page (typically a site-relative path).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page contains no table with class ``wikitable``.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table", class_="wikitable")
    if table is None:
        raise ValueError(f"No table with class 'wikitable' found at {url}")

    result = []
    for row in table.find_all("tr")[1:]:
        columns = row.find_all("td")
        if len(columns) < 5:
            # Not a data row: the indices used below would be out of range.
            continue
        country_cell, capital_cell = columns[1], columns[4]
        # Use the very anchor that was found to have an href, rather than
        # looking the first anchor up a second time without the href filter.
        anchor = capital_cell.find("a", href=True)
        if anchor is None:
            anchor = country_cell.find("a", href=True)
        if anchor is None:
            continue
        result.append([country_cell.text.strip(), capital_cell.text.strip(), anchor["href"]])
    return result

In [4]:
def get_lat_long(url):
    """Read the coordinates shown on the page at ``url``.

    The page is expected to contain a ``<span class="latitude">`` and a
    ``<span class="longitude">``; the first occurrence of each is used.

    Args:
        url: Address of the page to scrape.

    Returns:
        The stripped latitude and longitude texts joined as
        ``"<latitude>, <longitude>"``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If either coordinate span is missing from the page.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    latitude = soup.find("span", class_="latitude")
    longitude = soup.find("span", class_="longitude")
    if latitude is None or longitude is None:
        # Name the offending page instead of failing with "'NoneType' has no attribute 'text'".
        raise ValueError(f"No latitude/longitude found at {url}")
    return latitude.text.strip() + ', ' + longitude.text.strip()

In [5]:
def capitals_locations(url):
    """Look up the coordinates of every capital listed on the page at ``url``.

    The rows come from ``get_capital_links(url)``; each row's link is resolved
    against ``url`` (so site-relative links point at the same host as the list
    page, and absolute links are used as they are) and passed to
    ``get_lat_long``. One page is fetched per row.

    Args:
        url: Address of the list page.

    Returns:
        A list of ``[country, capital, coordinates]`` lists, in the order the
        rows were returned by ``get_capital_links``.
    """
    return [
        [country, capital, get_lat_long(urljoin(url, link))]
        for country, capital, link in get_capital_links(url)
    ]

In [6]:
def users_location(url):
    """Return the user's coordinates as scraped from the page at ``url``.

    Thin wrapper that delegates to ``get_lat_long`` and returns its result
    unchanged.
    """
    coordinates = get_lat_long(url)
    return coordinates

In [7]:
def convert_to_decimal(coord_str):
    """Convert a coordinate such as ``52°13′56″N`` to signed decimal degrees.

    The string is cut at ``°``, then ``′``, then ``″``; minutes and seconds are
    optional and default to zero. A decimal comma is accepted in every numeric
    part (``46,94798°N``). Whatever follows the last unit mark is taken as the
    hemisphere: ``W`` and ``S`` make the result negative, anything else leaves
    it positive. The hemisphere is compared after stripping whitespace and
    upper-casing, so ``"6°15′ W"`` and ``"6°15′w"`` are both negative.

    A string without ``°`` keeps all defaults and therefore yields ``0.0``.

    Args:
        coord_str: One latitude or longitude in the notation described above.

    Returns:
        The coordinate in decimal degrees as a float.

    Raises:
        ValueError: If a numeric part cannot be parsed, or a unit mark occurs
            more than once.
    """
    degrees, minutes, seconds, direction = '0', '0', '0', ''
    if '°' in coord_str:
        degrees, direction = coord_str.split('°')
        if '′' in direction:
            minutes, direction = direction.split('′')
            if '″' in direction:
                seconds, direction = direction.split('″')

    def to_number(text):
        # The scraped pages use a decimal comma; float() needs a dot.
        return float(text.replace(',', '.'))

    magnitude = to_number(degrees) + to_number(minutes) / 60 + to_number(seconds) / 3600
    # Normalise before comparing: a stray space or lower-case letter must not flip the sign.
    sign = -1 if direction.strip().upper() in ('W', 'S') else 1
    return magnitude * sign



In [8]:
def calculate_distance(coord1, coord2):
    """Return the geodesic distance in kilometres between two coordinate strings.

    Each argument is expected to look like ``"<latitude>, <longitude>"`` (the
    two parts separated by a comma followed by one space), with each part in
    the notation understood by ``convert_to_decimal``.

    Args:
        coord1: First point, or a non-string placeholder for "missing".
        coord2: Second point, or a non-string placeholder for "missing".

    Returns:
        The distance in kilometres, or ``None`` when either argument is not a
        string (for example ``NaN`` or ``None`` for a missing value).

    Raises:
        ValueError: If a string does not split into exactly two parts.
    """
    # Missing values are not strings (NaN is a float, None is None); there is nothing to measure.
    if not isinstance(coord1, str) or not isinstance(coord2, str):
        return None

    # Split on ", " rather than "," because the parts themselves may contain a decimal comma.
    lat1, lon1 = (convert_to_decimal(part) for part in coord1.split(', '))
    lat2, lon2 = (convert_to_decimal(part) for part in coord2.split(', '))

    return geodesic((lat1, lon1), (lat2, lon2)).km

In [13]:
# Scrape the coordinates of every capital from the Polish Wikipedia list of countries.
# capitals_locations fetches one page per table row, so this cell issues many HTTP requests.
capitals_loc = capitals_locations("https://pl.wikipedia.org/wiki/Lista_pa%C5%84stw_%C5%9Bwiata")
capitals_df = pd.DataFrame(capitals_loc, columns=['Country', 'Capital', 'Coordinates'])

In [23]:
# Preview the first rows of the scraped capitals table.
capitals_df.head()

Unnamed: 0,Country,Capital,Coordinates
0,Afganistan,Kabul,"34°32′N, 69°10′E"
1,Albania,Tirana,"41°19′N, 19°49′E"
2,Algieria,Algier,"36°47′N, 3°04′E"
3,Andora,Andora,"42°30′N, 1°30′E"
4,Angola,Luanda,"8°50′S, 13°14′E"


In [15]:
# Reading Countries GDP
# Source: the Polish Wikipedia list of countries by nominal GDP ("PKB nominalny") per capita.
res = requests.get(
    'https://pl.wikipedia.org/wiki/Lista_pa%C5%84stw_%C5%9Bwiata_wed%C5%82ug_PKB_nominalnego_per_capita',
    timeout=30,  # do not let a stalled connection hang the notebook
)
res.raise_for_status()  # fail here rather than parse an error page
soup = BeautifulSoup(res.text, 'html.parser')
tables = soup.select('.wikitable')
# pandas deprecated passing literal HTML to read_html (2.1+); wrap it in a file-like object.
# Only the first parsed table is used.
wiki_df = pd.read_html(StringIO(str(tables)))[0]

In [24]:
# Reading User Preferences / Score of Countries
# The CSV lives on the mounted Google Drive. Later cells use its 'Państwo', 'Rate_1' and
# 'Rate_2' columns.
user_df = pd.read_csv('/content/drive/MyDrive/trip_rating/countries_rating.csv', encoding="UTF-8")

In [38]:
# Working on Data
# Join the GDP table with the user ratings on the country name; the outer join keeps
# countries that appear in only one of the two sources (they are removed by dropna below).
result = pd.merge(wiki_df, user_df, 'outer', on='Państwo')
# Drop the columns at positions 2-6 of the merged frame.
# NOTE(review): positional selection depends on the scraped table layout - confirm it still
# matches if the Wikipedia page changes.
result = result.drop(result.columns[[2, 3, 4, 5, 6]], axis=1)
# Convert the 2021 GDP column to numbers: rows holding the 'b.d.' placeholder (presumably
# "brak danych", i.e. no data - TODO confirm) are left out of the conversion and become NaN
# when the converted values are aligned back by index.
result['2021 r.'] = pd.to_numeric(result['2021 r.'][result['2021 r.'] != 'b.d.'].str.replace(' ', '')) # remove the spaces inside the numbers
# Taken before dropna, so this is the maximum over every row with a GDP value,
# not only over the countries that also have user ratings.
maximum_PKB = result['2021 r.'].max()
# Keep only rows with no missing value in any remaining column.
result = result.dropna()
result['Users_rate_avg'] = (result['Rate_1'] + result['Rate_2'])/2      # mean of the two user ratings
# Inverted 0-5 score: the row with the maximum GDP gets 0, lower GDP moves towards 5.
result['Rate_PKB'] = abs(result['2021 r.'] / maximum_PKB - 1) * 5      # cheapest countries get the most points


# Renumber the rows, then discard the old index (added as 'index' by reset_index)
# together with the 'Poz.' column.
result = result.reset_index()
result = result.drop(['Poz.', 'index'], axis=1)

In [39]:
# Keep only the columns used from here on, in a fixed order, and preview the result.
result = result[['Państwo','2021 r.','Rate_PKB','Rate_1','Rate_2','Users_rate_avg']]
result.head()

Unnamed: 0,Państwo,2021 r.,Rate_PKB,Rate_1,Rate_2,Users_rate_avg
0,Luksemburg,136701.0,0.0,6.0,7.5,6.75
1,Irlandia,99013.0,1.378483,9.0,8.0,8.5
2,Szwajcaria,93720.0,1.572081,8.0,8.0,8.0
3,Norwegia,89090.0,1.741428,9.0,9.0,9.0
4,Singapur,72795.0,2.337437,9.0,10.0,9.5


In [40]:
# Attach each country's capital and its coordinates; the distance from the user's
# location to the capital is used later to estimate the trip cost.
# The country column is renamed so that it matches the key used by capitals_df.
result = (
    result
    .rename(columns={"Państwo": "Country"})
    .merge(capitals_df, how='outer', on='Country')
)
result.head()

Unnamed: 0,Country,2021 r.,Rate_PKB,Rate_1,Rate_2,Users_rate_avg,Capital,Coordinates
0,Luksemburg,136701.0,0.0,6.0,7.5,6.75,Luksemburg,"49°36′38″N, 6°07′58″E"
1,Irlandia,99013.0,1.378483,9.0,8.0,8.5,Dublin,"53°20′N, 6°15′W"
2,Szwajcaria,93720.0,1.572081,8.0,8.0,8.0,"brak oficjalnej stolicy, siedzibą rządu jest B...","46,94798°N, 7,44743°E"
3,Norwegia,89090.0,1.741428,9.0,9.0,9.0,Oslo,"59°54′46,79″N, 10°44′16,79″E"
4,Singapur,72795.0,2.337437,9.0,10.0,9.5,Singapur,"1°18′N, 103°48′E"


In [41]:
# Location of the user: the coordinates published on the Polish Wikipedia page for Warsaw.
user_coordinates = users_location('https://pl.wikipedia.org/wiki/Warszawa')

In [42]:
# user_coordinates is already a single "<latitude>, <longitude>" string, so it is
# broadcast to every row as-is.
result['User_coordinates'] = user_coordinates
# Distance in km from the user to each capital; calculate_distance returns None for rows
# whose coordinates are missing (NaN).
result['Distance'] = result.apply(lambda row: calculate_distance(row['Coordinates'], row['User_coordinates']), axis=1)

In [43]:
# Inverted 0-5 score for the distance: the farthest capital gets 0, closer capitals
# move towards 5.
maximum_distance = result['Distance'].max()
result['Distance_rate'] = abs(result['Distance'] / maximum_distance - 1) * 5

In [44]:
# Preview the table with the distance columns added.
result.head()

Unnamed: 0,Country,2021 r.,Rate_PKB,Rate_1,Rate_2,Users_rate_avg,Capital,Coordinates,User_coordinates,Distance,Distance_rate
0,Luksemburg,136701.0,0.0,6.0,7.5,6.75,Luksemburg,"49°36′38″N, 6°07′58″E","52°13′56″N, 21°00′30″E",1083.694609,4.693701
1,Irlandia,99013.0,1.378483,9.0,8.0,8.5,Dublin,"53°20′N, 6°15′W","52°13′56″N, 21°00′30″E",1832.075716,4.482176
2,Szwajcaria,93720.0,1.572081,8.0,8.0,8.0,"brak oficjalnej stolicy, siedzibą rządu jest B...","46,94798°N, 7,44743°E","52°13′56″N, 21°00′30″E",1140.474862,4.677652
3,Norwegia,89090.0,1.741428,9.0,9.0,9.0,Oslo,"59°54′46,79″N, 10°44′16,79″E","52°13′56″N, 21°00′30″E",1065.36557,4.698882
4,Singapur,72795.0,2.337437,9.0,10.0,9.5,Singapur,"1°18′N, 103°48′E","52°13′56″N, 21°00′30″E",9402.791209,2.342364


In [45]:
# Total score = GDP score + mean user rating + distance score; rank best first and
# renumber the rows so that the index reflects the ranking.
result = (
    result
    .assign(Summary=result['Rate_PKB'] + result['Users_rate_avg'] + result['Distance_rate'])
    .sort_values(by=['Summary'], ascending=False)
    .reset_index(drop=True)
)

In [46]:
# Top of the ranking (highest Summary first).
result.head()

Unnamed: 0,Country,2021 r.,Rate_PKB,Rate_1,Rate_2,Users_rate_avg,Capital,Coordinates,User_coordinates,Distance,Distance_rate,Summary
0,Hiszpania,30090.0,3.899423,10.0,10.0,10.0,Madryt,"40°25′04″N, 3°41′41″W","52°13′56″N, 21°00′30″E",2293.238533,4.351831,18.251254
1,Gruzja,5014.0,4.816607,9.0,8.5,8.75,Tbilisi,"41°43′N, 44°47′E","52°13′56″N, 21°00′30″E",2138.528293,4.395559,17.962166
2,Estonia,27282.0,4.002129,9.0,9.0,9.0,Tallinn,"59°26′N, 24°45′E","52°13′56″N, 21°00′30″E",835.003493,4.763992,17.766121
3,Czarnogóra,9350.0,4.658013,9.0,7.5,8.25,Podgorica,"42°28′N, 19°16′E","52°13′56″N, 21°00′30″E",1093.568409,4.69091,17.598923
4,Holandia,58292.0,2.867901,10.0,10.0,10.0,"Amsterdam (stolica konstytucyjna), Haga (stoli...","52°22′N, 4°54′E","52°13′56″N, 21°00′30″E",1096.721221,4.690019,17.557921


In [48]:
# Show the GDP maximum that was used for normalisation, then save the ranked table to
# the mounted Google Drive (the row index is written as the first CSV column).
print(maximum_PKB)
result.to_csv('/content/drive/MyDrive/trip_rating/result.csv')

136701.0


In [51]:
# Top of the ranking again, displayed after the export.
result.head()

Unnamed: 0,Country,2021 r.,Rate_PKB,Rate_1,Rate_2,Users_rate_avg,Capital,Coordinates,User_coordinates,Distance,Distance_rate,Summary
0,Hiszpania,30090.0,3.899423,10.0,10.0,10.0,Madryt,"40°25′04″N, 3°41′41″W","52°13′56″N, 21°00′30″E",2293.238533,4.351831,18.251254
1,Gruzja,5014.0,4.816607,9.0,8.5,8.75,Tbilisi,"41°43′N, 44°47′E","52°13′56″N, 21°00′30″E",2138.528293,4.395559,17.962166
2,Estonia,27282.0,4.002129,9.0,9.0,9.0,Tallinn,"59°26′N, 24°45′E","52°13′56″N, 21°00′30″E",835.003493,4.763992,17.766121
3,Czarnogóra,9350.0,4.658013,9.0,7.5,8.25,Podgorica,"42°28′N, 19°16′E","52°13′56″N, 21°00′30″E",1093.568409,4.69091,17.598923
4,Holandia,58292.0,2.867901,10.0,10.0,10.0,"Amsterdam (stolica konstytucyjna), Haga (stoli...","52°22′N, 4°54′E","52°13′56″N, 21°00′30″E",1096.721221,4.690019,17.557921


In [49]:
# Bottom of the ranking: the last 20 rows after sorting by Summary in descending order.
result.tail(20)

Unnamed: 0,Country,2021 r.,Rate_PKB,Rate_1,Rate_2,Users_rate_avg,Capital,Coordinates,User_coordinates,Distance,Distance_rate,Summary
176,Botswana,7417.0,4.728714,1.0,2.0,1.5,Gaborone,"24°40′S, 25°55′E","52°13′56″N, 21°00′30″E",8531.755633,2.588556,8.817271
177,Eswatini,4109.0,4.849708,1.0,1.5,1.25,Mbabane,"26°19′S, 31°08′E","52°13′56″N, 21°00′30″E",8757.01975,2.524887,8.624595
178,Brunei,44809.0,3.361058,3.0,3.0,3.0,Bandar Seri Begawan,"4°55′N, 114°55′E","52°13′56″N, 21°00′30″E",9845.337564,2.217281,8.578339
179,Salwador,4345.0,4.841077,1.0,2.0,1.5,San Salvador,"13°41′N, 89°11′W","52°13′56″N, 21°00′30″E",10136.849124,2.134887,8.475964
180,Palau,12187.0,4.554246,2.0,2.0,2.0,Ngerulmud,"7°30′02″N, 134°37′27″E","52°13′56″N, 21°00′30″E",10912.155774,1.915752,8.469998
181,Kiribati,1706.0,4.937601,2.0,2.0,2.0,Bairiki,"1°19′32″N, 172°59′00″E","52°13′56″N, 21°00′30″E",13516.785527,1.179571,8.117172
182,Belize,4177.0,4.847221,1.0,1.0,1.0,Belmopan,"17°16′N, 88°47′W","52°13′56″N, 21°00′30″E",9788.058271,2.233471,8.080692
183,Honduras,2790.0,4.897952,1.0,1.0,1.0,Tegucigalpa,"14°05′39″N, 87°12′24″W","52°13′56″N, 21°00′30″E",9976.086405,2.180326,8.078278
184,Nikaragua,2177.0,4.920374,1.0,1.0,1.0,Managua,"12°08′11,76″N, 86°15′05,03″W","52°13′56″N, 21°00′30″E",10091.391808,2.147736,8.068109
185,Nauru,10139.0,4.629154,1.0,3.0,2.0,Yaren,"0°32′36″S, 166°55′11″E","52°13′56″N, 21°00′30″E",13459.660091,1.195717,7.824871
