In [1]:
# Import libraries
import pandas as pd

In [2]:
# load first dataset to be modified from CSV
location_df = pd.read_csv('../Resources/Processed/location.csv')
location_df

Unnamed: 0,Resort ID,Resort Name,Continent,Country,State/Province,Country Code
0,1,KitzSki – Kitzbühel/Kirchberg,Europe,Austria,Tyrol (Tirol),AT
1,2,Ischgl/Samnaun – Silvretta Arena,Europe,Austria,Tyrol (Tirol),AT
2,3,SkiWelt Wilder Kaiser-Brixental,Europe,Austria,Tyrol (Tirol),AT
3,4,Ski Arlberg,Europe,Austria,Tyrol (Tirol),AT
4,5,Serfaus-Fiss-Ladis,Europe,Austria,Tyrol (Tirol),AT
...,...,...,...,...,...,...
3220,5445,mozir,Europe,Belarus,Belarus,BY
3221,5446,republican-skicenter-minsk,Europe,Belarus,Belarus,BY
3222,5447,silichy,Europe,Belarus,Belarus,BY
3223,5467,serra-da-estrela,Europe,Portugal,Portugal,PT


In [3]:
# load second dataset to be modified from CSV
price_df = pd.read_csv('../Resources/Processed/price.csv')
price_df

Unnamed: 0,Resort ID,Adult,Currency
0,1,55.0,GBP
1,2,53.0,GBP
2,3,49.0,GBP
3,4,53.0,GBP
4,5,51.0,GBP
...,...,...,...
5131,5474,0.0,-
5132,5475,0.0,-
5133,5476,69500.0,Mongolian togrog
5134,5477,0.0,-


In [4]:
# Merge the two DataFrames on Resort id
merged_df = pd.merge(location_df, price_df, on='Resort ID')

merged_df

Unnamed: 0,Resort ID,Resort Name,Continent,Country,State/Province,Country Code,Adult,Currency
0,1,KitzSki – Kitzbühel/Kirchberg,Europe,Austria,Tyrol (Tirol),AT,55.0,GBP
1,2,Ischgl/Samnaun – Silvretta Arena,Europe,Austria,Tyrol (Tirol),AT,53.0,GBP
2,3,SkiWelt Wilder Kaiser-Brixental,Europe,Austria,Tyrol (Tirol),AT,49.0,GBP
3,4,Ski Arlberg,Europe,Austria,Tyrol (Tirol),AT,53.0,GBP
4,5,Serfaus-Fiss-Ladis,Europe,Austria,Tyrol (Tirol),AT,51.0,GBP
...,...,...,...,...,...,...,...,...
2938,5445,mozir,Europe,Belarus,Belarus,BY,0.0,-
2939,5446,republican-skicenter-minsk,Europe,Belarus,Belarus,BY,0.0,-
2940,5447,silichy,Europe,Belarus,Belarus,BY,0.0,-
2941,5467,serra-da-estrela,Europe,Portugal,Portugal,PT,25.0,GBP


In [5]:
# Filter out rows with 0.0 or no value in the adult column
filtered_df = merged_df[(merged_df['Adult'] != 0.0) & (merged_df['Adult'].notnull())]

filtered_df

Unnamed: 0,Resort ID,Resort Name,Continent,Country,State/Province,Country Code,Adult,Currency
0,1,KitzSki – Kitzbühel/Kirchberg,Europe,Austria,Tyrol (Tirol),AT,55.0,GBP
1,2,Ischgl/Samnaun – Silvretta Arena,Europe,Austria,Tyrol (Tirol),AT,53.0,GBP
2,3,SkiWelt Wilder Kaiser-Brixental,Europe,Austria,Tyrol (Tirol),AT,49.0,GBP
3,4,Ski Arlberg,Europe,Austria,Tyrol (Tirol),AT,53.0,GBP
4,5,Serfaus-Fiss-Ladis,Europe,Austria,Tyrol (Tirol),AT,51.0,GBP
...,...,...,...,...,...,...,...,...
2927,5390,sljeme-medvednica-zagreb,Europe,Croatia,Continental Croatia (Kontinentalna Hrvatska),HR,100.0,Croatian Kuna
2928,5391,velebno-baske-ostarije,Europe,Croatia,Croatia,HR,60.0,Croatian Kuna
2934,5440,kuutse-hill-otepaeae,Europe,Estonia,Valga,EE,24.0,GBP
2936,5442,vimka-viimsi-maeepark,Europe,Estonia,Estonia,EE,18.0,GBP


In [6]:
# Find unique values in the Currency column
unique_currencies = filtered_df['Currency'].unique()
print(unique_currencies)

['GBP' 'Bosnia convertible mark' 'Ukrainian hryvnia' 'Icelandic krona'
 'Romanian leu' 'UK Pound' 'Hungarian forint' 'Croatian Kuna']


In [7]:
# Define exchange rates for conversion to GBP
exchange_rates = {
    'GBP': 1.0,
    'Bosnia convertible mark': 0.44,
    'Ukrainian hryvnia': 0.021,
    'Icelandic krona': 0.0057,
    'Romanian leu': 0.17,
    'UK Pound': 1.0,
    'Hungarian forint': 0.0022,
    'Croatian Kuna': 0.11329
}

# Perform currency conversion
def convert_to_gbp(row):
    currency = row['Currency']
    conversion_rate = exchange_rates.get(currency)
    if conversion_rate:
        return row['Adult'] * conversion_rate
    else:
        return row['Adult']

# Apply the conversion function to the Adult column
filtered_df['Adult_GBP'] = filtered_df.apply(convert_to_gbp, axis=1)

filtered_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Adult_GBP'] = filtered_df.apply(convert_to_gbp, axis=1)


Unnamed: 0,Resort ID,Resort Name,Continent,Country,State/Province,Country Code,Adult,Currency,Adult_GBP
0,1,KitzSki – Kitzbühel/Kirchberg,Europe,Austria,Tyrol (Tirol),AT,55.0,GBP,55.0000
1,2,Ischgl/Samnaun – Silvretta Arena,Europe,Austria,Tyrol (Tirol),AT,53.0,GBP,53.0000
2,3,SkiWelt Wilder Kaiser-Brixental,Europe,Austria,Tyrol (Tirol),AT,49.0,GBP,49.0000
3,4,Ski Arlberg,Europe,Austria,Tyrol (Tirol),AT,53.0,GBP,53.0000
4,5,Serfaus-Fiss-Ladis,Europe,Austria,Tyrol (Tirol),AT,51.0,GBP,51.0000
...,...,...,...,...,...,...,...,...,...
2927,5390,sljeme-medvednica-zagreb,Europe,Croatia,Continental Croatia (Kontinentalna Hrvatska),HR,100.0,Croatian Kuna,11.3290
2928,5391,velebno-baske-ostarije,Europe,Croatia,Croatia,HR,60.0,Croatian Kuna,6.7974
2934,5440,kuutse-hill-otepaeae,Europe,Estonia,Valga,EE,24.0,GBP,24.0000
2936,5442,vimka-viimsi-maeepark,Europe,Estonia,Estonia,EE,18.0,GBP,18.0000


In [8]:
# Drop Adult and Currency columns
filtered_df.drop(columns=['Adult', 'Currency'], inplace=True)

# Rename Adult_GBP column to Adult Price
filtered_df.rename(columns={'Adult_GBP': 'Adult Price(£)'}, inplace=True)
# Set the values in the Adult Price column to 2 decimal points
filtered_df['Adult Price(£)'] = filtered_df['Adult Price(£)'].round(2)
filtered_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.drop(columns=['Adult', 'Currency'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'Adult_GBP': 'Adult Price(£)'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Adult Price(£)'] = filtered_df['Adult Price(£)'].round(2)


Unnamed: 0,Resort ID,Resort Name,Continent,Country,State/Province,Country Code,Adult Price(£)
0,1,KitzSki – Kitzbühel/Kirchberg,Europe,Austria,Tyrol (Tirol),AT,55.00
1,2,Ischgl/Samnaun – Silvretta Arena,Europe,Austria,Tyrol (Tirol),AT,53.00
2,3,SkiWelt Wilder Kaiser-Brixental,Europe,Austria,Tyrol (Tirol),AT,49.00
3,4,Ski Arlberg,Europe,Austria,Tyrol (Tirol),AT,53.00
4,5,Serfaus-Fiss-Ladis,Europe,Austria,Tyrol (Tirol),AT,51.00
...,...,...,...,...,...,...,...
2927,5390,sljeme-medvednica-zagreb,Europe,Croatia,Continental Croatia (Kontinentalna Hrvatska),HR,11.33
2928,5391,velebno-baske-ostarije,Europe,Croatia,Croatia,HR,6.80
2934,5440,kuutse-hill-otepaeae,Europe,Estonia,Valga,EE,24.00
2936,5442,vimka-viimsi-maeepark,Europe,Estonia,Estonia,EE,18.00


In [9]:
# Save the modified DataFrame as a new CSV file
filtered_df.to_csv('../Resources/Processed/locationsPriced.csv', index=False)

In [10]:
# Save the DataFrame as a JSON file
filtered_df.to_json('../Resources/Processed/locationsPriced.json', orient='records')