In [None]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import plotly.express as px

In [None]:
# Load the cleaned tour dataset from the sibling EDA folder.
# Forward slashes keep this relative path valid on Windows, macOS and Linux;
# the previous backslash-separated raw string only resolved on Windows.
df = pd.read_csv('../EDA/clean_df.csv')

In [None]:
# Drop the 'Unnamed: 0' column (presumably a leftover index saved with the CSV).
# Rebinding df instead of using inplace=True, together with errors='ignore',
# makes this cell safe to re-run: a second execution no longer raises KeyError.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# preview 15 randomly chosen rows of the dataset
df.sample(15)

### *Questions:*
In this section I will answer analytical questions related to the data.
<br>

##### 1. How long are most tours? What is the average tour price for each duration group?

In [None]:
# the median duration is the cut-off used below to separate short from long tours
duration_hours = df['Duration']
median_Duration = duration_hours.median()
print('The median duration for the tours in this dataset is', median_Duration ,'hours long')

In [None]:
# Split the tours around the median duration: strictly below it is "short",
# at or above it is "long".
# Boolean masks compare against the numeric median directly instead of
# formatting it into a query string, which fails when the median is NaN
# (e.g. an empty Duration column) and needlessly round-trips the value via text.
is_short = df['Duration'] < median_Duration
is_long = df['Duration'] >= median_Duration
short = df[is_short]
long = df[is_long]

# average price within each duration group
short_price = short['Price'].mean()
long_price = long['Price'].mean()

In [None]:
# compare the mean price of the two duration groups (short vs. long) with a bar chart
bar_positions = [1, 2]
bar_labels = ['short Tour', 'long Tour']
mean_prices = [short_price, long_price]

plt.figure(figsize=(15, 8))
plt.bar(bar_positions, mean_prices, tick_label=bar_labels, color=['#458B74'])

# black text for ticks, axis labels and title
plt.xticks(color='#000000', fontsize=12)
plt.yticks(color='#000000', fontsize=12)
plt.xlabel('Duration in hours', fontsize=15, color='#000000')
plt.ylabel('Average Price in $', fontsize=15, color='#000000')
plt.title('Average Price by Tour Duration Time', fontsize=22, color='#000000');

##### 2. How does the tour duration affect the tour price?

In [None]:
# scatter plot to show the relationship between the duration & price
x = df['Duration']
y = df['Price']

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names, so use whichever name this
# installation provides instead of hard-coding the legacy one.
# NOTE: plt.style.use changes the global style, so it also affects later plots.
darkgrid_style = next(
    (name for name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
     if name in plt.style.available),
    'seaborn-darkgrid',
)
plt.style.use(darkgrid_style)

plt.figure(figsize=(20,10))
plt.scatter(x, y, c='#458B74', s=100, alpha=0.5)
plt.xticks( color ='#000000',fontsize=12)
plt.yticks(color ='#000000',fontsize=12)
plt.xlabel('Tour Duration in hours',fontsize=20, color ='#000000')
plt.ylabel('Tour Price in $',fontsize=20, color ='#000000')
plt.title('Tour Duration vs Tour Price',fontsize=22, color ='#000000');

##### 3. Compare the average tour price across different countries.
*3.1 Regardless of the other features*

In [None]:
# mean tour price per country, ignoring every other feature
groups = df.groupby(['Country'])['Price'].agg(['mean'])

groups.plot.bar(figsize=(20, 10), color='#458B74')
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel('Country', fontsize=22, color='#000000')
plt.ylabel('Average Price in $', fontsize=22, color='#000000')
plt.title('Average Tour Price in Each Country', fontsize=22, color='#000000')
plt.show()

In [None]:
# highest tour price among the rows whose Country is Oman
df.loc[df['Country'] == 'Oman', 'Price'].max()

In [None]:
# check why Oman has the highest average: show the tour(s) priced at Oman's
# maximum price. The maximum is computed from the data instead of being pasted
# in as a literal (5452.47), so the equality test cannot miss because of
# display rounding or a refreshed dataset.
oman_max_price = df.loc[df['Country'] == 'Oman', 'Price'].max()
df.loc[df['Price'] == oman_max_price]

In [None]:
# Average price per pricing basis (the 'Price Per' column) in each country.
# pd.crosstab without `values` only counts rows, which contradicted the
# "Average Price" title and y-axis label; passing the Price column with
# aggfunc='mean' plots the averages the chart claims to show.
pd.crosstab(
    df['Country'],
    df['Price Per'],
    values=df['Price'],
    aggfunc='mean',
).plot.bar(figsize=(20,10), color=('#458B74','#E3CF57'))

plt.title('Average Price per (Adult/Group) in Each Country',fontsize=22, color ='#000000')
plt.xlabel('Country',fontsize=22, color ='#000000')
plt.ylabel('Average Price in $',fontsize=22, color ='#000000')
plt.yticks(fontsize=12)
plt.xticks(fontsize=12)
plt.show()

##### 4. Compare the distribution of tour prices across different cities (reproducible for any city)

In [None]:
# histogram of the tour prices for one city; change city_name to inspect another city
# (the name is used for both the row filter and the title, so they cannot drift apart)
city_name = 'Beijing'
city = df[df['City'] == city_name]

# density=True normalises the bars so the y-axis really is a density, as
# labelled, and cities with different numbers of tours stay comparable
city['Price'].hist(figsize=(15,8), color='#458B74', density=True)
plt.xlabel('Price in $',fontsize=12, color ='#000000')
plt.ylabel('Density',fontsize=12, color ='#000000')
plt.title('Distribution of the Tour Price per City ({})'.format(city_name),fontsize=22, color ='#000000');

In [None]:
# histogram of the tour prices for one city; change city_name to inspect another city
# (the name is used for both the row filter and the title, so they cannot drift apart)
city_name = 'Shanghai'
city = df[df['City'] == city_name]

# density=True normalises the bars so the y-axis really is a density, as
# labelled, and cities with different numbers of tours stay comparable
city['Price'].hist(figsize=(15,8), color='#458B74', density=True)
plt.xlabel('Price in $',fontsize=12, color ='#000000')
plt.ylabel('Density',fontsize=12, color ='#000000')
plt.title('Distribution of the Tour Price per City ({})'.format(city_name),fontsize=22, color ='#000000');

In [None]:
# histogram of the tour prices for one city; change city_name to inspect another city
# (the name is used for both the row filter and the title, so they cannot drift apart)
city_name = 'Hong Kong'
city = df[df['City'] == city_name]

# density=True normalises the bars so the y-axis really is a density, as
# labelled, and cities with different numbers of tours stay comparable
city['Price'].hist(figsize=(15,8), color='#458B74', density=True)
plt.xlabel('Price in $',fontsize=12, color ='#000000')
plt.ylabel('Density',fontsize=12, color ='#000000')
plt.title('Distribution of the Tour Price per City ({})'.format(city_name),fontsize=22, color ='#000000');

##### 5. How does the tour type affect the tour price?

In [None]:
# mean tour price for every tour type, drawn as one wide bar chart
type_mean_price = df.groupby(['Type'])['Price'].agg(['mean'])
type_mean_price.plot.bar(figsize=(50, 20), color='#458B74')

plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.xlabel('Tour Type', fontsize=35, color='#000000')
plt.ylabel('Average Price in $', fontsize=35, color='#000000')
plt.title('Average Tour Price in based on Tour Type', fontsize=45, color='#000000')
plt.show()

### City Map

In [None]:
# list the distinct city names; each one needs a Latitude and Longitude entry below
df['City'].unique()

In [None]:
# add Latitude for each city
# Signed decimal degrees: positive is north of the equator, negative is south.
_CITY_LATITUDES = {
    'Abu Dhabi': 24.4539,
    'Al Ula': 26.6031,
    'Antalya': 36.8969,
    'Beijing': 39.9042,
    'Berlin': 52.5200,
    'Buenos Aires': -34.6037,
    'Cairo': 30.0444,
    'Cape Town': -33.9249,
    'Chicago': 41.8781,
    'Riyadh': 24.7136,
    'Athens': 37.9838,
    'Doha': 25.2854,
    'Istanbul': 41.0082,
    'Jeddah': 21.4858,
    'Kuala Lumpur': 3.1390,
    'London': 51.5072,
    'Madrid': 40.4168,
    'Marrakech': 31.6295,
    'Mumbai': 19.0760,
    'Muscat': 23.5880,
    'New York': 40.7128,
    'Orlando': 28.5384,
    'Rio de Janeiro': -22.9068,
    'Paris': 48.8566,
    'Rome': 41.9028,
    'Singapore': 1.3521,
    'Shanghai': 31.2304,
    'Seoul': 37.5665,
    'Tokyo': 35.6762,
    'Amsterdam': 52.3676,
    'Barcelona': 41.3874,
    'Hong Kong': 22.3193,
    'Los Angeles': 34.0522,
    'Las Vegas': 36.1699,
    'Mexico City': 19.4326,
    'Sydney': -33.8688,
    'Toronto': 43.6532,
    'Vienna': 48.2082,
    'Dubai': 25.2048,
}


def Latitude (row):
    """Return the latitude of the city named in ``row['City']``.

    Fixes relative to the previous if-chain:
    * Dubai returned 55.2708, which is its longitude; it is now 25.2048.
    * Southern-hemisphere cities (Buenos Aires, Cape Town, Rio de Janeiro,
      Sydney) were stored without the minus sign, which placed them north
      of the equator on the map.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row) with a 'City' entry.

    Returns
    -------
    float
        Latitude in signed decimal degrees for a known city.
    str
        The literal 'Other' when the city is not in the lookup table
        (kept for backward compatibility with the original fallback).
    """
    return _CITY_LATITUDES.get(row['City'], 'Other')

In [None]:
# add Longitude for each city
# Signed decimal degrees: positive is east of Greenwich, negative is west.
_CITY_LONGITUDES = {
    'Abu Dhabi': 54.3773,
    'Al Ula': 37.9295,
    'Antalya': 30.7133,
    'Beijing': 116.4074,
    'Berlin': 13.4050,
    'Buenos Aires': -58.3816,
    'Cairo': 31.2357,
    'Cape Town': 18.4241,
    'Chicago': -87.6298,
    'Riyadh': 46.6753,
    'Athens': 23.7275,
    'Doha': 51.5310,
    'Istanbul': 28.9784,
    'Jeddah': 39.1925,
    'Kuala Lumpur': 101.6869,
    'London': -0.1276,
    'Madrid': -3.7038,
    'Marrakech': -7.9811,
    'Mumbai': 72.8777,
    'Muscat': 58.3829,
    'New York': -74.0060,
    'Orlando': -81.3789,
    'Rio de Janeiro': -43.1729,
    'Paris': 2.3522,
    'Rome': 12.4964,
    'Singapore': 103.8198,
    'Shanghai': 121.4737,
    'Seoul': 126.9780,
    'Tokyo': 139.6503,
    'Amsterdam': 4.9041,
    'Barcelona': 2.1686,
    'Hong Kong': 114.1694,
    'Los Angeles': -118.2437,
    'Las Vegas': -115.1398,
    'Mexico City': -99.1332,
    'Sydney': 151.2093,
    'Toronto': -79.3832,
    'Vienna': 16.3738,
    'Dubai': 55.2708,
}


def Longitude (row):
    """Return the longitude of the city named in ``row['City']``.

    Fixes relative to the previous if-chain:
    * Dubai returned 25.2048, which is its latitude; it is now 55.2708.
    * Western-hemisphere cities (the Americas, London, Madrid, Marrakech)
      were stored without the minus sign, which placed them east of
      Greenwich on the map.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row) with a 'City' entry.

    Returns
    -------
    float
        Longitude in signed decimal degrees for a known city.
    str
        The literal 'Other' when the city is not in the lookup table
        (kept for backward compatibility with the original fallback).
    """
    return _CITY_LONGITUDES.get(row['City'], 'Other')

In [None]:
# look up the latitude of each row's city and store it in a new column
df['Latitude'] = df.apply(Latitude, axis=1)

In [None]:
# look up the longitude of each row's city and store it in a new column
df['Longitude'] = df.apply(Longitude, axis=1)

In [None]:
# spot-check one random row to confirm the new coordinate columns exist
df.sample(n=1)

In [None]:
# base map centred on [25.3800, 49.5888] at zoom level 2
my_map = folium.Map(
    location= [25.3800, 49.5888],
    zoom_start=2
)

# One marker per city. City names repeat across rows of df, so looping over
# every row stacked many identical markers on the same coordinates, which
# slowed the loop and bloated the saved HTML. Keeping the first row of each
# city yields the same visible map with a single marker per location.
city_points = df[['City', 'Latitude', 'Longitude']].drop_duplicates(subset='City')
for _, city in city_points.iterrows():
    folium.Marker(
        location=[city['Latitude'], city['Longitude']],
        popup= city['City'],
        tooltip= city['City']
        ).add_to(my_map)

# last expression of the cell: renders the finished map inline
my_map

In [None]:
# write the interactive map to a standalone HTML file in the working directory
map_output_file = 'tourCitiesMap.html'
my_map.save(map_output_file)