### The purpose of this notebook is to select and prepare the data processed in the other notebooks for use in R. Because Python is less convenient for geospatial analysis, most notably kernel density estimation (KDE), those analyses are carried out in R.

In [None]:
# Import the necessary packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import sqlite3
# Apply seaborn's default plot styling for the session.
sns.set()

In [None]:
# Load the three tourist datasets with identical parser settings:
# comma-separated, read in one pass (low_memory=False) and with '\n'
# as the only line terminator.
Tourists, TouristsAttachments, TouristsFlickr = (
    pd.read_csv(csv_path, sep=',', low_memory=False, lineterminator='\n')
    for csv_path in (
        'tourists_total.csv',
        'tourists_attachments.csv',
        'Flickr_Tourists.csv',
    )
)


In [None]:
# Show the column dtypes of the Flickr dataset.
TouristsFlickr.dtypes

In [None]:
# Keep only the longitude/latitude columns of each dataset.
# .copy() makes each subset an independent DataFrame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning (chained assignment on
# a slice of the source frame).
Tourists_Small = Tourists[['geo_lon', 'geo_lat']].copy()
TouristsFlickr_Small = TouristsFlickr[['longitude', 'latitude']].copy()
TouristsAttachments_Small = TouristsAttachments[['geo_lon', 'geo_lat']].copy()

In [None]:
# Summary statistics of the geo_lon column of Tourists_Small.
Tourists_Small['geo_lon'].describe()

In [None]:
# Summary statistics of the geo_lat column of Tourists_Small.
Tourists_Small['geo_lat'].describe()

In [None]:
# Summary statistics of the geo_lon column of TouristsAttachments_Small.
TouristsAttachments_Small['geo_lon'].describe()

In [None]:
# Summary statistics of the geo_lat column of TouristsAttachments_Small.
TouristsAttachments_Small['geo_lat'].describe()

In [None]:
# Summary statistics of the longitude column of TouristsFlickr_Small.
TouristsFlickr_Small['longitude'].describe()

In [None]:
# Summary statistics of the latitude column of TouristsFlickr_Small.
TouristsFlickr_Small['latitude'].describe()

In [None]:
# Keep only rows whose longitude is greater than 4.
# NOTE(review): presumably this removes points outside the study area; the
# Flickr subset is not filtered here -- confirm that this is intended.
# .copy() detaches the filtered result from its parent frame so that new
# columns can be assigned to it without a SettingWithCopyWarning.
Tourists_Small = Tourists_Small.loc[Tourists_Small['geo_lon'] > 4].copy()
TouristsAttachments_Small = TouristsAttachments_Small.loc[
    TouristsAttachments_Small['geo_lon'] > 4
].copy()

In [None]:
# Build a per-row coordinate key so that extremely common locations, which may
# need to be deleted from the dataset, can be counted.
# The key is str(longitude) directly followed by str(latitude), with no
# separator (e.g. 4.5 and 51.9167 become "4.551.9167").
# NOTE(review): the hard-coded keys compared against 'unique' further down
# rely on this exact string format, so changing it here requires updating
# those literals as well.
Tourists_Small["unique"] = Tourists_Small["geo_lon"].map(str) + Tourists_Small["geo_lat"].map(str)
TouristsAttachments_Small["unique"] = TouristsAttachments_Small["geo_lon"].map(str) + TouristsAttachments_Small["geo_lat"].map(str)

TouristsFlickr_Small["unique"] = TouristsFlickr_Small['longitude'].map(str) + TouristsFlickr_Small['latitude'].map(str)

In [None]:
# Count how often each coordinate key occurs in Tourists_Small. Keys with very
# high counts are the spots that can greatly influence the KDE heatmap.
Tourists_Small['unique'].value_counts()

In [None]:
# Count how often each coordinate key occurs in TouristsAttachments_Small.
TouristsAttachments_Small['unique'].value_counts()

In [None]:
# Count how often each coordinate key occurs in TouristsFlickr_Small. Keys with
# very high counts are the spots that can greatly influence the KDE heatmap.
TouristsFlickr_Small['unique'].value_counts()

In [None]:
# Remove the coordinate keys that were flagged as occurring more than 200
# times, using a single membership test instead of one filter per key.
Tourists_Small = Tourists_Small[
    ~Tourists_Small['unique'].isin([
        "4.551.9167",
        "4.475251.9235",
        "4.4694936351.92508148",
        "4.4784751.92286",
    ])
]

In [None]:
# Remove the one coordinate key flagged as occurring more than 200 times.
keep_rows = TouristsAttachments_Small['unique'].ne("4.551.9167")
TouristsAttachments_Small = TouristsAttachments_Small.loc[keep_rows]
del keep_rows  # temporary mask only; keep the notebook namespace unchanged

In [None]:
# Remove the Flickr coordinate keys that were flagged as occurring more than
# 150 times, using a single membership test instead of one filter per key.
TouristsFlickr_Small = TouristsFlickr_Small[
    ~TouristsFlickr_Small['unique'].isin([
        '4.48096251.918332',
        '4.4732851.897813',
        '4.47976151.970043',
        '4.48133851.917663',
        '4.59335851.931359',
        '4.48245451.967406',
        '4.49439151.882383000000004',
        '4.47898951.921101',
        '4.45181251.927397',
        '4.52621600000000251.894079',
        '4.49363351.897605',
        '4.45725751.916393',
        '4.63191851.844264',
        '4.47366751.916194',
    ])
]

In [None]:
# Re-check the key counts of TouristsFlickr_Small after the removals.
TouristsFlickr_Small['unique'].value_counts()

In [None]:
# Re-check the key counts of TouristsAttachments_Small after the removals.
TouristsAttachments_Small['unique'].value_counts()

In [None]:
# Re-check the key counts of Tourists_Small after the removals; the remaining
# high counts show which spots can still greatly influence the KDE heatmap.
Tourists_Small['unique'].value_counts()

In [None]:
# The helper key column is no longer needed; remove it from all three frames
# so only the coordinate columns remain.
Tourists_Small = Tourists_Small.drop(columns=['unique'])
TouristsAttachments_Small = TouristsAttachments_Small.drop(columns=['unique'])
TouristsFlickr_Small = TouristsFlickr_Small.drop(columns=['unique'])

In [None]:
# Give the Flickr frame the same column names as the attachment dataset so
# the two can be combined.
TouristsFlickr_Small = TouristsFlickr_Small.rename(
    columns={'longitude': 'geo_lon', 'latitude': 'geo_lat'}
)

In [None]:
# Stack the attachment rows and the Flickr rows into one photo dataset.
# ignore_index=True gives the result a fresh 0..n-1 index; without it the
# row labels of both inputs are kept and the combined frame contains
# duplicate index labels.
# NOTE(review): both frames are expected to have the same column names at
# this point; otherwise concat produces extra, NaN-filled columns.
frames = [TouristsAttachments_Small, TouristsFlickr_Small]
Photoset = pd.concat(frames, ignore_index=True)

In [None]:
# Write Tourists_Small to CSV without the index column.
Tourists_Small.to_csv('tourists_geo.csv', index=False)

In [None]:
# Write TouristsAttachments_Small to CSV without the index column.
TouristsAttachments_Small.to_csv('tourists_geo_attachments.csv', index=False)

In [None]:
# Write TouristsFlickr_Small to CSV without the index column.
TouristsFlickr_Small.to_csv('FlickrTourists_geo.csv', index=False)

In [None]:
# Write the combined photo dataset (Photoset) to CSV without the index column.
Photoset.to_csv('Tourists_Photo.csv', index=False)