In [5]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
import matplotlib as plt
import tensorflow as tf

# Provide the file path to your local dataset
file_path = "/Users/mavingill/Downloads/Greece_Travel_Data/travel_tourism_dataset.csv"

# Load the dataset from the specified file
greece_travel_data = pd.read_csv(file_path)

# Display the first few rows of the dataset
greece_travel_data.head()

Unnamed: 0,Trip #no.,Duration,Cost of Travel(Entire Trip),Mode of Travel,Stay,First Name,Last Name,Date of Birth,Address,Age,Sex,Nationality,Date of Travel
0,55,1,"$20,111.00",Flight,Hotel,Cristina,Fuentes,6/14/14,"5143 Pope Camp Apt. 028\nLake Emily, UT 71371",74,Non-Binary,Lao People's Democratic Republic,9/13/22
1,288,5,"$29,681.00",Car,Hotel,Patricia,Young,9/25/23,716 Dominguez Row Suite 775\nSouth Tiffanyboro...,19,Female,Gabon,9/4/21
2,291,7,"$24,950.00",Flight,Airbnb,Christina,Kirby,1/2/35,"PSC 4600, Box 1237\nAPO AE 32510",36,Female,Syrian Arab Republic,1/25/21
3,131,19,"$18,676.00",Roadtrip,Airbnb,Michael,Hudson,4/21/82,"7013 Bryant Club Suite 695\nSouth Tina, WI 26739",38,Non-Binary,Afghanistan,1/28/22
4,281,28,"$18,899.00",Flight,Airbnb,Francisco,Hensley,6/29/13,"074 Scott Brook Suite 700\nLukeville, VA 46541",38,Non-Binary,Uruguay,10/21/21


In [6]:
# Drop the non-beneficial ID columns, 'Trip #no.'.
greece_travel_data = greece_travel_data.drop(columns = ['Trip #no.'])
greece_travel_data

Unnamed: 0,Duration,Cost of Travel(Entire Trip),Mode of Travel,Stay,First Name,Last Name,Date of Birth,Address,Age,Sex,Nationality,Date of Travel
0,1,"$20,111.00",Flight,Hotel,Cristina,Fuentes,6/14/14,"5143 Pope Camp Apt. 028\nLake Emily, UT 71371",74,Non-Binary,Lao People's Democratic Republic,9/13/22
1,5,"$29,681.00",Car,Hotel,Patricia,Young,9/25/23,716 Dominguez Row Suite 775\nSouth Tiffanyboro...,19,Female,Gabon,9/4/21
2,7,"$24,950.00",Flight,Airbnb,Christina,Kirby,1/2/35,"PSC 4600, Box 1237\nAPO AE 32510",36,Female,Syrian Arab Republic,1/25/21
3,19,"$18,676.00",Roadtrip,Airbnb,Michael,Hudson,4/21/82,"7013 Bryant Club Suite 695\nSouth Tina, WI 26739",38,Non-Binary,Afghanistan,1/28/22
4,28,"$18,899.00",Flight,Airbnb,Francisco,Hensley,6/29/13,"074 Scott Brook Suite 700\nLukeville, VA 46541",38,Non-Binary,Uruguay,10/21/21
...,...,...,...,...,...,...,...,...,...,...,...,...
2995,24,"$17,801.00",Roadtrip,Resorts,Kerry,Allen,6/2/02,"93471 Joyce Courts Apt. 859\nMitchellside, OK ...",73,Non-Binary,Mali,7/3/23
2996,16,"$23,410.00",Train,Hotel,Kevin,Johnson,7/28/16,"921 Katie Parkways Apt. 558\nWest Lawrence, CT...",50,Male,Mexico,8/22/22
2997,22,"$5,656.00",Roadtrip,Hotel,William,Mclaughlin,7/25/17,"99384 Shirley Pine Apt. 439\nEast Angela, ND 1...",36,Non-Binary,American Samoa,2/8/21
2998,15,"$27,509.00",Car,Airbnb,Marcus,Ray,7/7/62,"PSC 1488, Box 2535\nAPO AP 58905",48,Female,Jersey,6/19/21


In [7]:
# Determine the number of unique values in each column.
greece_travel_data.nunique()

Duration                         30
Cost of Travel(Entire Trip)    2818
Mode of Travel                    6
Stay                              4
First Name                      546
Last Name                       805
Date of Birth                  2879
Address                        3000
Age                              58
Sex                               4
Nationality                     243
Date of Travel                 1194
dtype: int64

In [8]:
#Review the Dataframe
greece_travel_data.shape

(3000, 12)

In [9]:
# Convert 'Date of Travel' column to datetime
greece_travel_data['Date of Travel'] = pd.to_datetime(greece_travel_data['Date of Travel'])

# Extract year and month from the date
greece_travel_data['Year'] = greece_travel_data['Date of Travel'].dt.year
greece_travel_data['Month'] = greece_travel_data['Date of Travel'].dt.month

# Display the cleaned dataset
with pd.option_context('display.colheader_justify', 'center'):
    print(greece_travel_data.to_string(index=False))

 Duration Cost of Travel(Entire Trip) Mode of Travel   Stay   First Name  Last Name  Date of Birth                              Address                               Age     Sex                         Nationality                     Date of Travel  Year  Month
     1             $20,111.00              Flight      Hotel    Cristina     Fuentes     6/14/14                       5143 Pope Camp Apt. 028\nLake Emily, UT 71371  74   Non-Binary                    Lao People's Democratic Republic   2022-09-13    2022    9  
     5             $29,681.00                 Car      Hotel    Patricia       Young     9/25/23         716 Dominguez Row Suite 775\nSouth Tiffanyborough, WV 27800  19       Female                                               Gabon   2021-09-04    2021    9  
     7             $24,950.00              Flight     Airbnb   Christina       Kirby      1/2/35                                    PSC 4600, Box 1237\nAPO AE 32510  36       Female                                S

  greece_travel_data['Date of Travel'] = pd.to_datetime(greece_travel_data['Date of Travel'])


In [10]:
# Explore Mode of Travel
grouped_travelmode = greece_travel_data.groupby('Mode of Travel')

In [67]:
# Convert categorical data to numeric with `pd.get_dummies`
dummies = pd.get_dummies(greece_travel_data)
dummies.head()

Unnamed: 0,Duration,Age,"Cost of Travel(Entire Trip)_$10,006.00","Cost of Travel(Entire Trip)_$10,018.00","Cost of Travel(Entire Trip)_$10,027.00","Cost of Travel(Entire Trip)_$10,038.00","Cost of Travel(Entire Trip)_$10,044.00","Cost of Travel(Entire Trip)_$10,047.00","Cost of Travel(Entire Trip)_$10,048.00","Cost of Travel(Entire Trip)_$10,062.00",...,Date of Travel_9/6/22,Date of Travel_9/7/20,Date of Travel_9/7/21,Date of Travel_9/7/22,Date of Travel_9/8/20,Date of Travel_9/8/21,Date of Travel_9/8/22,Date of Travel_9/9/20,Date of Travel_9/9/21,Date of Travel_9/9/22
0,1,74,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,5,19,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,7,36,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,19,38,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,28,38,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [60]:
# Split our preprocessed data into our features and target arrays
y = dummies['Duration'].values
X = dummies.drop(columns='Duration').values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [41]:
# Remove dollar signs and commas and convert 'Cost of Travel(Entire Trip)' to float
greece_travel_data['Cost of Travel(Entire Trip)'] = greece_travel_data['Cost of Travel(Entire Trip)'].str.replace('$', '').str.replace(',', '').astype(float)

In [42]:
# Assuming you have set 'Best_Time_to_Travel' based on your criteria

# Define features and target
features = ['Year', 'Month']  # Add more features if needed
target = 'Date of Travel'

# Prepare the features and target variables
X = greece_travel_data[features]
y = greece_travel_data[target]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a machine learning classifier (for example, RandomForestClassifier)
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Evaluate the model
accuracy = model.score(X_test, y_test)
print(f'Model Accuracy: {accuracy:.2f}')

Model Accuracy: 0.04
