In [18]:
# Import the required modules
import pandas as pd
from pathlib import Path
import hvplot.pandas
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LinearRegression

In [19]:
# Read the CSV file from the Resources folder and create a Pandas DataFrame
Greece_data_df = pd.read_csv("Resources/travel_tourism_dataset.csv")
# Review the DataFrame
Greece_data_df.tail()

Unnamed: 0,Trip #no.,Duration,Cost of Travel(Entire Trip),Mode of Travel,Stay,First Name,Last Name,Date of Birth,Address,Age,Sex,Nationality,Date of Travel
2995,142,24,17801,Roadtrip,Resorts,Kerry,Allen,2002-06-02,"93471 Joyce Courts Apt. 859\nMitchellside, OK ...",73,Non-Binary,Mali,2023-07-03
2996,265,16,23410,Train,Hotel,Kevin,Johnson,1916-07-28,"921 Katie Parkways Apt. 558\nWest Lawrence, CT...",50,Male,Mexico,2022-08-22
2997,232,22,5656,Roadtrip,Hotel,William,Mclaughlin,1917-07-25,"99384 Shirley Pine Apt. 439\nEast Angela, ND 1...",36,Non-Binary,American Samoa,2021-02-08
2998,112,15,27509,Car,Airbnb,Marcus,Ray,1962-07-07,"PSC 1488, Box 2535\nAPO AP 58905",48,Female,Jersey,2021-06-19
2999,283,13,19865,Cruise,Airbnb,Tom,Fletcher,1990-03-20,908 Christina Junctions Apt. 546\nPerkinsville...,22,Non-Binary,Mozambique,2021-08-07


In [20]:
# Review the info
Greece_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 13 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   Trip #no.                    3000 non-null   int64 
 1   Duration                     3000 non-null   int64 
 2   Cost of Travel(Entire Trip)  3000 non-null   int64 
 3   Mode of Travel               3000 non-null   object
 4   Stay                         3000 non-null   object
 5   First Name                   3000 non-null   object
 6   Last Name                    3000 non-null   object
 7   Date of Birth                3000 non-null   object
 8   Address                      3000 non-null   object
 9   Age                          3000 non-null   int64 
 10  Sex                          3000 non-null   object
 11  Nationality                  3000 non-null   object
 12  Date of Travel               3000 non-null   object
dtypes: int64(4), object(9)
memory usa

In [21]:
# Verify the categories of the "education" column
Greece_data_df["Mode of Travel"].value_counts()

Car         551
Bus         516
Cruise      501
Train       498
Roadtrip    473
Flight      461
Name: Mode of Travel, dtype: int64

In [22]:
# Transform the Mode of Travel column using get_dummies
ModeofTravel_dummies = pd.get_dummies(Greece_data_df["Mode of Travel"])
# Display the transformed data
ModeofTravel_dummies.tail()

Unnamed: 0,Bus,Car,Cruise,Flight,Roadtrip,Train
2995,0,0,0,0,1,0
2996,0,0,0,0,0,1
2997,0,0,0,0,1,0
2998,0,1,0,0,0,0
2999,0,0,1,0,0,0


In [23]:
# Concatenate the df_shopping_transformed and the card_dummies DataFrames
Greece_data_df = pd.concat([Greece_data_df, ModeofTravel_dummies], axis=1)

# Drop the original Mode of Travel column
Greece_data_df = Greece_data_df.drop(columns=["Mode of Travel"])

# Display the DataFrame
Greece_data_df.head()

Unnamed: 0,Trip #no.,Duration,Cost of Travel(Entire Trip),Stay,First Name,Last Name,Date of Birth,Address,Age,Sex,Nationality,Date of Travel,Bus,Car,Cruise,Flight,Roadtrip,Train
0,55,1,20111,Hotel,Cristina,Fuentes,1914-06-14,"5143 Pope Camp Apt. 028\nLake Emily, UT 71371",74,Non-Binary,Lao People's Democratic Republic,2022-09-13,0,0,0,1,0,0
1,288,5,29681,Hotel,Patricia,Young,1923-09-25,716 Dominguez Row Suite 775\nSouth Tiffanyboro...,19,Female,Gabon,2021-09-04,0,1,0,0,0,0
2,291,7,24950,Airbnb,Christina,Kirby,1935-01-02,"PSC 4600, Box 1237\nAPO AE 32510",36,Female,Syrian Arab Republic,2021-01-25,0,0,0,1,0,0
3,131,19,18676,Airbnb,Michael,Hudson,1982-04-21,"7013 Bryant Club Suite 695\nSouth Tina, WI 26739",38,Non-Binary,Afghanistan,2022-01-28,0,0,0,0,1,0
4,281,28,18899,Airbnb,Francisco,Hensley,1913-06-29,"074 Scott Brook Suite 700\nLukeville, VA 46541",38,Non-Binary,Uruguay,2021-10-21,0,0,0,1,0,0


In [33]:
X = Greece_data_df[['Cost of Travel(Entire Trip)', 'Bus']]
y = Greece_data_df['Age']

In [34]:
from sklearn import linear_model

In [35]:
regr = linear_model.LinearRegression()
regr.fit(X, y)

In [37]:
#predict the age of a person who has spent 29000 on a bus trip:
PredictAgev19000AndBus = regr.predict([[29000, 1]])
print(PredictAgev19000AndBus)

[44.35193896]




In [28]:
X = Greece_data_df[['Cost of Travel(Entire Trip)', 'Flight']]
y = Greece_data_df['Age']


In [29]:
from sklearn import linear_model


In [30]:
regr = linear_model.LinearRegression()
regr.fit(X, y)


In [38]:
#predict the age of a person who has spent 29000 on a flight trip:
PredictAgev19000AndFlight = regr.predict([[29000, 1]])
print(PredictAgev19000AndFlight)

[44.35193896]




In [39]:
#export data
Greece_data_df.to_csv("Resources/Greece_data_ModeOfTravel_cleaned_df.csv", index=False)