In [1]:
import pandas as pd
import pymongo


# Load the three raw CSV files (arrivals, receipts, GDP), in that order.
arrivals_df, receipts_df, gdp_df = (
    pd.read_csv(csv_path)
    for csv_path in ("tourist_arrival.csv", "tourist_receipts.csv", "GDP.csv")
)

In [2]:
# Reshape each wide table into long format: the identifier columns are kept,
# and every other column becomes a ('Year', value) pair on its own row.
_ID_COLUMNS = ['Series Name', 'Series Code', 'Country Name', 'Country Code']


def _to_long(wide_df, value_name):
    """Melt `wide_df` on the shared identifier columns.

    The header of each non-identifier column is stored under 'Year' and the
    corresponding cell value under `value_name`.
    """
    return wide_df.melt(id_vars=_ID_COLUMNS, var_name='Year', value_name=value_name)


receipts_long = _to_long(receipts_df, 'Receipts')
arrivals_long = _to_long(arrivals_df, 'Arrivals')
gdp_long = _to_long(gdp_df, 'GDP')


In [3]:
# Keep only the first row for each (country name, country code, year)
# combination in every long table.
_dedup_keys = ['Country Name', 'Country Code', 'Year']
receipts_long, arrivals_long, gdp_long = (
    long_frame.drop_duplicates(subset=_dedup_keys)
    for long_frame in (receipts_long, arrivals_long, gdp_long)
)

In [4]:
# Join receipts, arrivals and GDP on country and year. No `how` is given, so
# pandas' default inner join applies and only keys present in all three remain.
_join_keys = ['Country Name', 'Country Code', 'Year']
tourism_data = (
    receipts_long
    .merge(arrivals_long, on=_join_keys)
    .merge(gdp_long, on=_join_keys)
)


In [5]:
# Display the dtype pandas assigned to each column of the merged frame.
tourism_data.dtypes

Series Name_x    object
Series Code_x    object
Country Name     object
Country Code     object
Year             object
Receipts         object
Series Name_y    object
Series Code_y    object
Arrivals         object
Series Name      object
Series Code      object
GDP              object
dtype: object

In [6]:
# Write the merged frame (Series columns still present) to CSV without the index.
# NOTE(review): 'unclead' looks like a typo for 'unclean' — confirm nothing
# else reads this exact filename before renaming it.
tourism_data.to_csv('unclead_data.csv', index=False)

In [7]:
# Preview the first 10 rows of the merged frame.
tourism_data.head(10)

Unnamed: 0,Series Name_x,Series Code_x,Country Name,Country Code,Year,Receipts,Series Name_y,Series Code_y,Arrivals,Series Name,Series Code,GDP
0,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,Afghanistan,AFG,1995 [YR1995],..,"International tourism, number of arrivals",ST.INT.ARVL,..,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,..
1,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,Albania,ALB,1995 [YR1995],70000000,"International tourism, number of arrivals",ST.INT.ARVL,304000,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,13.322333321684
2,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,Algeria,DZA,1995 [YR1995],..,"International tourism, number of arrivals",ST.INT.ARVL,520000,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,3.79999478984085
3,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,American Samoa,ASM,1995 [YR1995],..,"International tourism, number of arrivals",ST.INT.ARVL,34000,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,..
4,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,Andorra,AND,1995 [YR1995],..,"International tourism, number of arrivals",ST.INT.ARVL,..,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,2.75750161326249
5,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,Angola,AGO,1995 [YR1995],27000000,"International tourism, number of arrivals",ST.INT.ARVL,9000,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,15.0000000288634
6,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,Antigua and Barbuda,ATG,1995 [YR1995],..,"International tourism, number of arrivals",ST.INT.ARVL,447000,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,-4.35958738239265
7,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,Argentina,ARG,1995 [YR1995],2550000000,"International tourism, number of arrivals",ST.INT.ARVL,2289000,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,-2.84520961057079
8,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,Armenia,ARM,1995 [YR1995],14000000,"International tourism, number of arrivals",ST.INT.ARVL,12000,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,6.89999841973659
9,"International tourism, receipts (current US$)",ST.INT.RCPT.CD,Aruba,ABW,1995 [YR1995],554000000,"International tourism, number of arrivals",ST.INT.ARVL,912000,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,2.54714368704694


In [8]:
# Remove the Series Name / Series Code columns carried over from each source
# table (the `_x` / `_y` suffixes were added by the merges).
_series_metadata_columns = [
    'Series Name_x', 'Series Code_x',
    'Series Name_y', 'Series Code_y',
    'Series Name', 'Series Code',
]
tourism_data = tourism_data.drop(columns=_series_metadata_columns)


In [9]:
# Display the column dtypes again now that the Series columns have been dropped.
tourism_data.dtypes

Country Name    object
Country Code    object
Year            object
Receipts        object
Arrivals        object
GDP             object
dtype: object

In [10]:
# Write the trimmed frame to CSV; index=False omits the row index from the file.
tourism_data.to_csv('merged_tourism_gdp_data.csv', index=False)

In [11]:
from urllib.parse import quote_plus

from dotenv import dotenv_values

# Read the MongoDB credentials from the local `creds.env` file.
config = dotenv_values("creds.env")

# Store the raw (unescaped) values in creds.env; escaping happens below.
user = config['user']
password = config['password']

# Connect to MongoDB.
# The username and password must be percent-escaped before being embedded in
# the URI: characters such as '@', ':', '/' or '%' would otherwise be parsed
# as URI delimiters and break (or silently change) the connection string.
uri = (
    "mongodb+srv://"
    + quote_plus(user)
    + ":"
    + quote_plus(password)
    + "@cluster0.6jfc5iw.mongodb.net/"
)
client = pymongo.MongoClient(uri)


In [12]:
# Names of the target database and collection on the cluster.
_DATABASE_NAME = 'gfw'
_COLLECTION_NAME = 'tourism'

# Handle to the database, then to the collection inside it.
db = client[_DATABASE_NAME]
collection = db[_COLLECTION_NAME]

In [13]:
# Convert the frame into a list with one {column: value} dict per row.
data_to_insert = tourism_data.to_dict(orient='records')

In [14]:
# Bulk-insert every record into the collection.
# The client is closed in `finally` so the connection is released even when
# the insert raises (e.g. network or authentication failure).
try:
    result = collection.insert_many(data_to_insert)
finally:
    # Close the connection
    client.close()