In [14]:
import pandas as pd
import numpy as np
import os
import requests
import pickle

import isodate
from datetime import datetime
import time

from dotenv import load_dotenv 
load_dotenv()

# youtube api
from googleapiclient.discovery import build
import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors


# text recognition
import nltk
import spacy
import locationtagger
import re
import itertools

# different operators
from collections import Counter
from operator import itemgetter

# pytrends api
#from pytrends.request import TrendReq

# viz
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Loading dataframes

In [80]:
# Youtube data

channel_stats = pd.read_pickle(r'./data/channel_stats.pkl')
videos_info = pd.read_pickle(r'./data/video_info.pkl')
places_per_year_by_channel = pd.read_pickle(r'./data/places_per_year_by_channel.pkl')
places_per_year_filtered = pd.read_pickle(r'./data/places_year_filtered_notuple.pkl')


In [12]:
# Google trends data

pytrends_top_places_2018= pd.read_pickle('pytrends_top_places_2018.pkl')
pytrends_top_places_2019= pd.read_pickle('pytrends_top_places_2019.pkl')
pytrends_top_places_2020 = pd.read_pickle('pytrends_top_places_2020.pkl')
pytrends_top_places_2021= pd.read_pickle('pytrends_top_places_2021.pkl')

In [13]:
# Hotel occupancy rate and international arrivals data

hotel_data = pd.read_pickle('hotel_data_edited.pkl')
number_of_tourist_arrivals = pd.read_pickle('international-tourism-number-of-arrivals.pkl')


In [214]:
pytrends_top_places_2021.columns

Index(['date', 'Thailand', 'California', 'New zealand'], dtype='object')

# Visualizations

## Videos mentioning a place X Google trends interest over time for the same place

In [307]:
def plotting_trends_videos(trends, videos_info, place, year):

    select = videos_info.loc[(videos_info['everywhere_string'].str.contains(f"{place}")) & 
                             (videos_info['year_published'] == year)]

    trace1 = go.Line(
        x = trends['date'],
        y = trends[place],
        name= f'Interest over time for {place}',
        marker=dict(
            color='rgb(34,163,192)'
                   )
    )
    trace2 = go.Scatter(
        x=select['publishedAt'],
        y=select['viewCount'],
        name='Videos posted by number of views',
        yaxis='y2',
        mode = 'markers',
        marker = dict(size=8,
                      symbol = 'star-diamond',
                      color = 'rgba(190, 167, 9, 0.8)')

    )

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(trace1)
    fig.add_trace(trace2, secondary_y=True)
    
    fig['layout'].update(height = 600, width = 1000, 
                         title = f"{year}: Google Trends for '{place}' x Travel videos on YouTube that mention '{place}'",
                         xaxis=dict(tickangle=-90),
                        xaxis_title="Date",
                        yaxis_title="Google trends - interest over time",
                        yaxis2_title="Number of views per video in millions",
                        plot_bgcolor='rgba(240, 242, 247, 0.8)')
                        #,legend_title="Legend")
    
    fig.update_xaxes(minor=dict(ticklen=6, tickcolor="black", showgrid=True))
    
    return fig

In [308]:
thai_2018_trend = plotting_trends_videos(pytrends_top_places_2018, videos_info, 'Thailand', 2018)
thai_2018_trend.write_image(r".\images\graphs\thai_2018_trend.png")
thai_2018_trend.show()

In [227]:
bangkok_2018_trend = plotting_trends_videos(pytrends_top_places_2018, videos_info, 'Bangkok', 2018)
bangkok_2018_trend.write_image(r".\images\graphs\bangkok_2018_trend.png")
bangkok_2018_trend.show()

In [228]:
philippines_2018_trend = plotting_trends_videos(pytrends_top_places_2018, videos_info, 'Philippines', 2018)
philippines_2018_trend.write_image(r".\images\graphs\philippines_2018_trend.png")
philippines_2018_trend.show()

In [229]:
japan_2019_trend = plotting_trends_videos(pytrends_top_places_2019, videos_info, 'Japan', 2019)
japan_2019_trend.write_image(r".\images\graphs\japan_2019_trend.png")
japan_2019_trend.show()

In [230]:
thai_2019_trend = plotting_trends_videos(pytrends_top_places_2019, videos_info, 'Thailand', 2019)
thai_2019_trend.write_image(r".\images\graphs\thai_2019_trend.png")
thai_2019_trend.show()

In [231]:
india_2019_trend = plotting_trends_videos(pytrends_top_places_2019, videos_info, 'India', 2019)
india_2019_trend.write_image(r".\images\graphs\india_2019_trend.png")
india_2019_trend.show()

In [232]:
thai_2020_trend = plotting_trends_videos(pytrends_top_places_2020, videos_info, 'Thailand', 2020)
thai_2020_trend.write_image(r".\images\graphs\thai_2020_trend.png")
thai_2020_trend.show()

In [233]:
india_2020_trend = plotting_trends_videos(pytrends_top_places_2020, videos_info, 'India', 2020)
india_2020_trend.write_image(r".\images\graphs\india_2020_trend.png")
india_2020_trend.show()

In [234]:
# bias: some channels talked a lot about canada because the owners were living there

canada_2020_trend = plotting_trends_videos(pytrends_top_places_2020, videos_info, 'Canada', 2020)
canada_2020_trend.write_image(r".\images\graphs\canada_2020_trend.png")
canada_2020_trend.show()

In [235]:
thai_2021_trend = plotting_trends_videos(pytrends_top_places_2021, videos_info, 'Thailand', 2021)
thai_2021_trend.write_image(r".\images\graphs\thai_2021_trend.png")
thai_2021_trend.show()

In [236]:
newzealand_2021_trend = plotting_trends_videos(pytrends_top_places_2021, videos_info, 'New zealand', 2021)
newzealand_2021_trend.write_image(r".\images\graphs\newzealand_2021_trend.png")
newzealand_2021_trend.show()

In [237]:
california_2021_trend = plotting_trends_videos(pytrends_top_places_2021, videos_info, 'California', 2021)
california_2021_trend.write_image(r".\images\graphs\california_2021_trend.png")
california_2021_trend.show()

## Number of views per place over time X Number of tourist arrivals over time

In [340]:
def plotting_tourism_data (videos_info, tourism_info):

    place = input('Type the place \n')
    
    views = videos_info.loc[(videos_info['everywhere_string'].str.contains(place)) 
                            & ((videos_info['year_published']>=2018) & (videos_info['year_published']<2022))
                           ].groupby('year_published')[['viewCount']].sum().reset_index()
    
    arrivals = number_of_tourist_arrivals.loc[(number_of_tourist_arrivals['Entity'] == place) 
                                             & (number_of_tourist_arrivals['Year']>=2018)]
    
    trace1 = go.Line(
        x = views['year_published'],
        y = views['viewCount'],
        name= f'Total number of YouTube views for {place}',
        yaxis='y2',
        line = dict(width = 2),
        marker=dict(
            color='rgb(34,163,192)',
            line = dict(width = 2, color='gray')
            )
    )

    trace2 = go.Bar(
        x = arrivals['Year'],
        y = arrivals['International tourism, number of arrivals'],
        name='Number of international tourists arriving',
        marker = dict(color = 'rgba(221, 206, 103, 0.8)')
    )


    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(trace1, secondary_y=True)
    fig.add_trace(trace2)


    fig['layout'].update(height = 600, width = 1000, 
                         title = f"2018-2021: Number of tourist arrivals in '{place}' x Travel videos on YouTube that mention '{place}'",
                         xaxis=dict(tickangle=-90),
                         xaxis_title="Year",
                         yaxis2_title="Total views in millions",
                         yaxis_title="Number of arrivals in millions",
                         plot_bgcolor='rgba(240, 242, 247, 0.8)')
                        #,legend_title="Legend")

    fig.update_yaxes(minor=dict(ticklen=6, tickcolor="black", showgrid=True, nticks=3))

    return fig

In [341]:
thai_tourism = plotting_tourism_data(videos_info, number_of_tourist_arrivals)
thai_tourism.write_image(r".\images\graphs\thai_tourism.png")
thai_tourism.show()

Type the place 
Thailand



plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [342]:
japan_tourism = plotting_tourism_data(videos_info, number_of_tourist_arrivals)
japan_tourism.write_image(r".\images\graphs\japan_tourism.png")
japan_tourism.show()

Type the place 
Japan



plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


