In [2]:
import pandas as pd
import numpy as np
import os
import pickle

# import isodate
# from datetime import datetime
# import time

from dotenv import load_dotenv 
load_dotenv()


# text recognition
# import nltk
# import spacy
# import locationtagger
# import re
# import itertools

# different operators
from collections import Counter
from operator import itemgetter


# viz
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# streamlit
import streamlit as st

# Loading dataframes

In [7]:
# YouTube data (pickled DataFrames stored under ./data)
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.

channel_stats = pd.read_pickle("./data/channel_stats.pkl")
videos_info = pd.read_pickle("./data/video_info.pkl")
places_per_year_by_channel = pd.read_pickle("./data/places_per_year_by_channel.pkl")
places_per_year_filtered = pd.read_pickle("./data/places_year_filtered_notuple.pkl")
# NOTE(review): unlike its siblings this path has no ".pkl" suffix - confirm the file name.
places_mentions_views = pd.read_pickle("./data/all_places_views_per_year")

In [8]:
# Preview the places table (places, mentions, total views, year); pandas truncates long frames.
places_mentions_views

Unnamed: 0,top_places_,mentions,total_number_of_views,year
0,Thailand,92,125362547.0,2018
1,Pakistan,17,103972401.0,2018
2,Bangkok,64,90442073.0,2018
3,Mexico,40,82375702.0,2018
4,Ghana,10,48081699.0,2018
...,...,...,...,...
4166,Pisa,1,1265.0,2010
4167,Quel,1,1265.0,2010
4168,Charles,1,840.0,2010
4169,Grammer,1,576.0,2010


In [None]:
# Google Trends data: one pickled DataFrame per year, bound to one name per year.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.

(
    pytrends_top_places_2018,
    pytrends_top_places_2019,
    pytrends_top_places_2020,
    pytrends_top_places_2021,
) = (
    pd.read_pickle(f"./data/pytrends_top_places_{year}.pkl")
    for year in (2018, 2019, 2020, 2021)
)

In [None]:
# Hotel occupancy rates and international tourist arrivals (pickled DataFrames)
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.

hotel_data = pd.read_pickle("./data/hotel_data_edited.pkl")
number_of_tourist_arrivals = pd.read_pickle(
    "./data/international-tourism-number-of-arrivals.pkl"
)


# Visualizations

## Most mentioned places X Most views per place

In [None]:
def general_data(year, places, top_n=5):
    """Build a dual-axis chart of a year's most-viewed places and their mentions.

    Parameters
    ----------
    year
        Value matched against the ``year`` column of ``places``.
    places : pandas.DataFrame
        Must provide the columns ``year``, ``top_places_``, ``mentions`` and
        ``total_number_of_views``.
    top_n : int, default 5
        Number of places to display, ranked by ``total_number_of_views``.

    Returns
    -------
    plotly.graph_objects.Figure
        Bars on the primary (left) y-axis show total views; the line on the
        secondary (right) y-axis shows the number of mentions.
    """
    places_in_year = places[places['year'] == year]
    # Rank explicitly rather than trusting the incoming row order, so the chart
    # really shows the most-viewed places even if ``places`` arrives unsorted.
    top_views = places_in_year.nlargest(top_n, 'total_number_of_views')

    mentions_trace = go.Scatter(
        mode="lines",
        x=top_views['top_places_'],
        y=top_views['mentions'],
        name='Number of mentions',
        marker=dict(color='black'),
    )

    views_trace = go.Bar(
        x=top_views['top_places_'],
        y=top_views['total_number_of_views'],
        name='Number of views',
        marker=dict(color='#04cad8'),
    )

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Mentions go on the secondary axis, views on the primary one.
    fig.add_trace(mentions_trace, secondary_y=True)
    fig.add_trace(views_trace, secondary_y=False)

    fig.update_layout(
        height=600,
        width=1000,
        title=f"{year}'s top {top_n} places in number of views <br><sup>Ranking order is not necessarily the same for mentions</sup>",
        xaxis=dict(tickangle=0),
        xaxis_title=f"Top {top_n} places",
        # The axis titles must follow the traces: yaxis carries the view bars,
        # yaxis2 carries the mentions line (they were previously swapped).
        yaxis_title="Total views in millions",
        yaxis2_title="Number of mentions",
        plot_bgcolor='rgba(240, 242, 247, 0.8)',
    )

    return fig

In [None]:
top_5_places_views_mentions_2018 = general_data(2018,places_mentions_views)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
top_5_places_views_mentions_2018.write_image("./images/graphs/top_5_places_views_mentions_2018.png")
top_5_places_views_mentions_2018.show()

In [None]:
top_5_places_views_mentions_2019 = general_data(2019,places_mentions_views)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
top_5_places_views_mentions_2019.write_image("./images/graphs/top_5_places_views_mentions_2019.png")
top_5_places_views_mentions_2019.show()

In [None]:
top_5_places_views_mentions_2020 = general_data(2020, places_mentions_views)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
top_5_places_views_mentions_2020.write_image("./images/graphs/top_5_places_views_mentions_2020.png")
top_5_places_views_mentions_2020.show()

In [None]:
top_5_places_views_mentions_2021 = general_data(2021, places_mentions_views)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
top_5_places_views_mentions_2021.write_image("./images/graphs/top_5_places_views_mentions_2021.png")
top_5_places_views_mentions_2021.show()

In [4]:
# Spot-check one random row of the places table.
# Depends on the data-loading cell: on a kernel where that cell has not run
# this raises NameError (as in the recorded output below).
places_mentions_views.sample()

NameError: name 'places_mentions_views' is not defined

## Videos mentioning a place X Google trends interest over time for the same place

In [None]:
def plotting_trends_videos(trends, videos_info, place, year):
    """Overlay Google Trends interest for a place with the videos mentioning it.

    Parameters
    ----------
    trends : pandas.DataFrame
        Must provide a ``date`` column and a column named after ``place``.
    videos_info : pandas.DataFrame
        Must provide the columns ``everywhere_string``, ``year_published``,
        ``publishedAt`` and ``viewCount``.
    place : str
        Place name; used as the column key in ``trends`` and as a literal
        substring searched for in ``everywhere_string``.
    year
        Value matched against ``year_published``.

    Returns
    -------
    plotly.graph_objects.Figure
        Trends line on the primary y-axis, one marker per matching video
        (by view count) on the secondary y-axis.
    """
    # Literal match (regex=False) so names containing regex metacharacters are
    # not misread; na=False turns rows with missing text into plain False so
    # the boolean mask below stays valid.
    mentions_place = videos_info['everywhere_string'].str.contains(
        str(place), regex=False, na=False
    )
    published_in_year = videos_info['year_published'] == year
    select = videos_info.loc[mentions_place & published_in_year]

    # go.Scatter replaces the deprecated go.Line; the mode is left at plotly's
    # default so the rendering matches what go.Line produced.
    trace1 = go.Scatter(
        x=trends['date'],
        y=trends[place],
        name=f'Interest over time for {place}',
        marker=dict(color='black'),
    )
    trace2 = go.Scatter(
        x=select['publishedAt'],
        y=select['viewCount'],
        name='Videos posted by number of views',
        yaxis='y2',
        mode='markers',
        marker=dict(size=8,
                    symbol='diamond-dot',
                    color='#04cad8'),
    )

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(trace1)
    fig.add_trace(trace2, secondary_y=True)

    fig.update_layout(
        height=600,
        width=1000,
        title=f"Google Trends results for '{place}' x Views on YouTube for videos that mention '{place}'",
        xaxis=dict(tickangle=-45),
        xaxis_title="Date",
        yaxis_title="Google trends - interest over time",
        yaxis2_title="Number of views per video in millions",
        plot_bgcolor='rgba(240, 242, 247, 0.8)',
    )

    fig.update_xaxes(minor=dict(ticklen=6, tickcolor="black", showgrid=True))

    return fig

In [None]:
thai_2018_trend = plotting_trends_videos(pytrends_top_places_2018, videos_info, 'Thailand', 2018)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
thai_2018_trend.write_image("./images/graphs/thai_2018_trend.png")
thai_2018_trend.show()

In [None]:
pakistan_2018_trend = plotting_trends_videos(pytrends_top_places_2018, videos_info, 'Pakistan', 2018)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
pakistan_2018_trend.write_image("./images/graphs/pakistan_2018_trend.png")
pakistan_2018_trend.show()

In [None]:
mexico_2018_trend = plotting_trends_videos(pytrends_top_places_2018, videos_info, 'Mexico', 2018)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
mexico_2018_trend.write_image("./images/graphs/mexico_2018_trend.png")
mexico_2018_trend.show()

In [None]:
india_2019_trend = plotting_trends_videos(pytrends_top_places_2019, videos_info, 'India', 2019)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
india_2019_trend.write_image("./images/graphs/india_2019_trend.png")
india_2019_trend.show()

In [None]:
thai_2019_trend = plotting_trends_videos(pytrends_top_places_2019, videos_info, 'Thailand', 2019)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
thai_2019_trend.write_image("./images/graphs/thai_2019_trend.png")
thai_2019_trend.show()

In [None]:
pakistan_2019_trend = plotting_trends_videos(pytrends_top_places_2019, videos_info, 'Pakistan', 2019)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
pakistan_2019_trend.write_image("./images/graphs/pakistan_2019_trend.png")
pakistan_2019_trend.show()

In [None]:
thai_2020_trend = plotting_trends_videos(pytrends_top_places_2020, videos_info, 'Thailand', 2020)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
thai_2020_trend.write_image("./images/graphs/thai_2020_trend.png")
thai_2020_trend.show()

In [None]:
tabriz_2020_trend = plotting_trends_videos(pytrends_top_places_2020, videos_info, 'Tabriz', 2020)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
tabriz_2020_trend.write_image("./images/graphs/tabriz_2020_trend.png")
tabriz_2020_trend.show()

In [None]:

india_2020_trend = plotting_trends_videos(pytrends_top_places_2020, videos_info, 'India', 2020)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
india_2020_trend.write_image("./images/graphs/india_2020_trend.png")
india_2020_trend.show()

In [None]:
thai_2021_trend = plotting_trends_videos(pytrends_top_places_2021, videos_info, 'Thailand', 2021)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
thai_2021_trend.write_image("./images/graphs/thai_2021_trend.png")
thai_2021_trend.show()

In [None]:
california_2021_trend = plotting_trends_videos(pytrends_top_places_2021, videos_info, 'California', 2021)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
california_2021_trend.write_image("./images/graphs/california_2021_trend.png")
california_2021_trend.show()

In [None]:
arizona_2021_trend = plotting_trends_videos(pytrends_top_places_2021, videos_info, 'Arizona', 2021)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
arizona_2021_trend.write_image("./images/graphs/arizona_2021_trend.png")
arizona_2021_trend.show()

## Number of views per place over time X Number of tourist arrivals over time

In [None]:
def plotting_tourism_data(places_mentions_views, tourism_info, place=None):
    """Compare yearly tourist arrivals for a place with YouTube views mentioning it.

    Parameters
    ----------
    places_mentions_views : pandas.DataFrame
        Must provide the columns ``top_places_``, ``year`` and
        ``total_number_of_views``.
    tourism_info : pandas.DataFrame
        Must provide the columns ``Entity``, ``Year`` and
        ``International tourism, number of arrivals``.
    place : str, optional
        Place to plot. When omitted the name is requested interactively via
        ``input()``, which is how the function behaved before this parameter
        existed; pass it explicitly to make a run reproducible.

    Returns
    -------
    plotly.graph_objects.Figure
        Arrival bars on the primary y-axis, total-views line on the secondary
        y-axis.
    """
    if place is None:
        # Interactive fallback kept for callers that do not pass ``place``.
        place = input('Type the place \n')

    # NOTE(review): views are not restricted to 2018+ the way arrivals are;
    # confirm whether earlier years should be dropped here as well.
    views = places_mentions_views[places_mentions_views['top_places_'] == place]

    # Use the frame handed in by the caller instead of reaching for a global.
    arrivals = tourism_info.loc[(tourism_info['Entity'] == place)
                                & (tourism_info['Year'] >= 2018)]

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # go.Scatter replaces the deprecated go.Line; the mode is left at plotly's
    # default so the rendering matches what go.Line produced.
    trace1 = go.Scatter(
        x=views['year'],
        y=views['total_number_of_views'],
        name=f'Total number of YouTube views for {place}',
        yaxis='y2',
        line=dict(width=2),
        marker=dict(
            color='black',
            line=dict(width=2, color='black'),
        ),
    )

    trace2 = go.Bar(
        x=arrivals['Year'],
        y=arrivals['International tourism, number of arrivals'],
        name='Number of tourists arriving',
        marker=dict(color='#04cad8'),
    )

    fig.add_trace(trace1, secondary_y=True)
    fig.add_trace(trace2)

    fig.update_layout(
        height=600,
        width=1000,
        title=f"Tourist arrivals in '{place}' x Views on YouTube for videos mentioning '{place}'",
        # Categorical x-axis pinned to the four study years, in order.
        xaxis=dict(tickangle=0, type='category', categoryorder='array',
                   categoryarray=['2018', '2019', '2020', '2021']),
        xaxis_title="Year",
        yaxis2_title="Total views in millions",
        yaxis_title="Number of arrivals in millions",
        plot_bgcolor='rgba(240, 242, 247, 0.8)',
    )

    fig.update_yaxes(minor=dict(ticklen=6, tickcolor="black", showgrid=True, nticks=3))

    # TODO: an optional hotel-occupancy overlay on a third y-axis is not implemented.

    return fig

In [None]:
# Called like this, plotting_tourism_data asks for the place name on stdin;
# presumably 'Thailand' is the intended entry for this figure - confirm.
thai_tourism = plotting_tourism_data(places_mentions_views, number_of_tourist_arrivals)#, 2, hotel_data, 'rooms', 'occupancy_rate_rooms')
# Image export is currently disabled for this figure.
#thai_tourism.write_image(r".\images\graphs\thai_tourism.png")
thai_tourism.show()

In [3]:
# Called like this, plotting_tourism_data asks for the place name on stdin.
pakistan_tourism = plotting_tourism_data(places_mentions_views, number_of_tourist_arrivals)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
pakistan_tourism.write_image("./images/graphs/pakistan_tourism.png")
pakistan_tourism.show()

NameError: name 'plotting_tourism_data' is not defined

In [None]:
# Called like this, plotting_tourism_data asks for the place name on stdin.
mexico_tourism = plotting_tourism_data(places_mentions_views, number_of_tourist_arrivals)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
mexico_tourism.write_image("./images/graphs/mexico_tourism.png")
mexico_tourism.show()

In [None]:
# Called like this, plotting_tourism_data asks for the place name on stdin.
california_tourism = plotting_tourism_data(places_mentions_views, number_of_tourist_arrivals)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
california_tourism.write_image("./images/graphs/california_tourism.png")
california_tourism.show()

In [None]:
# Called like this, plotting_tourism_data asks for the place name on stdin.
arizona_tourism = plotting_tourism_data(places_mentions_views, number_of_tourist_arrivals)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
arizona_tourism.write_image("./images/graphs/arizona_tourism.png")
arizona_tourism.show()

In [None]:
# Called like this, plotting_tourism_data asks for the place name on stdin.
tabriz_tourism = plotting_tourism_data(places_mentions_views, number_of_tourist_arrivals)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
tabriz_tourism.write_image("./images/graphs/tabriz_tourism.png")
tabriz_tourism.show()

In [None]:
# Called like this, plotting_tourism_data asks for the place name on stdin.
jamaica_tourism = plotting_tourism_data(places_mentions_views, number_of_tourist_arrivals)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
jamaica_tourism.write_image("./images/graphs/jamaica_tourism.png")
jamaica_tourism.show()

In [None]:
# Called like this, plotting_tourism_data asks for the place name on stdin.
india_tourism = plotting_tourism_data(places_mentions_views, number_of_tourist_arrivals)
# Forward slashes keep the export path valid on every OS (backslashes only work on Windows).
india_tourism.write_image("./images/graphs/india_tourism.png")
india_tourism.show()

# Streamlit