# Digital Local Knowledge: Searching what Polish people say about air quality in their country

In [1]:
# Import Relevant Dependencies

    # Pytrends
from pytrends.request import TrendReq

import pandas as pd

    # Plotly
import plotly
import plotly.graph_objs as go

from plotly.offline import init_notebook_mode

from config import username, api_key

## What does Google Trends indicate about Poles' interest in air quality in their country?

In [13]:
# pytrends: an "unofficial" client library for querying Google Trends data.
# Ref: https://github.com/GeneralMills/pytrends

# One shared client for every Trends query below; hl is pytrends' host-language
# setting, here Polish.
pytrends = TrendReq(hl="pl")

In [15]:
# Search keywords (Polish -> English): air quality, smog, air pollution, air sensor
kw_list = ['jakość powietrza', 'smog', 'zanieczyszczenie powietrza', 'czujnik powietrza']

# geo='PL' selects all of Poland. 'PL-MA' ('Poland: Lesser Poland') is the region code
# referenced in the G-Trends URL:
# https://trends.google.com/trends/explore?date=today%205-y&geo=PL-MA&q=jako%C5%9B%C4%87%20powietrza
pytrends.build_payload(kw_list,
                       cat=0,
                       timeframe='today 5-y',
                       geo='PL',
                       gprop='')
# --------------------------------------------------------------------------------------------------------
# G-Trends for Poland
# reset_index() turns the result's index into a regular 'date' column for plotting.
timelinePoland_df = pytrends.interest_over_time().reset_index()
# Let pandas infer the date format: a hand-written strptime pattern would raise as
# soon as the column held date strings in any other layout. The column is normally
# already datetime-typed, so this call is only a dtype guarantee.
timelinePoland_df['date'] = pd.to_datetime(timelinePoland_df['date'])

In [16]:
# Search keywords (Polish -> English): air quality, smog, air pollution, air sensor
kw_list = ['jakość powietrza', 'smog', 'zanieczyszczenie powietrza', 'czujnik powietrza']

# geo='PL-MA' selects 'Poland: Lesser Poland' ('PL' would select all of Poland).
# The region code is the one referenced in the G-Trends URL:
# https://trends.google.com/trends/explore?date=today%205-y&geo=PL-MA&q=jako%C5%9B%C4%87%20powietrza
pytrends.build_payload(kw_list,
                       cat=0,
                       timeframe='today 5-y',
                       geo='PL-MA',
                       gprop='')
# --------------------------------------------------------------------------------------------------------
# G-Trends for Lesser Poland
# reset_index() turns the result's index into a regular 'date' column for plotting.
timelinePLMA_df = pytrends.interest_over_time().reset_index()
# Let pandas infer the date format: a hand-written strptime pattern would raise as
# soon as the column held date strings in any other layout. The column is normally
# already datetime-typed, so this call is only a dtype guarantee.
timelinePLMA_df['date'] = pd.to_datetime(timelinePLMA_df['date'])

In [22]:
import plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# --------------------------------------------------------------------------------------------------------
# Plotly credentials, kept for reference only (currently disabled):
# params = {
#     "username": username,
#     "api_key": api_key,
# }
# plotly.tools.set_credentials_file(params)
# --------------------------------------------------------------------------------------------------------


def _trend_trace(timeline_df, keyword, label, rgb, dash=None):
    """Build one Google Trends line trace.

    Parameters
    ----------
    timeline_df : DataFrame with a 'date' column and one column per search keyword.
    keyword     : name of the keyword column to plot on the y axis.
    label       : legend entry for the trace.
    rgb         : line colour as an 'rgb(r, g, b)' string.
    dash        : optional plotly dash style (e.g. 'dash'); omitted for a solid line.

    Returns
    -------
    go.Scatter plotting `keyword` against 'date' with a line of width 2.
    """
    line_style = dict(color=rgb, width=2)
    if dash is not None:
        line_style['dash'] = dash
    return go.Scatter(
        x=timeline_df['date'],
        y=timeline_df[keyword],
        name=label,
        line=line_style,
    )


# Create and style traces.
# Poland: solid lines.
poland0 = _trend_trace(timelinePoland_df, 'smog',
                       'G-Trends, Poland: Smog', 'rgb(205, 12, 24)')
poland1 = _trend_trace(timelinePoland_df, 'jakość powietrza',
                       'G-Trends, Poland: Air Quality', 'rgb(22, 96, 167)')
poland2 = _trend_trace(timelinePoland_df, 'zanieczyszczenie powietrza',
                       'G-Trends, Poland: Air Pollution', 'rgb(0,100,80)')
# The "Air Sensor" trace must read the 'czujnik powietrza' column; plotting
# 'zanieczyszczenie powietrza' here would just duplicate the Air Pollution line.
poland3 = _trend_trace(timelinePoland_df, 'czujnik powietrza',
                       'G-Trends, Poland: Air Sensor', 'rgb(255,99,71)')

# Lesser Poland: same colour per keyword, dashed lines.
lesserPoland0 = _trend_trace(timelinePLMA_df, 'smog',
                             'G-Trends, Lesser Poland: Smog', 'rgb(205, 12, 24)', dash='dash')
lesserPoland1 = _trend_trace(timelinePLMA_df, 'jakość powietrza',
                             'G-Trends, Lesser Poland: Air Quality', 'rgb(22, 96, 167)', dash='dash')
lesserPoland2 = _trend_trace(timelinePLMA_df, 'zanieczyszczenie powietrza',
                             'G-Trends, Lesser Poland: Air Pollution', 'rgb(0,100,80)', dash='dash')
lesserPoland3 = _trend_trace(timelinePLMA_df, 'czujnik powietrza',
                             'G-Trends, Lesser Poland: Air Sensor', 'rgb(255,99,71)', dash='dash')


data = [poland0, poland1, poland2, poland3,
        lesserPoland0, lesserPoland1, lesserPoland2, lesserPoland3]

layout = dict(title = 'Air Quality in Google Trends, Poland, 2014-2019',
              xaxis = dict(title = 'Months'),
              yaxis = dict(title = 'G-Trend Interest'),
              )

fig = dict(data=data, layout=layout)
iplot(fig, filename='poland_GTrends')

# plotly.offline.plot(data, filename='poland_GTrends_TimeS') # Saving as an html file
    # Ref: https://plot.ly/python/offline/

In [None]:
# Further Trends Queries
# NOTE(review): neither call takes arguments, so both presumably run against the
# payload most recently built on `pytrends` (geo='PL-MA' when the notebook is run
# top to bottom) — confirm before interpreting the results.
related_queries = pytrends.related_queries()
interest_by_region = pytrends.interest_by_region()

# Only the last expression of a cell is rendered, so both results are kept in
# named variables; `related_queries` can be inspected in a following cell.
interest_by_region

## What is Poland's preferred social media platform?

- Are people discussing air quality topics there?
- Is an API available, or do I need to build a web crawler?

### Background Info:

 > The demographics show that social media is used by: 82% of respondents aged 18 to 24, 47% of respondents aged 35 to 64 and 30% among respondents aged over 65. Among respondents aged 18 to 24, social media usage is further broken down as follows: Facebook (74%), nk.pl (40%) and Twitter (2%), whereas those in the age group 25 to 34 use Facebook (58%), nk.pl (45%) and Twitter (4%). In fact, one in five Poles aged over 65 has a Facebook account.
Source: https://businessculture.org/eastern-europe/poland/social-media-guide/


## FB Graph API Documentation:
### https://developers.facebook.com/docs/graph-api/reference

### Potential issues: https://www.drupal.org/project/socialfeed/issues/2977624

### Ref: https://minimaxir.com/2015/07/facebook-scraper/

Params of Interest:

- Comments
- Conversations
- Shares
- Likes
- Group
- Photo
- Place
- Place Information
- Place Context
- URL
- User
- User Context

Yikes! https://developers.facebook.com/docs/public_feed/

- Cannot scrape the public feed/posts
    - i.e. I can't use FB's API to search for XYZ keywords and retrieve the last 100 comments containing those keywords

Alternative: NK?

- http://developers.nk.pl/documentation/website-developers/api-integration/

In [None]:
# Pull Facebook comments through the socialreaper library and print each comment's text.
from socialreaper import Facebook
# Ref: https://reaper.social/platforms/facebook.html
# https://github.com/ScriptSmith/socialreaper

# The access token comes from the local config2 module rather than being hardcoded here.
from config2 import token

fbk = Facebook(token)

# NOTE(review): "air quality" is passed as the first positional argument; the method
# name suggests it expects a page identifier rather than a free-text search phrase —
# confirm against the socialreaper documentation.
# post_count / comment_count are both set to 10; presumably they cap how many posts
# and how many comments per post are fetched — TODO confirm.
comments = fbk.page_posts_comments("air quality", post_count=10, 
    comment_count=10)

# Each item is indexed by the 'message' key, which is printed one per line.
for comment in comments:
    print(comment['message'])

In [None]:
# Words to scrape (Polish search terms), kept as a named list so the cell is
# valid Python and the terms can be reused by later cells.
words_to_scrape = [
    'jakość powietrza', 'smog', 'zanieczyszczenie powietrza', 'czujnik powietrza',
    'odświeżacz powietrza', 'astma', 'zapalenie płuc', 'dzieci', 'węgiel', 'elektrownia węglowa',
    'Astma dziecięca',
]

# English equivalents, in the same order:
# 'air quality', 'smog', 'air pollution', 'air sensor',
# 'air freshener', 'asthma', 'pneumonia', 'children', 'coal', 'coal power plant', 'Childhood asthma'