In [1]:
from google.cloud import bigquery as bq
from google.cloud import bigquery_storage as bq_storage
import plotly.express as px
import pandas as pd

# Both BigQuery clients authenticate with the same service-account key file.
KEY_PATH = "key.json"
client = bq.Client.from_service_account_json(KEY_PATH)
storage_client = bq_storage.BigQueryReadClient.from_service_account_json(KEY_PATH)

# Look up the public Stack Overflow dataset: build a reference first,
# then fetch the dataset object through the API client.
bq_dataset_ref = client.dataset("stackoverflow", project="bigquery-public-data")
bq_dataset = client.get_dataset(bq_dataset_ref)


In [2]:
#### GET react, angular, vue.js QUESTIONS ##############

# Maximum number of question rows to pull back.
sample_size = 100000

# Every question whose tag string matches one of the framework patterns gets
# a RAND() sort key `r`; ordering by `r` and applying LIMIT keeps a random
# subset of `sample_size` rows. RAND() is not seeded here, so each run
# returns a different sample.
query = f'''
    SELECT RAND() as r, tags, creation_date FROM `bigquery-public-data.stackoverflow.posts_questions`
    WHERE REGEXP_CONTAINS(tags, r"vue") OR REGEXP_CONTAINS(tags, r"react") OR REGEXP_CONTAINS(tags, r"angular")
     OR REGEXP_CONTAINS(tags, r"svelte")  OR REGEXP_CONTAINS(tags, r"ember.js")  OR REGEXP_CONTAINS(tags, r"jquery")
    ORDER BY r
    LIMIT {sample_size}'''

# Run the job, wait for it to finish, then download the rows through the
# BigQuery Storage read client.
question_rows = client.query(query).result()
df = question_rows.to_dataframe(bqstorage_client=storage_client)

df

Unnamed: 0,r,tags,creation_date
0,4.698826e-07,javascript|angular|youtube-iframe-api,2018-09-10 08:22:30.697000+00:00
1,7.680278e-07,jquery,2012-09-03 19:13:07.620000+00:00
2,7.896342e-07,javascript|jquery|html|css,2014-05-15 02:48:34.943000+00:00
3,1.121077e-06,javascript|angularjs,2016-04-20 11:28:03.967000+00:00
4,1.306132e-06,jquery|jquery-selectors|checkbox,2013-02-05 11:41:33.790000+00:00
...,...,...,...
99995,5.169622e-02,jquery|ios|safari|webkit|responsive-design,2012-01-23 11:15:02.890000+00:00
99996,5.169702e-02,javascript|php|jquery|html,2016-11-22 19:52:53.617000+00:00
99997,5.169705e-02,javascript|jquery,2014-04-07 19:46:00.343000+00:00
99998,5.169764e-02,angular|angular7,2019-12-03 10:10:16.097000+00:00


In [3]:
##### FRONTEND 1 CHANGE TAG, SAVE TO CSV ################

def setTag(x):
    """Collapse a question's tag string into a single framework label.

    Args:
        x: Tag string of one question (tags joined by ``|`` in the sampled
            data, e.g. ``'javascript|angular|youtube-iframe-api'``).

    Returns:
        The label of the first framework whose marker occurs in ``x``,
        checked in the order react, angular, vue.js, ember.js, svelte,
        jquery. ``None`` when no marker is present.
    """
    # (substring searched for, label emitted), in priority order.
    # The vue marker is 'vue' rather than 'vue.js' because the sampling query
    # selects rows on the substring "vue": questions tagged only 'vuejs2',
    # 'vuex', 'vue-router', ... would otherwise fall through and be labelled
    # None (or mislabelled as a lower-priority framework).
    framework_markers = (
        ('react', 'react'),
        ('angular', 'angular'),
        ('vue', 'vue.js'),
        ('ember.js', 'ember.js'),
        ('svelte', 'svelte'),
        ('jquery', 'jquery'),
    )
    for marker, label in framework_markers:
        if marker in x:
            return label
    # Explicit so the "no framework found" outcome is visible to readers.
    return None
    

# Build the export frame from the sampled questions: drop the random sort
# key, rename `tags` to `framework`, and replace each tag string with the
# single label chosen by setTag. `df` itself is left untouched.
toSaveDf = (
    df
    .drop(columns=['r'])
    .rename(columns={'tags': 'framework'})
    .assign(framework=lambda frame: frame['framework'].map(setTag))
)
toSaveDf.to_csv('js-framework-tags-dataframe.csv')

In [6]:
## FRONTEND FRAMEWORK 2 ##
# Maximum number of rows sampled for EACH of the two populations below.
sample_size = 100000

# "Old" population: questions tagged html / javascript / css that mention
# none of the newer frameworks or server-side JS stacks. svelte is excluded
# as well because query_new selects it; without that exclusion a question
# tagged e.g. "javascript|svelte" could be drawn into both samples.
query = '''
    SELECT RAND() as r, tags, creation_date FROM `bigquery-public-data.stackoverflow.posts_questions`
    WHERE ( REGEXP_CONTAINS(tags, r"html") 
        OR REGEXP_CONTAINS(tags, r"javascript")
        OR REGEXP_CONTAINS(tags, r"HTML")
        OR REGEXP_CONTAINS(tags, r"css")
        OR REGEXP_CONTAINS(tags, r"CSS"))
    AND NOT REGEXP_CONTAINS(tags, r"vue")
    AND NOT REGEXP_CONTAINS(tags, r"angular")
    AND NOT REGEXP_CONTAINS(tags, r"react")
    AND NOT REGEXP_CONTAINS(tags, r"express")
    AND NOT REGEXP_CONTAINS(tags, r"ember")
    AND NOT REGEXP_CONTAINS(tags, r"svelte")
    AND NOT REGEXP_CONTAINS(tags, r"node")
    ORDER BY r
    LIMIT ''' + str(sample_size)

# "New" population: questions whose tags mention vue, react, angular,
# svelte or ember.
query_new = '''
    SELECT RAND() as r, tags, creation_date FROM `bigquery-public-data.stackoverflow.posts_questions`
    WHERE REGEXP_CONTAINS(tags, r"vue") OR REGEXP_CONTAINS(tags, r"react") OR REGEXP_CONTAINS(tags, r"angular")
    OR REGEXP_CONTAINS(tags, r"svelte")  OR REGEXP_CONTAINS(tags, r"ember")
    ORDER BY r
    LIMIT ''' + str(sample_size)


# Both queries order by an unseeded RAND() key, so every run draws a
# different random sample of up to sample_size rows per population.
df_frontend2_old = (
    client.query(query)
    .result()
    .to_dataframe(bqstorage_client=storage_client)
)

df_frontend2_new = (
    client.query(query_new)
    .result()
    .to_dataframe(bqstorage_client=storage_client)
)

print(df_frontend2_old)
print(df_frontend2_new)

                  r                           tags  \
0      1.604676e-08              javascript|jquery   
1      1.960438e-07                javascript|date   
2      6.975842e-07         javascript|jquery|ajax   
3      7.397319e-07                 javascript|php   
4      9.006150e-07                 javascript|php   
...             ...                            ...   
99995  4.068013e-02  javascript|php|html|http-post   
99996  4.068020e-02             jquery|css|plugins   
99997  4.068020e-02             javascript|firefox   
99998  4.068039e-02      javascript|pagination|yui   
99999  4.068056e-02   javascript|jquery|arrays|jsp   

                         creation_date  
0     2011-06-01 19:34:57.343000+00:00  
1     2009-01-30 14:22:41.713000+00:00  
2     2017-03-29 21:24:01.210000+00:00  
3     2014-02-19 03:20:25.577000+00:00  
4     2014-03-21 11:15:07.890000+00:00  
...                                ...  
99995 2016-09-27 08:53:59.903000+00:00  
99996 2020-05-15 08:49:

In [8]:
##### FRONTEND 2 CHANGE TAG, SAVE TO CSV ################

def setTag(x):
    """Classify a tag string as 'new' or 'old' frontend technology.

    Args:
        x: Tag string of one question.

    Returns:
        'new' if ``x`` contains any of the substrings react, angular, ember,
        vue or svelte; otherwise 'old'.
    """
    modern_markers = ('react', 'angular', 'ember', 'vue', 'svelte')
    if any(marker in x for marker in modern_markers):
        return 'new'
    return 'old'
    
# Stack the "new" sample on top of the "old" one with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4
# and removed in pandas 2.0.
# Then drop the random sort key, rename `tags` to `technology`, and replace
# each tag string with the 'new' / 'old' label chosen by setTag.
toSaveDf = (
    pd.concat([df_frontend2_new, df_frontend2_old], ignore_index=True)
    .drop(columns=['r'])
    .rename(columns={'tags': 'technology'})
    .assign(technology=lambda frame: frame['technology'].map(setTag))
)
toSaveDf.to_csv('old_vs_new_frontend.csv')


In [9]:
# Display the combined old/new frame as the cell's output to inspect the labelled rows.
toSaveDf

Unnamed: 0,technology,creation_date
0,new,2019-08-27 08:49:19.483000+00:00
1,new,2017-08-17 12:52:39.640000+00:00
2,new,2019-03-06 05:26:10.793000+00:00
3,new,2017-07-31 20:04:06.640000+00:00
4,new,2018-11-23 13:52:34.430000+00:00
...,...,...
199995,old,2016-09-27 08:53:59.903000+00:00
199996,old,2020-05-15 08:49:33.553000+00:00
199997,old,2015-05-15 12:41:46.400000+00:00
199998,old,2017-04-25 06:03:56.850000+00:00
