In [4]:
### Import packages for authentication

from google.cloud import bigquery
from google.oauth2 import service_account

### Import packages for converting query results into dataframe

import pandas as pd 

## Import packages to build absolute file paths and keep the code independent of the operating system

from pathlib import Path
import os.path

In [6]:
### Authentication

# Locate the service-account key relative to the current working directory.
# Path.cwd() is used explicitly: Path("__file__") is only the literal string
# "__file__", so its .parent is just "." (a notebook has no __file__ variable).
base_path = Path.cwd()
full_path = (base_path / "../data/raw/GoogleBigQuery_key.json").resolve()

# Build the credentials object from the JSON key file on disk.
credentials = service_account.Credentials.from_service_account_file(str(full_path))

In [8]:
### Construct a BigQuery client object.

# Use the project recorded in the service-account credentials as the
# client's project.
project_id = credentials.project_id
client = bigquery.Client(project=project_id, credentials=credentials)

In [9]:
### Getting overview of Stack Overflow tables

# Build the dataset reference directly: Client.dataset() is deprecated in
# google-cloud-bigquery in favour of the DatasetReference constructor.
stackoverflow = bigquery.DatasetReference('bigquery-public-data', 'stackoverflow')

# List the table ids available in the public Stack Overflow dataset.
print([x.table_id for x in client.list_tables(stackoverflow)])

['badges', 'comments', 'post_history', 'post_links', 'posts_answers', 'posts_moderator_nomination', 'posts_orphaned_tag_wiki', 'posts_privilege_wiki', 'posts_questions', 'posts_tag_wiki', 'posts_tag_wiki_excerpt', 'posts_wiki_placeholder', 'stackoverflow_posts', 'tags', 'users', 'votes']


In [6]:
### Make an API request

# Join every answer created after 2019-05-30 to its parent question, keeping
# the question metadata plus the answer body.
query = """
SELECT
      pq.score, pq.id as question_id, pa.parent_id as question_id_check, pq.title as question_title, pq.body as question_text, pq.answer_count,
      pq.comment_count, pq.creation_date, pq.favorite_count, pq.tags, pq.view_count, pa.body as answer_text
FROM `bigquery-public-data.stackoverflow.posts_questions` pq
INNER JOIN `bigquery-public-data.stackoverflow.posts_answers` pa ON pq.id = pa.parent_id
WHERE pa.creation_date > "2019-05-30 00:00:00.000 UTC"
"""

# Submit the query, wait for the job to finish, then load the rows into a
# pandas DataFrame.
query_job = client.query(query)
query_rows = query_job.result()
dataframe = query_rows.to_dataframe()

In [7]:
### Display query results

# Show the joined question/answer frame produced by the query cell.
display(dataframe)

Unnamed: 0,score,question_id,question_id_check,question_title,question_text,answer_count,comment_count,creation_date,favorite_count,tags,view_count,answer_text
0,-2,59318968,59318968,how can i have an logout option in my flutter ...,<p>i have developed an admin app in that i hav...,2,1,2019-12-13 08:47:25.110000+00:00,,firebase|flutter|dart|firebase-authentication,189,<p>replace this line</p>\n\n<pre><code>Navigat...
1,117,14940660,14940660,What's Mongoose error Cast to ObjectId failed ...,<p>When sending a request to <code>/customers/...,21,0,2013-02-18 16:18:03.700000+00:00,31.0,mongodb|mongoose,166823,"<p>If anyone runs into this, \nwhat solved it ..."
2,1,43315796,43315796,Angular2- Radio button testing,<p>I'm trying to test an HTML radio button wit...,2,7,2017-04-10 05:49:49.340000+00:00,,angular|karma-jasmine|angular2-testing,2018,<p>id attributes do need to be unique. To mak...
3,0,57089771,57089771,Filter out records in specified day of week in...,<p>I have inner query which is returning a spe...,2,7,2019-07-18 08:02:50.267000+00:00,1.0,sql|database|postgresql|date|inner-query,124,"<pre><code>SELECT id, to_char(markupdate, 'Day..."
4,1,57429407,57429407,Serializing a map getting an additional empty ...,<p>I am trying to serialize an object of <code...,1,8,2019-08-09 11:45:44.507000+00:00,,c++|qt,53,<p>You need to test the stream <em>after</em> ...
...,...,...,...,...,...,...,...,...,...,...,...,...
2463027,1,60836246,60836246,Standard library ABI compatibility,<p>Suppose we have a shared library which acce...,2,0,2020-03-24 17:28:38.407000+00:00,,linux|gcc|g++,41,<p>ABIs in practice are not linked to the stan...
2463028,-1,59485298,59485298,Why view.height is 3 times bigger than actual ...,<p>I have an ImageView with height set to 120d...,5,0,2019-12-26 07:31:51.597000+00:00,,android|xml|kotlin|view,72,<p>Because in xml file you have given size in ...
2463029,-1,57494584,57494584,C# MVC Ajax call returns undefined,"<p>I am new to ajax, am trying to send usernam...",2,3,2019-08-14 12:21:15.717000+00:00,,javascript|c#|ajax|model-view-controller,131,"<p>Specific to your case, you can refer to thi..."
2463030,1,57648623,57648623,How to create an expandeble multible imagebutt...,<p>I want to have multiple images displayed in...,1,4,2019-08-25 18:20:28.320000+00:00,0.0,c#|xamarin.forms,52,<p>I solved the problem by placing the grid in...


In [11]:
### Save dataframe to a csv file

# Resolve the output path against the current working directory.
# Path.cwd() is used explicitly: Path("__file__") is only the literal string
# "__file__", so its .parent is just ".".
base_path = Path.cwd()
full_path = (base_path / "../data/raw/stackoverflow_raw.csv").resolve()

# The index is written as well (to_csv default), so it shows up as an
# unnamed first column when the file is read back.
dataframe.to_csv(full_path)

In [None]:
# Sample from main dataset

SAMPLE_SIZE = 30000  # number of rows to draw from the full frame
SAMPLE_SEED = 1      # fixed seed so the same rows are drawn on every run

stackoverflow = dataframe.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)

In [None]:
### Save sample of dataframe to a csv file

# Resolve the output path against the current working directory.
# Path.cwd() is used explicitly: Path("__file__") is only the literal string
# "__file__", so its .parent is just ".".
# NOTE(review): the directory is "../data/raw", matching the other path cells
# in this notebook (key file and full CSV); this cell previously pointed one
# level higher ("../../data/raw") — confirm the intended location.
base_path = Path.cwd()
full_path = (base_path / "../data/raw/stackoverflow_raw_sample.csv").resolve()

# The index is written as well (to_csv default).
stackoverflow.to_csv(full_path)