In [1]:
import numpy as np
import pandas as pd
from google.cloud import bigquery
from google.cloud.exceptions import NotFound


# BigQuery client reused by the cells below. No project or credentials are
# passed explicitly, so they have to be supplied by the runtime environment.
client = bigquery.Client()

In [6]:
# Dataset that holds both the source tables and the aggregated output table.
dataset_id = 'raw_data'

# Build the reference directly instead of calling the deprecated
# `client.dataset()` helper; the resulting DatasetReference points at the same
# project/dataset pair but no longer triggers a deprecation warning.
dataset_ref = bigquery.DatasetReference(client.project, dataset_id)

In [44]:
# Aggregation query: one output row per `Company.Industry`, holding the mean
# news sentiment plus the mean of each selected `FinanceSituation` metric.
#
# Join chain:
#   FinanceSituation f  -> Company c  on c.id = f.company_id
#   Company c           -> Tickers t  on t.ticker = c.id
#   Tickers t           -> News n     on n.id = t.news_id
# News is pre-filtered to rows whose publish_date falls within the last
# 6 months (relative to CURRENT_DATE() at query time).
#
# NOTE(review): all joins are inner joins, so a company with no matching
# Tickers/News row in the window contributes nothing to its industry. Also, if
# a company matches several news rows, its finance row is repeated once per
# match, which would weight the AVG(f.*) columns by news count -- confirm this
# is intended.
sql_query = '''SELECT
    c.Industry,
    AVG(n.sentiment) AS sentiment,
    AVG(f.AuditRisk) as AuditRisk,
    AVG(f.Dividend_rate) as Dividend_rate,
    AVG(f.Dividend_Yield) as Dividend_Yield,
    AVG(f.Payout_rate) as Payout_rate,
    AVG(f.Beta) as Beta,
    AVG(f.Market_Cap) as Market_Cap,
    AVG(f.profit_margins) as profit_margins,
    AVG(f.short_ratio) AS short_ratio,
    AVG(f.quick_ratio) AS quick_ratio,
    AVG(f.current_ratio) AS current_ratio,
    AVG(f.debtToEquity) AS debtToEquity
FROM
    `raw_data.FinanceSituation` AS f
JOIN
    `raw_data.Company` AS c
ON
    c.id = f.company_id
JOIN `raw_data.Tickers` AS t
ON t.ticker = c.id
JOIN (SELECT * FROM `raw_data.News` WHERE publish_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 6 MONTH)) AS n
ON n.id = t.news_id
GROUP BY
    c.Industry;'''

In [58]:
# Fully-qualified id (project.dataset.table) of the table the query writes to.
destination_table_id = "is3107-news.raw_data.Industry"

# Check whether the destination table already exists.
# Only a "not found" response means the table is missing. Any other failure
# (bad credentials, network error, malformed id, Ctrl-C, ...) must surface
# instead of being silently interpreted as "table does not exist".
try:
    table = client.get_table(destination_table_id)
    table_exists = True
except NotFound:
    table_exists = False

In [59]:
# Query job settings: send the result to the destination table and replace
# whatever rows it currently holds.
job_config = bigquery.QueryJobConfig(
    destination=destination_table_id,
    write_disposition="WRITE_TRUNCATE",
)

In [60]:
# Make sure the destination table exists, then run the aggregation exactly once.
# `table_exists` was computed in an earlier cell and can be stale when this
# cell is re-run on its own; `exists_ok=True` keeps the create call from
# raising a Conflict error if the table has been created in the meantime.
if not table_exists:
    table = client.create_table(bigquery.Table(destination_table_id), exists_ok=True)
    table_exists = True  # keep the flag in sync with the actual state

# The query and its job config are identical whether or not the table was just
# created, so submit them from a single code path.
query_job = client.query(sql_query, job_config=job_config)
query_job.result()  # Wait for the query to finish execution