In [1]:
from google.cloud import bigquery

# No project or credentials are passed explicitly, so the client library's
# defaults (resolved from the runtime environment) apply.
client = bigquery.Client()

dataset_id = 'raw_data'
# Build the dataset reference explicitly: `Client.dataset()` is deprecated in
# google-cloud-bigquery in favour of constructing a DatasetReference directly.
# Using the client's own project keeps the result identical to the old call.
dataset_ref = bigquery.DatasetReference(client.project, dataset_id)

In [3]:
# Table: COMPANY
# Column definitions (name, type, nullability, description) for the table.
schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED", description="The Unique id of the company."),
    bigquery.SchemaField("name", "STRING", mode="REQUIRED", description="The short name of the company."),
    bigquery.SchemaField("fullTimeEmployees", "INTEGER", mode="NULLABLE", description="The employees of the company."),
    bigquery.SchemaField("Industry", "STRING", mode="NULLABLE", description="The industry this company belongs to."),
    bigquery.SchemaField("Country", "STRING", mode="NULLABLE", description="The country this company belongs to."),
]

# Local table definition (no API call yet).
table_ref = dataset_ref.table("COMPANY")
table = bigquery.Table(table_ref, schema=schema)

# API request - create the table.
# exists_ok=True makes the cell safe to re-run: without it a second execution
# raises a 409 Conflict because the table is already there. Note that an
# existing table is returned as-is; its schema is NOT updated to match `schema`.
created_table = client.create_table(table, exists_ok=True)
print("Table {} created.".format(created_table.table_id))

Table COMPANY created.


In [6]:
# Table: Finance_Situation
# Column definitions (name, type, nullability, description) for the table.
schema = [
    bigquery.SchemaField("company_id", "STRING", mode="REQUIRED", description="The Unique id of the situation’s company."),
    bigquery.SchemaField("AuditRisk", "INTEGER", mode="NULLABLE", description="The audit risk of the company."),
    bigquery.SchemaField("Dividend_rate", "FLOAT", mode="NULLABLE", description="The dividend rate of the company."),
    bigquery.SchemaField("Dividend_Yield", "FLOAT", mode="NULLABLE", description="The dividend yield of the company."),
    bigquery.SchemaField("Payout_rate", "FLOAT", mode="NULLABLE", description="The payout rate of the company."),
    bigquery.SchemaField("Beta", "FLOAT", mode="NULLABLE", description="The beta of the company."),
    bigquery.SchemaField("Market_Cap", "FLOAT", mode="NULLABLE", description="The market cap proportion of the company."),
    bigquery.SchemaField("profit_margins", "FLOAT", mode="NULLABLE", description="The profit margins of the company."),
    bigquery.SchemaField("short_ratio", "FLOAT", mode="NULLABLE", description="The short ratio of the company."),
    bigquery.SchemaField("quick_ratio", "FLOAT", mode="NULLABLE", description="The quick ratio of the company."),
    bigquery.SchemaField("current_ratio", "FLOAT", mode="NULLABLE", description="The current ratio proportion of the company."),
    bigquery.SchemaField("debtToEquity", "FLOAT", mode="NULLABLE", description="The debt to equity ratio of the company."),
]

# Local table definition (no API call yet).
table_ref = dataset_ref.table("Finance_Situation")
table = bigquery.Table(table_ref, schema=schema)

# API request - create the table.
# exists_ok=True makes the cell safe to re-run: without it a second execution
# raises a 409 Conflict because the table is already there. Note that an
# existing table is returned as-is; its schema is NOT updated to match `schema`.
created_table = client.create_table(table, exists_ok=True)
print("Table {} created.".format(created_table.table_id))

Table Finance_Situation created.


In [8]:
# Table: Stock_Data
# Column definitions (name, type, nullability, description) for the table.
# NOTE(review): the `company_id` description talks about the company's
# "short name" although the column is named as an id - looks copy-pasted;
# confirm the intended wording before changing the stored description.
schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED", description="The Unique id of the stock."),
    bigquery.SchemaField("company_id", "STRING", mode="REQUIRED", description="The short name of the company of the stock."),
    bigquery.SchemaField("Date", "DATE", mode="REQUIRED", description="The date of the stock."),
    bigquery.SchemaField("Adj_Close", "FLOAT", mode="NULLABLE", description="The adjusted close price for the stock."),
    bigquery.SchemaField("Log_Return", "FLOAT", mode="NULLABLE", description="The log return of the stock."),
]

# Local table definition (no API call yet).
table_ref = dataset_ref.table("Stock_Data")
table = bigquery.Table(table_ref, schema=schema)

# API request - create the table.
# exists_ok=True makes the cell safe to re-run: without it a second execution
# raises a 409 Conflict because the table is already there. Note that an
# existing table is returned as-is; its schema is NOT updated to match `schema`.
created_table = client.create_table(table, exists_ok=True)
print("Table {} created.".format(created_table.table_id))

Table Stock_Data created.


In [11]:
# Table: news
# Re-derive the dataset reference so this cell only needs `client` and
# `dataset_id`. `Client.dataset()` is deprecated, so the reference is built
# directly; using the client's project keeps the result identical.
dataset_ref = bigquery.DatasetReference(client.project, dataset_id)

# Column definitions (name, type, nullability, description) for the table.
schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED", description="The Unique id of the news."),
    bigquery.SchemaField("title", "STRING", mode="REQUIRED", description="The title of the news."),
    bigquery.SchemaField("text", "STRING", mode="REQUIRED", description="The text of the news."),
    bigquery.SchemaField("url", "STRING", mode="NULLABLE", description="The URL of the news's source."),
    bigquery.SchemaField("publish_date", "DATE", mode="REQUIRED", description="The publish date of the news."),
    bigquery.SchemaField("author", "STRING", mode="NULLABLE", description="The author of the news."),
    bigquery.SchemaField("language", "STRING", mode="NULLABLE", description="The language of the news."),
    bigquery.SchemaField("source_country", "STRING", mode="NULLABLE", description="The country of the news's source."),
    bigquery.SchemaField("sentiment", "FLOAT", mode="NULLABLE", description="The sentiment score of the news."),
]

# Local table definition (no API call yet).
table_ref = dataset_ref.table("news")
table = bigquery.Table(table_ref, schema=schema)

# API request - create the table.
# exists_ok=True makes the cell safe to re-run: without it a second execution
# raises a 409 Conflict because the table is already there. Note that an
# existing table is returned as-is; its schema is NOT updated to match `schema`.
created_table = client.create_table(table, exists_ok=True)
print("Table {} created.".format(created_table.table_id))

Table news created.


In [12]:
# API request - enumerate the tables in the dataset and print each table id.
tables = client.list_tables(dataset_ref)
for table in tables:
    print(f"Table ID: {table.table_id}")

Table ID: COMPANY
Table ID: Finance_Situation
Table ID: Stock_Data
Table ID: news
Table ID: raw_news
Table ID: raw_news_test


In [2]:
# Inspect every table in the dataset: print its id, its column names/types,
# and a preview of up to five rows.

# API request - enumerate the tables in the dataset.
tables = client.list_tables(dataset_ref)

print(f"Tables contained in '{dataset_id}':")
for table in tables:
    print(f"Table ID: {table.table_id}")

    # API request - replace the list entry with the table object returned by
    # get_table(); the schema is read from that object below.
    table_ref = dataset_ref.table(table.table_id)
    table = client.get_table(table_ref)

    # Column names and types.
    print(f"Schema of '{table.table_id}':")
    for schema_field in table.schema:
        print(f"Column name: {schema_field.name}, Column type: {schema_field.field_type}")

    # API request - fetch at most five rows as a DataFrame for a quick look.
    preview = client.list_rows(table, max_results=5).to_dataframe()
    print(f"Preview of the first few rows from '{table.table_id}':")
    print(preview)
    print("\n")  # extra blank lines between tables for readability

Tables contained in 'raw_data':
Table ID: raw_news
Schema of 'raw_news':
Preview of the first few rows from 'raw_news':
Empty DataFrame
Columns: []
Index: []


Table ID: raw_news_test
Schema of 'raw_news_test':
Column name: author, Column type: STRING
Column name: title, Column type: STRING
Column name: description, Column type: STRING
Column name: url, Column type: STRING
Column name: urlToImage, Column type: STRING
Column name: publishedAt, Column type: STRING
Column name: content, Column type: STRING
Column name: source_id, Column type: STRING
Column name: source_name, Column type: STRING
Preview of the first few rows from 'raw_news_test':
              author                                              title  \
0  InvestorsObserver  PowerBand Receives TSXV Approval for Extension...   
1      coinpedia.org  Analyst Predicts XRP’s Ascent to $1; Breakout ...   
2      coinpedia.org  Destroyer, Set To Release New ‘Black Account’ ...   
3                TNN  Law should keep pace with f