In [17]:
from google.cloud import storage, bigquery
import json
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
from google.oauth2 import service_account

# Load the service-account key; the path is resolved relative to the current
# working directory.
project_credentials = service_account.Credentials.from_service_account_file('data-finance-final-92d8049c252f.json')
project_id = 'data-finance-final'

# Cloud Storage: open the bucket and obtain an iterator over its blobs.
storage_client = storage.Client(credentials=project_credentials, project=project_id)
bucket_name = 'data_finance_final'

bucket = storage_client.get_bucket(bucket_name)
blobs = bucket.list_blobs()

# BigQuery: make sure the destination dataset exists.
bigquery_client = bigquery.Client(credentials=project_credentials, project=project_id)
dataset_id = 'stockMetaData'
# Client.dataset() is deprecated; bigquery.Dataset accepts a fully-qualified
# "project.dataset" string, which resolves to the same dataset reference.
dataset = bigquery.Dataset(f'{project_id}.{dataset_id}')
dataset.location = 'US'  # Choose your location
dataset = bigquery_client.create_dataset(dataset, exists_ok=True)  # Creates the dataset if it doesn't exist

# NOTE(review): "APPL" looks like a typo for the ticker "AAPL". It is left
# unchanged because it determines the destination table name below, and other
# code may already query that table -- confirm before renaming.
stock = "APPL"

table_id = f'{stock}_price_data'
# Column layout of the price table. It mirrors the DataFrame columns that are
# uploaded further down (date, open, high, low, close, volume); 'low' was
# previously missing, so the pre-created table disagreed with the data.
schema = [
    bigquery.SchemaField('date', 'DATE'),
    bigquery.SchemaField('open', 'FLOAT'),
    bigquery.SchemaField('high', 'FLOAT'),
    bigquery.SchemaField('low', 'FLOAT'),
    bigquery.SchemaField('close', 'FLOAT'),
    bigquery.SchemaField('volume', 'FLOAT'),

    # Add other fields as per your DataFrame structure
]

# Stays None unless the blob loop below finds and parses its target blob.
df = None
table = bigquery.Table(dataset.table(table_id), schema=schema)
table = bigquery_client.create_table(table, exists_ok=True)  # Creates the table if it doesn't exist

# Process the second blob in the bucket listing: parse its JSON payload into a
# tidy price DataFrame and upload it to BigQuery.
for position, blob in enumerate(blobs, start=1):
    if position != 2:
        continue

    # download_as_string() is a deprecated alias of download_as_bytes();
    # json.loads accepts the raw bytes directly.
    data = json.loads(blob.download_as_bytes())

    # Extract stock symbol and time series data
    stock = data['Meta Data']['2. Symbol']
    time_series_data = data['Time Series (Daily)']

    # One row per date key; convert the keys to datetimes and sort ascending.
    df = pd.DataFrame.from_dict(time_series_data, orient='index')
    df.index = pd.to_datetime(df.index)
    df.sort_index(inplace=True)

    # Move the date index into a column and assign the final column names
    # positionally (this raises if the payload does not have exactly five
    # value columns). The date column is already datetime-typed from the
    # index conversion above, so no second conversion is needed.
    df.reset_index(inplace=True)
    df.columns = ['date', 'open', 'high', 'low', 'close', 'volume']

    # Values arrive as strings; unparseable entries become NaN.
    for column in ('open', 'high', 'low', 'close'):
        df[column] = pd.to_numeric(df[column], errors='coerce')
    df['volume'] = pd.to_numeric(df['volume'], downcast='integer', errors='coerce')

    # Upload to BigQuery, replacing any existing table contents and schema.
    df.to_gbq(destination_table=f'{dataset_id}.{table_id}', project_id=project_id, if_exists='replace', credentials=project_credentials)
    break
else:
    # The loop ended without reaching a second blob, so nothing was parsed.
    print(f"Bucket '{bucket_name}' holds fewer than two blobs; nothing was uploaded.")
print(df)
# # Plotting the closing price
# if not df.empty:
#     # Ensure 'date' is datetime and 'close' is a float
#     df['date'] = pd.to_datetime(df['date'])
#     df['close'] = pd.to_numeric(df['close'], errors='coerce')
    
#     # Check for and handle any possible duplicates in the 'date' column
#     df = df.drop_duplicates(subset='date')

#     # Sort the DataFrame by 'date'
#     df = df.sort_values(by='date')
    
#     # Plotting
#     plt.figure(figsize=(15, 7))  # Modify this as needed for your data
#     plt.plot(df['date'], df['close'], label='Closing Price', color='blue')
    
#     # Format the date axis
#     plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
#     plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
    
#     # Rotate date labels
#     plt.xticks(rotation=45)
    
#     plt.title(f'Closing Price of Stock \'{stock}\' Over Time')
#     plt.xlabel('Date')
#     plt.ylabel('Closing Price')
    
#     plt.tight_layout()  # Adjust layout to fit the date labels
#     plt.legend()
#     plt.grid(True)
#     plt.show()
# else:
#     print("DataFrame is empty.")

100%|████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]

         date    open     high       low   close     volume
0  2023-06-15  183.96  186.520  183.7800  186.01   65433166
1  2023-06-16  186.73  186.990  184.2700  184.92  101256225
2  2023-06-20  184.41  186.100  184.4100  185.01   49799092
3  2023-06-21  184.90  185.410  182.5901  183.96   49515697
4  2023-06-22  183.74  187.045  183.6700  187.00   51245327
..        ...     ...      ...       ...     ...        ...
95 2023-10-31  169.35  170.900  167.9000  170.77   44846017
96 2023-11-01  171.00  174.230  170.1200  173.97   56934906
97 2023-11-02  175.52  177.780  175.4600  177.57   77334752
98 2023-11-03  174.24  176.820  173.3500  176.65   79829246
99 2023-11-06  176.38  179.430  176.2100  179.23   63841310

[100 rows x 6 columns]



