In [7]:
import pandas as pd
import numpy as np

from google.cloud import storage

In [2]:
# Sample records covering the common column types. Every column has three
# rows; all date/time columns are derived from the days 2022-01-01 .. 2022-01-03.
data = {
    'integer_col': [1, 2, 3],
    'float_col': [1.1, 2.2, 3.3],
    # "double" and "decimal" values are plain Python floats here, so pandas
    # stores both columns as float64.
    'double_col': [1.111111111, 2.222222222, 3.333333333],
    'decimal_col': [0.12345, 0.23456, 0.34567],
    'string_col': ['apple', 'banana', 'orange'],
    'boolean_col': [True, False, True],
    # Timezone-naive timestamps at midnight.
    'timestamp_col': [pd.Timestamp(f'2022-01-0{day}') for day in (1, 2, 3)],
    # Plain datetime.date objects (pandas keeps these in an object column).
    'date_col': [pd.Timestamp(f'2022-01-0{day}').date() for day in (1, 2, 3)],
    # Timezone-aware timestamps at noon UTC.
    'datetime_with_timezone_col': [
        pd.Timestamp(f'2022-01-0{day} 12:00:00', tz='UTC') for day in (1, 2, 3)
    ],
}

# Build the frame from the column dictionary.
df = pd.DataFrame(data)

# Show the first rows as the cell output.
df.head()


Unnamed: 0,integer_col,float_col,double_col,decimal_col,string_col,boolean_col,timestamp_col,date_col,datetime_with_timezone_col
0,1,1.1,1.111111,0.12345,apple,True,2022-01-01,2022-01-01,2022-01-01 12:00:00+00:00
1,2,2.2,2.222222,0.23456,banana,False,2022-01-02,2022-01-02,2022-01-02 12:00:00+00:00
2,3,3.3,3.333333,0.34567,orange,True,2022-01-03,2022-01-03,2022-01-03 12:00:00+00:00


In [3]:
# Print a structural summary of the frame: index range, per-column non-null
# counts and dtypes, and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 9 columns):
 #   Column                      Non-Null Count  Dtype              
---  ------                      --------------  -----              
 0   integer_col                 3 non-null      int64              
 1   float_col                   3 non-null      float64            
 2   double_col                  3 non-null      float64            
 3   decimal_col                 3 non-null      float64            
 4   string_col                  3 non-null      object             
 5   boolean_col                 3 non-null      bool               
 6   timestamp_col               3 non-null      datetime64[ns]     
 7   date_col                    3 non-null      object             
 8   datetime_with_timezone_col  3 non-null      datetime64[ns, UTC]
dtypes: bool(1), datetime64[ns, UTC](1), datetime64[ns](1), float64(3), int64(1), object(2)
memory usage: 323.0+ bytes


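 - integer_col: Contains integer values (int64 data type).
 - float_col: Contains floating-point values (float64 data type).
 - double_col: Contains double-precision floating-point values (float64 data type).
 - decimal_col: Contains decimal-looking values that pandas stores as floating point (float64 data type).
 - string_col: Contains string values (object data type).
 - boolean_col: Contains boolean values (bool data type).
 - timestamp_col: Contains timezone-naive timestamp values (datetime64[ns] data type).
 - date_col: Contains Python date objects (object data type).
 - datetime_with_timezone_col: Contains timezone-aware timestamp values (datetime64[ns, UTC] data type).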

In [5]:
# Destination of the Parquet export. The full path is built by plain string
# concatenation, so `folder_path` must end with a trailing slash.
folder_path = '/Users/leonsmith/explore/fs/assetinsure/asset_secure/fs-as-disc/'  # Replace this with your actual folder path
filename = 'test_bg_df_sample_data.parquet'

# Serialize the DataFrame `df` to <folder_path><filename> in Parquet format.
df.to_parquet(path=f"{folder_path}{filename}")

In [15]:
def upload_blob(bucket_name, source_file_name, destination_blob_name, if_generation_match=0):
    """Upload a local file to a Google Cloud Storage bucket.

    Args:
        bucket_name: ID of the destination GCS bucket,
            e.g. "fs-assetsecure-testing".
        source_file_name: Path of the local file to upload.
        destination_blob_name: ID (object name) the file gets inside the
            bucket, e.g. "test_bg_df_sample_data.parquet".
        if_generation_match: Generation-match precondition forwarded to the
            upload; the request is aborted when the destination object's
            generation does not match it. The default, 0, only allows the
            upload when the destination object does not exist yet, which
            protects against race conditions and accidental overwrites.
            Pass the existing object's generation number to replace exactly
            that version, or None to upload without any precondition.

    Returns:
        None. A confirmation line is printed after a successful upload.

    Raises:
        google.api_core.exceptions.PreconditionFailed: If the precondition
            is not met (for example the object already exists while
            ``if_generation_match`` is 0).
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # The precondition is passed through unchanged so the caller decides
    # whether an existing object may be replaced.
    blob.upload_from_filename(source_file_name, if_generation_match=if_generation_match)

    print(
        f"File {source_file_name} uploaded to {destination_blob_name}."
    )


In [16]:
# Target bucket in Google Cloud Storage.
bucket_name = "fs-assetsecure-testing"
# Object name the uploaded file will have inside the bucket.
destination_blob_name = "test_bg_df_sample_data.parquet"
# Local file to upload: the folder and file name defined for the Parquet
# export, joined by plain concatenation.
source_file_name = f"{folder_path}{filename}"

In [17]:
# Upload the exported Parquet file to the configured bucket.
upload_blob(
    bucket_name=bucket_name,
    source_file_name=source_file_name,
    destination_blob_name=destination_blob_name,
)

File /Users/leonsmith/explore/fs/assetinsure/asset_secure/fs-as-disc/test_bg_df_sample_data.parquet uploaded to test_bg_df_sample_data.parquet.
