<a href="https://colab.research.google.com/github/Anudeep27-bit/ETL-migration-workflow-Alteryx-to-BigQuery-/blob/main/alt_input.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install google-cloud-bigquery pandas gspread oauth2client



In [None]:
from google.cloud import bigquery
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
import os
import pandas as pd

# Initialize BigQuery client. No project or credentials are passed, so the
# client uses whatever defaults the runtime environment provides.
# NOTE(review): the Colab authentication cell appears later in this notebook;
# confirm credentials are available when this cell runs on a fresh kernel.
client = bigquery.Client()

# Google Drive folder ID used as the upload destination (replace with your folder's ID)
drive_folder_id = "1nZPRsIy8_u5tJ7vNpSLEtg6c0qy6nMvG"

# Authenticate Google Drive API
def authenticate_drive(service_account_file='path_to_your_service_account_key.json',
                       scopes=('https://www.googleapis.com/auth/drive',)):
    """Build a Google Drive v3 API client from a service-account key file.

    Args:
        service_account_file: Path to the service-account JSON key. The default
            is a placeholder and must be replaced with a real key file.
        scopes: OAuth scopes requested for the credentials. Defaults to the
            Drive scope.

    Returns:
        The Drive v3 service object returned by ``googleapiclient``'s ``build``.

    Raises:
        FileNotFoundError: If ``service_account_file`` does not exist.
    """
    # Fail early with an actionable message instead of a bare open() error
    # when the placeholder path has not been replaced.
    if not os.path.isfile(service_account_file):
        raise FileNotFoundError(
            f"Service account key file not found: {service_account_file!r}. "
            "Pass the path to your service account JSON key."
        )

    from google.oauth2 import service_account

    credentials = service_account.Credentials.from_service_account_file(
        service_account_file, scopes=list(scopes)
    )
    return build('drive', 'v3', credentials=credentials)

drive_service = authenticate_drive()

# Query template for one date window. {start_date} and {end_date} are filled
# in per month below. GROUP BY ALL groups by every non-aggregated SELECT column.
# NOTE(review): the BETWEEN bounds are date strings; this assumes calendar_date
# is a DATE column. Confirm against the table schema.
query_template = """
    SELECT
        billing_system, invoicing_country_code, calendar_date, invoice_date, invoice_id, year_month,
        transaction_country_name, transaction_cluster, transaction_region, customer_L1_code,
        customer_L1_name, imga, billing_cycle, global_industry_indicator, sf_country_parent_id,
        sf_country_parent_name, oracle_account_number, oracle_account_name, salesforce_billing_account,
        bill_code, description, product_id, product_summary_type, bill_code_data_source, gl_account,
        gl_account_description, revenue_classification, currency, rate, SUM(quantity) AS quantity,
        number_of_months, SUM(amount) AS amount, rate_USD, amount_USD, bill_code_sfdc, unit_of_measure,
        storage_from_date, storage_to_date, original_invoice_id, revenue_recognition_date,
        bill_in_advance_flag, bill_in_advance_month_number, revenue_recognition_amount,
        allocated_quantity
    FROM `irm-fin-acct-dp-prod.rpt_billing.billing_detail`
    WHERE
        transaction_region = "NA"
        AND calendar_date BETWEEN "{start_date}" AND "{end_date}"
        AND revenue_classification = "Storage"
        AND product_summary_type = "Records Management"
    GROUP BY ALL
"""

# First day of each month to extract
months = pd.date_range(start="2025-01-01", end="2025-01-31", freq="MS")

for start_date in months:
    start_date_str = start_date.strftime("%Y-%m-%d")
    # MonthEnd(0) rolls the month-start date forward to the last day of the same month
    end_date_str = (start_date + pd.offsets.MonthEnd(0)).strftime("%Y-%m-%d")

    # Format the query for the specific month
    query = query_template.format(start_date=start_date_str, end_date=end_date_str)

    # Run the query and pull the full result into memory
    query_job = client.query(query)
    df = query_job.to_dataframe()

    file_name = f"data_{start_date.strftime('%Y_%m')}.csv"
    try:
        # Stage the result as a local CSV, then upload it into the Drive folder
        df.to_csv(file_name, index=False)
        file_metadata = {'name': file_name, 'parents': [drive_folder_id]}
        media = MediaFileUpload(file_name, mimetype='text/csv')
        drive_service.files().create(body=file_metadata, media_body=media).execute()
    finally:
        # Always remove the staged file, even when the write or upload fails
        if os.path.exists(file_name):
            os.remove(file_name)

print("All files successfully uploaded to Google Drive!")


In [None]:
from google.colab import auth, drive

# Sign the Colab user in to Google Cloud first ...
auth.authenticate_user()

# ... then attach Google Drive at /content/drive.
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
!pip install --upgrade gspread pandas


Collecting pandas
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalled pandas-2.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.2.3 which is incompatible.
cudf-cu12 24.10.1 requires pandas<2.2.3dev0,>=2.0, but you have pandas 2.2.3 w

In [None]:
# Re-create the BigQuery client bound explicitly to the billing project.
# `bigquery` is not imported in this cell; it must already be in scope from an
# earlier cell.
client = bigquery.Client(project="irm-fin-acct-dp-prod")


In [None]:
# Fetch every dataset in the client's project, then print a header followed by
# one "- <dataset_id>" line per dataset.
datasets = [ds for ds in client.list_datasets()]
print("Datasets in the project:")
dataset_lines = [f"- {ds.dataset_id}" for ds in datasets]
if dataset_lines:
    print("\n".join(dataset_lines))

# Same report for the tables of the 'rpt_billing' dataset (replace with the dataset name).
tables = [tbl for tbl in client.list_tables("rpt_billing")]
print("\nTables in the 'rpt_billing' dataset:")
table_lines = [f"- {tbl.table_id}" for tbl in tables]
if table_lines:
    print("\n".join(table_lines))


Datasets in the project:
- API_Analysis
- CTesting
- Etl_alt_BQ
- IYR
- IYR_Process
- IYR_Process_2025
- Insight_Scanning
- Legacy_EDP_Sandbox
- Matterhorn
- NonEDPDatasets
- OS_REPORTING
- RCOE_Requests
- RO_Tracker
- aggregated_one_stream_static
- global_customer_revenue
- inventory
- inventory_nonprod
- onestream
- rpt_billing
- salesforce_core

Tables in the 'rpt_billing' dataset:
- billing_detail
- orders_not_billed
- rpt_ar_aggregated
- rpt_monthly_customer_invoice_billing_summary
- rpt_skp_order_service_fee


In [None]:
# Dataset to inspect and the project it belongs to.
dataset_id = "rpt_billing"
project_id = "irm-fin-acct-dp-prod"

# Fetch the table listing up front, then print a header followed by one
# "- <table_id>" line per table.
tables = [tbl for tbl in client.list_tables(dataset_id)]
print(f"Tables in dataset '{dataset_id}':")
table_lines = [f"- {tbl.table_id}" for tbl in tables]
if table_lines:
    print("\n".join(table_lines))


Tables in dataset 'rpt_billing':
- billing_detail
- orders_not_billed
- rpt_ar_aggregated
- rpt_monthly_customer_invoice_billing_summary
- rpt_skp_order_service_fee


In [None]:
# Ten-row sample of the billing detail table, used to eyeball the schema.
query = """
SELECT *
FROM `irm-fin-acct-dp-prod.rpt_billing.billing_detail`
LIMIT 10
"""

# Submit the query, then materialise its result as a DataFrame.
query_job = client.query(query)
df = query_job.to_dataframe()

# Show the leading rows as plain text.
preview_rows = df.head()
print(preview_rows)


  billing_system invoicing_country_code calendar_date invoice_date  \
0            SKP                    USA    2020-08-13   2020-01-31   
1            SKP                    USA    2020-08-13   2020-01-31   
2            SKP                    USA    2020-08-31   2020-08-31   
3            SKP                    USA    2020-08-31   2020-08-31   
4            SKP                    USA    2020-08-12   2020-03-31   

  adjustment_date year_month  year_month_num  year transaction_org  \
0      2020-08-13    2020-08          202008  2020             GRO   
1      2020-08-13    2020-08          202008  2020             GRO   
2             NaT    2020-08          202008  2020             GRO   
3             NaT    2020-08          202008  2020             GRO   
4      2020-08-12    2020-08          202008  2020             GRO   

  transaction_country_code  ... trip_id remit_level revenue_recognition_date  \
0                      USA  ...       0          1M               2020-01-31  

In [None]:
import os

# Destination folder inside the mounted Google Drive.
export_folder = "/content/drive/My Drive/BigQuery_Exports"

# Report an existing folder; otherwise create it (including parents) and say so.
if os.path.exists(export_folder):
    print(f"Folder already exists: {export_folder}")
else:
    os.makedirs(export_folder)
    print(f"Folder created: {export_folder}")


Folder created: /content/drive/My Drive/BigQuery_Exports


In [None]:
import os

import pandas as pd
from google.cloud import bigquery
from google.colab import auth, drive

# Mount Google Drive so the monthly CSVs can be written directly into it.
drive.mount('/content/drive')

# Authenticate the Colab user, then open a BigQuery client on the billing project.
auth.authenticate_user()
client = bigquery.Client(project="irm-fin-acct-dp-prod")  # Replace with your project ID

# Region exported by this cell and the Drive folder that receives its files.
region = "NA"
export_folder = "/content/drive/My Drive/BQ_EXP/BigQuery_Exports_NA"

# Report whether the folder already exists, creating it when it does not.
# exist_ok=True keeps the call safe if the folder appears after the check.
if os.path.exists(export_folder):
    print(f"Folder already exists: {export_folder}")
else:
    os.makedirs(export_folder, exist_ok=True)
    print(f"Folder created: {export_folder}")

# First day of every month in the export window.
months = pd.date_range(start="2023-01-01", end="2024-12-31", freq="MS")

# Run one query and write one CSV per month.
for start_date in months:
    start_date_str = start_date.strftime('%Y-%m-%d')
    # MonthEnd(0) rolls the month-start date forward to the last day of that month.
    end_date_str = (start_date + pd.offsets.MonthEnd(0)).strftime('%Y-%m-%d')

    # GROUP BY ALL groups by every non-aggregated SELECT column, so the
    # grouping keys can never drift out of sync with the SELECT list.
    # NOTE(review): the BETWEEN bounds are date strings; this assumes
    # calendar_date is a DATE column. Confirm against the table schema.
    # NOTE(review): amount_USD, revenue_recognition_amount and
    # allocated_quantity are grouping keys while amount and quantity are
    # summed. Confirm that is the intended aggregation.
    query = f"""
    SELECT
        billing_system, invoicing_country_code, calendar_date, invoice_date, invoice_id, year_month,
        transaction_country_name, transaction_cluster, transaction_region, customer_L1_code,
        customer_L1_name, imga, billing_cycle, global_industry_indicator, sf_country_parent_id,
        sf_country_parent_name, oracle_account_number, oracle_account_name, salesforce_billing_account,
        bill_code, description, product_id, product_summary_type, bill_code_data_source, gl_account,
        gl_account_description, revenue_classification, currency, rate, SUM(quantity) AS quantity,
        number_of_months, SUM(amount) AS amount, rate_USD, amount_USD, bill_code_sfdc, unit_of_measure,
        storage_from_date, storage_to_date, original_invoice_id, revenue_recognition_date,
        bill_in_advance_flag, bill_in_advance_month_number, revenue_recognition_amount,
        allocated_quantity
    FROM `irm-fin-acct-dp-prod.rpt_billing.billing_detail`
    WHERE
        transaction_region = '{region}'
        AND calendar_date BETWEEN '{start_date_str}' AND '{end_date_str}'
        AND revenue_classification = 'Storage'
        AND product_summary_type = 'Records Management'
    GROUP BY ALL
    """

    # Execute the query and load the full result into a pandas DataFrame.
    query_job = client.query(query)
    df = query_job.to_dataframe()

    # Write the month's rows to <export_folder>/BigQuery_Data_<YYYY>_<MM>.csv
    file_name = f"BigQuery_Data_{start_date.strftime('%Y_%m')}.csv"
    output_path = os.path.join(export_folder, file_name)
    df.to_csv(output_path, index=False)

    print(f"Data for {start_date_str} to {end_date_str} saved to {output_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder already exists: /content/drive/My Drive/BigQuery_Exports
Data for 2023-01-01 to 2023-01-31 saved to /content/drive/My Drive/BigQuery_Exports/BigQuery_Data_2023_01.csv
Data for 2023-02-01 to 2023-02-28 saved to /content/drive/My Drive/BigQuery_Exports/BigQuery_Data_2023_02.csv
Data for 2023-03-01 to 2023-03-31 saved to /content/drive/My Drive/BigQuery_Exports/BigQuery_Data_2023_03.csv
Data for 2023-04-01 to 2023-04-30 saved to /content/drive/My Drive/BigQuery_Exports/BigQuery_Data_2023_04.csv
Data for 2023-05-01 to 2023-05-31 saved to /content/drive/My Drive/BigQuery_Exports/BigQuery_Data_2023_05.csv
Data for 2023-06-01 to 2023-06-30 saved to /content/drive/My Drive/BigQuery_Exports/BigQuery_Data_2023_06.csv
Data for 2023-07-01 to 2023-07-31 saved to /content/drive/My Drive/BigQuery_Exports/BigQuery_Data_2023_07.csv
Data for 2023-08-01 to 2023-08-31 sav

In [None]:
import os

import pandas as pd
from google.cloud import bigquery
from google.colab import auth, drive

# Mount Google Drive so the monthly CSVs can be written directly into it.
drive.mount('/content/drive')

# Authenticate the Colab user, then open a BigQuery client on the billing project.
auth.authenticate_user()
client = bigquery.Client(project="irm-fin-acct-dp-prod")  # Replace with your project ID

# Region exported by this cell and the Drive folder that receives its files.
region = "India"
export_folder = "/content/drive/My Drive/BQ_EXP/BigQuery_Exports_India"

# Report whether the folder already exists, creating it when it does not.
# exist_ok=True keeps the call safe if the folder appears after the check.
if os.path.exists(export_folder):
    print(f"Folder already exists: {export_folder}")
else:
    os.makedirs(export_folder, exist_ok=True)
    print(f"Folder created: {export_folder}")

# First day of every month in the export window.
months = pd.date_range(start="2023-01-01", end="2024-12-31", freq="MS")

# Run one query and write one CSV per month.
for start_date in months:
    start_date_str = start_date.strftime('%Y-%m-%d')
    # MonthEnd(0) rolls the month-start date forward to the last day of that month.
    end_date_str = (start_date + pd.offsets.MonthEnd(0)).strftime('%Y-%m-%d')

    # GROUP BY ALL groups by every non-aggregated SELECT column, so the
    # grouping keys can never drift out of sync with the SELECT list.
    # NOTE(review): the BETWEEN bounds are date strings; this assumes
    # calendar_date is a DATE column. Confirm against the table schema.
    # NOTE(review): amount_USD, revenue_recognition_amount and
    # allocated_quantity are grouping keys while amount and quantity are
    # summed. Confirm that is the intended aggregation.
    query = f"""
    SELECT
        billing_system, invoicing_country_code, calendar_date, invoice_date, invoice_id, year_month,
        transaction_country_name, transaction_cluster, transaction_region, customer_L1_code,
        customer_L1_name, imga, billing_cycle, global_industry_indicator, sf_country_parent_id,
        sf_country_parent_name, oracle_account_number, oracle_account_name, salesforce_billing_account,
        bill_code, description, product_id, product_summary_type, bill_code_data_source, gl_account,
        gl_account_description, revenue_classification, currency, rate, SUM(quantity) AS quantity,
        number_of_months, SUM(amount) AS amount, rate_USD, amount_USD, bill_code_sfdc, unit_of_measure,
        storage_from_date, storage_to_date, original_invoice_id, revenue_recognition_date,
        bill_in_advance_flag, bill_in_advance_month_number, revenue_recognition_amount,
        allocated_quantity
    FROM `irm-fin-acct-dp-prod.rpt_billing.billing_detail`
    WHERE
        transaction_region = '{region}'
        AND calendar_date BETWEEN '{start_date_str}' AND '{end_date_str}'
        AND revenue_classification = 'Storage'
        AND product_summary_type = 'Records Management'
    GROUP BY ALL
    """

    # Execute the query and load the full result into a pandas DataFrame.
    query_job = client.query(query)
    df = query_job.to_dataframe()

    # Write the month's rows to <export_folder>/BigQuery_Data_<YYYY>_<MM>.csv
    file_name = f"BigQuery_Data_{start_date.strftime('%Y_%m')}.csv"
    output_path = os.path.join(export_folder, file_name)
    df.to_csv(output_path, index=False)

    print(f"Data for {start_date_str} to {end_date_str} saved to {output_path}")


Mounted at /content/drive
Folder created: /content/drive/My Drive/BigQuery_Exports_India
Data for 2023-01-01 to 2023-01-31 saved to /content/drive/My Drive/BigQuery_Exports_India/BigQuery_Data_2023_01.csv
Data for 2023-02-01 to 2023-02-28 saved to /content/drive/My Drive/BigQuery_Exports_India/BigQuery_Data_2023_02.csv
Data for 2023-03-01 to 2023-03-31 saved to /content/drive/My Drive/BigQuery_Exports_India/BigQuery_Data_2023_03.csv
Data for 2023-04-01 to 2023-04-30 saved to /content/drive/My Drive/BigQuery_Exports_India/BigQuery_Data_2023_04.csv
Data for 2023-05-01 to 2023-05-31 saved to /content/drive/My Drive/BigQuery_Exports_India/BigQuery_Data_2023_05.csv
Data for 2023-06-01 to 2023-06-30 saved to /content/drive/My Drive/BigQuery_Exports_India/BigQuery_Data_2023_06.csv
Data for 2023-07-01 to 2023-07-31 saved to /content/drive/My Drive/BigQuery_Exports_India/BigQuery_Data_2023_07.csv
Data for 2023-08-01 to 2023-08-31 saved to /content/drive/My Drive/BigQuery_Exports_India/BigQuery_

In [None]:
import os

import pandas as pd
from google.cloud import bigquery
from google.colab import auth, drive

# Mount Google Drive so the monthly CSVs can be written directly into it.
drive.mount('/content/drive')

# Authenticate the Colab user, then open a BigQuery client on the billing project.
auth.authenticate_user()
client = bigquery.Client(project="irm-fin-acct-dp-prod")  # Replace with your project ID

# Region exported by this cell and the Drive folder that receives its files.
region = "LATAM"
export_folder = "/content/drive/My Drive/BQ_EXP/BigQuery_Exports_LATAM"

# Report whether the folder already exists, creating it when it does not.
# exist_ok=True keeps the call safe if the folder appears after the check.
if os.path.exists(export_folder):
    print(f"Folder already exists: {export_folder}")
else:
    os.makedirs(export_folder, exist_ok=True)
    print(f"Folder created: {export_folder}")

# First day of every month in the export window.
months = pd.date_range(start="2023-01-01", end="2024-12-31", freq="MS")

# Run one query and write one CSV per month.
for start_date in months:
    start_date_str = start_date.strftime('%Y-%m-%d')
    # MonthEnd(0) rolls the month-start date forward to the last day of that month.
    end_date_str = (start_date + pd.offsets.MonthEnd(0)).strftime('%Y-%m-%d')

    # GROUP BY ALL groups by every non-aggregated SELECT column, so the
    # grouping keys can never drift out of sync with the SELECT list.
    # NOTE(review): the BETWEEN bounds are date strings; this assumes
    # calendar_date is a DATE column. Confirm against the table schema.
    # NOTE(review): amount_USD, revenue_recognition_amount and
    # allocated_quantity are grouping keys while amount and quantity are
    # summed. Confirm that is the intended aggregation.
    query = f"""
    SELECT
        billing_system, invoicing_country_code, calendar_date, invoice_date, invoice_id, year_month,
        transaction_country_name, transaction_cluster, transaction_region, customer_L1_code,
        customer_L1_name, imga, billing_cycle, global_industry_indicator, sf_country_parent_id,
        sf_country_parent_name, oracle_account_number, oracle_account_name, salesforce_billing_account,
        bill_code, description, product_id, product_summary_type, bill_code_data_source, gl_account,
        gl_account_description, revenue_classification, currency, rate, SUM(quantity) AS quantity,
        number_of_months, SUM(amount) AS amount, rate_USD, amount_USD, bill_code_sfdc, unit_of_measure,
        storage_from_date, storage_to_date, original_invoice_id, revenue_recognition_date,
        bill_in_advance_flag, bill_in_advance_month_number, revenue_recognition_amount,
        allocated_quantity
    FROM `irm-fin-acct-dp-prod.rpt_billing.billing_detail`
    WHERE
        transaction_region = '{region}'
        AND calendar_date BETWEEN '{start_date_str}' AND '{end_date_str}'
        AND revenue_classification = 'Storage'
        AND product_summary_type = 'Records Management'
    GROUP BY ALL
    """

    # Execute the query and load the full result into a pandas DataFrame.
    query_job = client.query(query)
    df = query_job.to_dataframe()

    # Write the month's rows to <export_folder>/BigQuery_Data_<YYYY>_<MM>.csv
    file_name = f"BigQuery_Data_{start_date.strftime('%Y_%m')}.csv"
    output_path = os.path.join(export_folder, file_name)
    df.to_csv(output_path, index=False)

    print(f"Data for {start_date_str} to {end_date_str} saved to {output_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder created: /content/drive/My Drive/BigQuery_Exports_LATAM
Data for 2023-01-01 to 2023-01-31 saved to /content/drive/My Drive/BigQuery_Exports_LATAM/BigQuery_Data_2023_01.csv
Data for 2023-02-01 to 2023-02-28 saved to /content/drive/My Drive/BigQuery_Exports_LATAM/BigQuery_Data_2023_02.csv
Data for 2023-03-01 to 2023-03-31 saved to /content/drive/My Drive/BigQuery_Exports_LATAM/BigQuery_Data_2023_03.csv
Data for 2023-04-01 to 2023-04-30 saved to /content/drive/My Drive/BigQuery_Exports_LATAM/BigQuery_Data_2023_04.csv
Data for 2023-05-01 to 2023-05-31 saved to /content/drive/My Drive/BigQuery_Exports_LATAM/BigQuery_Data_2023_05.csv
Data for 2023-06-01 to 2023-06-30 saved to /content/drive/My Drive/BigQuery_Exports_LATAM/BigQuery_Data_2023_06.csv
Data for 2023-07-01 to 2023-07-31 saved to /content/drive/My Drive/BigQuery_Exports_LATAM/BigQuery_Data_2023_07.

In [None]:
import os

import pandas as pd
from google.cloud import bigquery
from google.colab import auth, drive

# Mount Google Drive so the monthly CSVs can be written directly into it.
drive.mount('/content/drive')

# Authenticate the Colab user, then open a BigQuery client on the billing project.
auth.authenticate_user()
client = bigquery.Client(project="irm-fin-acct-dp-prod")  # Replace with your project ID

# Region exported by this cell and the Drive folder that receives its files.
region = "EMEA"
export_folder = "/content/drive/My Drive/BQ_EXP/BigQuery_Exports_EMEA"

# Report whether the folder already exists, creating it when it does not.
# exist_ok=True keeps the call safe if the folder appears after the check.
if os.path.exists(export_folder):
    print(f"Folder already exists: {export_folder}")
else:
    os.makedirs(export_folder, exist_ok=True)
    print(f"Folder created: {export_folder}")

# First day of every month in the export window.
months = pd.date_range(start="2023-01-01", end="2024-12-31", freq="MS")

# Run one query and write one CSV per month.
for start_date in months:
    start_date_str = start_date.strftime('%Y-%m-%d')
    # MonthEnd(0) rolls the month-start date forward to the last day of that month.
    end_date_str = (start_date + pd.offsets.MonthEnd(0)).strftime('%Y-%m-%d')

    # GROUP BY ALL groups by every non-aggregated SELECT column, so the
    # grouping keys can never drift out of sync with the SELECT list.
    # NOTE(review): the BETWEEN bounds are date strings; this assumes
    # calendar_date is a DATE column. Confirm against the table schema.
    # NOTE(review): amount_USD, revenue_recognition_amount and
    # allocated_quantity are grouping keys while amount and quantity are
    # summed. Confirm that is the intended aggregation.
    query = f"""
    SELECT
        billing_system, invoicing_country_code, calendar_date, invoice_date, invoice_id, year_month,
        transaction_country_name, transaction_cluster, transaction_region, customer_L1_code,
        customer_L1_name, imga, billing_cycle, global_industry_indicator, sf_country_parent_id,
        sf_country_parent_name, oracle_account_number, oracle_account_name, salesforce_billing_account,
        bill_code, description, product_id, product_summary_type, bill_code_data_source, gl_account,
        gl_account_description, revenue_classification, currency, rate, SUM(quantity) AS quantity,
        number_of_months, SUM(amount) AS amount, rate_USD, amount_USD, bill_code_sfdc, unit_of_measure,
        storage_from_date, storage_to_date, original_invoice_id, revenue_recognition_date,
        bill_in_advance_flag, bill_in_advance_month_number, revenue_recognition_amount,
        allocated_quantity
    FROM `irm-fin-acct-dp-prod.rpt_billing.billing_detail`
    WHERE
        transaction_region = '{region}'
        AND calendar_date BETWEEN '{start_date_str}' AND '{end_date_str}'
        AND revenue_classification = 'Storage'
        AND product_summary_type = 'Records Management'
    GROUP BY ALL
    """

    # Execute the query and load the full result into a pandas DataFrame.
    query_job = client.query(query)
    df = query_job.to_dataframe()

    # Write the month's rows to <export_folder>/BigQuery_Data_<YYYY>_<MM>.csv
    file_name = f"BigQuery_Data_{start_date.strftime('%Y_%m')}.csv"
    output_path = os.path.join(export_folder, file_name)
    df.to_csv(output_path, index=False)

    print(f"Data for {start_date_str} to {end_date_str} saved to {output_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder created: /content/drive/My Drive/BigQuery_Exports_EMEA
Data for 2023-01-01 to 2023-01-31 saved to /content/drive/My Drive/BigQuery_Exports_EMEA/BigQuery_Data_2023_01.csv
Data for 2023-02-01 to 2023-02-28 saved to /content/drive/My Drive/BigQuery_Exports_EMEA/BigQuery_Data_2023_02.csv
Data for 2023-03-01 to 2023-03-31 saved to /content/drive/My Drive/BigQuery_Exports_EMEA/BigQuery_Data_2023_03.csv
Data for 2023-04-01 to 2023-04-30 saved to /content/drive/My Drive/BigQuery_Exports_EMEA/BigQuery_Data_2023_04.csv
Data for 2023-05-01 to 2023-05-31 saved to /content/drive/My Drive/BigQuery_Exports_EMEA/BigQuery_Data_2023_05.csv
Data for 2023-06-01 to 2023-06-30 saved to /content/drive/My Drive/BigQuery_Exports_EMEA/BigQuery_Data_2023_06.csv
Data for 2023-07-01 to 2023-07-31 saved to /content/drive/My Drive/BigQuery_Exports_EMEA/BigQuery_Data_2023_07.csv
Data

In [None]:
import os

import pandas as pd
from google.cloud import bigquery
from google.colab import auth, drive

# Mount Google Drive so the monthly CSVs can be written directly into it.
drive.mount('/content/drive')

# Authenticate the Colab user, then open a BigQuery client on the billing project.
auth.authenticate_user()
client = bigquery.Client(project="irm-fin-acct-dp-prod")  # Replace with your project ID

# Region exported by this cell and the Drive folder that receives its files.
region = "APAC"
export_folder = "/content/drive/My Drive/BQ_EXP/BigQuery_Exports_APAC"

# Report whether the folder already exists, creating it when it does not.
# exist_ok=True keeps the call safe if the folder appears after the check.
if os.path.exists(export_folder):
    print(f"Folder already exists: {export_folder}")
else:
    os.makedirs(export_folder, exist_ok=True)
    print(f"Folder created: {export_folder}")

# First day of every month in the export window.
months = pd.date_range(start="2023-01-01", end="2024-12-31", freq="MS")

# Run one query and write one CSV per month.
for start_date in months:
    start_date_str = start_date.strftime('%Y-%m-%d')
    # MonthEnd(0) rolls the month-start date forward to the last day of that month.
    end_date_str = (start_date + pd.offsets.MonthEnd(0)).strftime('%Y-%m-%d')

    # GROUP BY ALL groups by every non-aggregated SELECT column, so the
    # grouping keys can never drift out of sync with the SELECT list.
    # NOTE(review): the BETWEEN bounds are date strings; this assumes
    # calendar_date is a DATE column. Confirm against the table schema.
    # NOTE(review): amount_USD, revenue_recognition_amount and
    # allocated_quantity are grouping keys while amount and quantity are
    # summed. Confirm that is the intended aggregation.
    query = f"""
    SELECT
        billing_system, invoicing_country_code, calendar_date, invoice_date, invoice_id, year_month,
        transaction_country_name, transaction_cluster, transaction_region, customer_L1_code,
        customer_L1_name, imga, billing_cycle, global_industry_indicator, sf_country_parent_id,
        sf_country_parent_name, oracle_account_number, oracle_account_name, salesforce_billing_account,
        bill_code, description, product_id, product_summary_type, bill_code_data_source, gl_account,
        gl_account_description, revenue_classification, currency, rate, SUM(quantity) AS quantity,
        number_of_months, SUM(amount) AS amount, rate_USD, amount_USD, bill_code_sfdc, unit_of_measure,
        storage_from_date, storage_to_date, original_invoice_id, revenue_recognition_date,
        bill_in_advance_flag, bill_in_advance_month_number, revenue_recognition_amount,
        allocated_quantity
    FROM `irm-fin-acct-dp-prod.rpt_billing.billing_detail`
    WHERE
        transaction_region = '{region}'
        AND calendar_date BETWEEN '{start_date_str}' AND '{end_date_str}'
        AND revenue_classification = 'Storage'
        AND product_summary_type = 'Records Management'
    GROUP BY ALL
    """

    # Execute the query and load the full result into a pandas DataFrame.
    query_job = client.query(query)
    df = query_job.to_dataframe()

    # Write the month's rows to <export_folder>/BigQuery_Data_<YYYY>_<MM>.csv
    file_name = f"BigQuery_Data_{start_date.strftime('%Y_%m')}.csv"
    output_path = os.path.join(export_folder, file_name)
    df.to_csv(output_path, index=False)

    print(f"Data for {start_date_str} to {end_date_str} saved to {output_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Folder created: /content/drive/My Drive/BigQuery_Exports_APAC
Data for 2023-01-01 to 2023-01-31 saved to /content/drive/My Drive/BigQuery_Exports_APAC/BigQuery_Data_2023_01.csv
Data for 2023-02-01 to 2023-02-28 saved to /content/drive/My Drive/BigQuery_Exports_APAC/BigQuery_Data_2023_02.csv
Data for 2023-03-01 to 2023-03-31 saved to /content/drive/My Drive/BigQuery_Exports_APAC/BigQuery_Data_2023_03.csv
Data for 2023-04-01 to 2023-04-30 saved to /content/drive/My Drive/BigQuery_Exports_APAC/BigQuery_Data_2023_04.csv
Data for 2023-05-01 to 2023-05-31 saved to /content/drive/My Drive/BigQuery_Exports_APAC/BigQuery_Data_2023_05.csv
Data for 2023-06-01 to 2023-06-30 saved to /content/drive/My Drive/BigQuery_Exports_APAC/BigQuery_Data_2023_06.csv
Data for 2023-07-01 to 2023-07-31 saved to /content/drive/My Drive/BigQuery_Exports_APAC/BigQuery_Data_2023_07.csv
Data

**Manual step: run the Alteryx workflow before continuing with the cells below.**
---



In [None]:
from google.colab import auth
from googleapiclient.discovery import build
from google.cloud import bigquery
import pandas as pd

# Authenticate with Google Colab
auth.authenticate_user()

# Drive API client; no explicit credentials are passed to build().
drive_service = build('drive', 'v3')

# Specify the folder ID
folder_id = "100gMaQKpTdrCx1IYJaAyQgNU75swnVLZ"  # Your folder ID

# CSV files directly inside the folder, excluding files that are in the trash.
query = f"'{folder_id}' in parents and mimeType='text/csv' and trashed=false"

# files.list returns results one page at a time; follow nextPageToken so that
# every matching file is collected, not just the first page.
files = []
page_token = None
while True:
    response = drive_service.files().list(
        q=query,
        fields="nextPageToken, files(id, name)",
        pageToken=page_token,
    ).execute()
    files.extend(response.get('files', []))
    page_token = response.get('nextPageToken')
    if not page_token:
        break

if not files:
    # Stop here: continuing would run the WRITE_TRUNCATE load below with an
    # empty DataFrame.
    raise FileNotFoundError("No CSV files found in the folder.")

print(f"Found {len(files)} CSV files in the folder:")
for file in files:
    print(f"Name: {file['name']}, ID: {file['id']}")

# Download each CSV and parse it. The frames are collected in a list and
# concatenated once, instead of re-copying a growing DataFrame every iteration.
frames = []
for file in files:
    file_name = file['name']
    file_id = file['id']

    # Download the file content to the Colab working directory
    request = drive_service.files().get_media(fileId=file_id)
    file_path = f"/content/{file_name}"
    with open(file_path, "wb") as f:
        f.write(request.execute())

    # Load the CSV into a DataFrame
    df = pd.read_csv(file_path)
    frames.append(df)

combined_data = pd.concat(frames, ignore_index=True)

# Preview the combined DataFrame
print("Combined DataFrame:")
print(combined_data.head())

# Set up BigQuery client
project_id = "irm-fin-acct-dp-prod"
dataset_id = "Etl_alt_BQ"
table_id = "PXQ_NA_Clean"

client = bigquery.Client(project=project_id)

# WRITE_TRUNCATE replaces the destination table's existing rows with this load.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

# Load the combined data to BigQuery
job = client.load_table_from_dataframe(combined_data, f"{project_id}.{dataset_id}.{table_id}", job_config=job_config)
job.result()  # Wait for the job to complete

print(f"Data successfully loaded into {project_id}.{dataset_id}.{table_id}")


Found 7 CSV files in the folder:
Name: Q2_2023_PxQ_NA.csv, ID: 11SBewICln91AVwufXPJcGAyBUlyTgYwJ
Name: Q3_2023_PxQ_NA.csv, ID: 11feNZ5fEJEszROpHOIHEJE9DKCTccF40
Name: Q3_2024_PxQ_NA.csv, ID: 11hKKdAT0H7tfOMZOSOh86RBeY5Ew27Zg
Name: Q1_2024_PxQ_NA.csv, ID: 11NcjxD3USeFNMmUfvORYqyvaPN7xa3r7
Name: Q4_2023_PxQ_NA.csv, ID: 11lHr1pUUkkTVM8Nh57R6BS4OVLJ9rJ3q
Name: Q2_2024_PxQ_NA.csv, ID: 11T36kxa7i_csfJThfPkinxh3xzvRz8DU
Name: Q4_2024_PxQ_NA.csv, ID: 11l_SNE1MZjyn7skhl4F9oCjs34taTbjH
Combined DataFrame:
     Country Cust ID Transaction Date     Amount  Activity  Cube Price Cycle  \
0  Argentina   AK001       2023-06-01   21274.65     579.0    0.045452   MAR   
1  Argentina   AK001       2023-05-01   21274.65     579.0    0.045452   MAR   
2  Argentina   AK001       2023-04-01   42549.30    1158.0    0.045452   MAR   
3  Argentina   AK002       2023-06-01  204600.74    7355.0    0.034411   MAR   
4  Argentina   AK002       2023-05-01  113418.39    7357.0    0.034411   MAR   

             Max_M

In [None]:
from google.colab import auth
from googleapiclient.discovery import build
from google.cloud import bigquery
import pandas as pd

# Purpose of this cell: pull every CSV from one Google Drive folder, stack them
# into a single DataFrame and replace the BigQuery table NA_price_movements_Clean
# with the result.

# Authenticate with Google Colab
auth.authenticate_user()

# Set up Google Drive API
drive_service = build('drive', 'v3')

# Specify the folder ID
folder_id = "1wFqzM5wYLb2LFEKsQnVhH82LJ1FgC0Ve"  # Your folder ID

# Search for CSV files in the folder. Trashed files are filtered out explicitly
# (files.list returns them unless told otherwise), and nextPageToken is followed
# so that a folder holding more than one page of results is listed completely.
query = f"'{folder_id}' in parents and mimeType='text/csv' and trashed=false"
files = []
page_token = None
while True:
    response = drive_service.files().list(
        q=query,
        fields="nextPageToken, files(id, name)",
        pageToken=page_token,
    ).execute()
    files.extend(response.get('files', []))
    page_token = response.get('nextPageToken')
    if not page_token:
        break

if not files:
    print("No CSV files found in the folder.")
else:
    print(f"Found {len(files)} CSV files in the folder:")
    for file in files:
        print(f"Name: {file['name']}, ID: {file['id']}")

# Download and parse each CSV. The per-file frames are collected in a list and
# concatenated once at the end: this avoids re-copying the accumulated data on
# every iteration and avoids concatenating onto an empty placeholder DataFrame.
frames = []
for file in files:
    file_name = file['name']
    file_id = file['id']

    # Download the file content
    request = drive_service.files().get_media(fileId=file_id)
    file_path = f"/content/{file_name}"
    with open(file_path, "wb") as f:
        f.write(request.execute())

    # Load the CSV into a DataFrame
    df = pd.read_csv(file_path)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty DataFrame when the
# folder had no CSV files.
combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Preview the combined DataFrame
print("Combined DataFrame:")
print(combined_data.head())

# Set up BigQuery client
project_id = "irm-fin-acct-dp-prod"
dataset_id = "Etl_alt_BQ"
table_id = "NA_price_movements_Clean"

client = bigquery.Client(project=project_id)

# Auto-detect schema and load the combined DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

# Load the combined data to BigQuery. WRITE_TRUNCATE replaces the table, so the
# load is only attempted when at least one CSV was found; otherwise the existing
# table is left untouched.
if files:
    job = client.load_table_from_dataframe(combined_data, f"{project_id}.{dataset_id}.{table_id}", job_config=job_config)
    job.result()  # Wait for the job to complete

    print(f"Data successfully loaded into {project_id}.{dataset_id}.{table_id}")
else:
    print(f"Skipped BigQuery load: no CSV files found, {project_id}.{dataset_id}.{table_id} left unchanged")


Found 8 CSV files in the folder:
Name: Q1_2024_NA_Price_Movements.csv, ID: 11NUj4Lqtk2JUvtdQzANqdGL-XHlemUw3
Name: Q4_2024_NA_Price_Movements.csv, ID: 11KQt_07hdtycUbvFlGQ0mJGgpAzEu6PS
Name: Q3_2024_NA_Price_Movements.csv, ID: 11EaN99WiVJoMqsZZXKdNKC5deij5zQ9Q
Name: Q1_2023_NA_Price_Movements.csv, ID: 10qbywdElc-kzi8D2jwjnQeY7ySZblmkz
Name: Q3_2023_NA_Price_Movements.csv, ID: 113wCH-8dunc0al6VYhnPABPS5WpY1NMY
Name: Q2_2023_NA_Price_Movements.csv, ID: 116UiLfKhCelW8TybSzHsJFiUWX82mksv
Name: Q4_2023_NA_Price_Movements.csv, ID: 11FPfYQwYv-Far7mK6x1HjqkgRYah21In
Name: Q2_2024_NA_Price_Movements.csv, ID: 10j5PR7RTuOoZB8ld5IWaIlhdZOnTSb6Z


  combined_data = pd.concat([combined_data, df], ignore_index=True)
  combined_data = pd.concat([combined_data, df], ignore_index=True)
  combined_data = pd.concat([combined_data, df], ignore_index=True)


Combined DataFrame:
          Country Cust ID Ranking Transaction Date Cycle  Total Amount  \
0       Indonesia   21150  Rank 1       2024-02-01   MAA     609040.00   
1   United States   M0167  Rank 1       2024-02-01   MAA       1256.57   
2          Canada   E6902  Rank 1       2024-03-01   MAA       1451.30   
3   United States   SKA9E  Rank 1       2024-03-01   MAA       1166.74   
4  Czech Republic   CZH85  Rank 1       2024-01-01   MAA       3578.69   

   Unit Quantity    Unit Rate  Price Movement  Price Revenue Impact  \
0         135.00  4511.407407      762.407407             102925.00   
1         637.20     1.972018        0.000000                  0.00   
2        1358.89     1.068004        0.000000                  0.00   
3        4704.60     0.248000        0.000000                  0.00   
4         654.00     5.472003        0.591009                386.52   

             Name        MOM        Q  
0  Price Increase  290375.00  Q1_2024  
1         Nothing       0.00

In [None]:
from google.colab import auth
from googleapiclient.discovery import build
from google.cloud import bigquery
import pandas as pd

# Purpose of this cell: pull every CSV from one Google Drive folder, stack them
# into a single DataFrame and replace the BigQuery table NA_Debits_Credits_Clean
# with the result.

# Authenticate with Google Colab
auth.authenticate_user()

# Set up Google Drive API
drive_service = build('drive', 'v3')

# Specify the folder ID
folder_id = "12fDqdHNYq3PokSZrRNJSVggAwOzfqnjn"  # Your folder ID

# Search for CSV files in the folder. Trashed files are filtered out explicitly
# (files.list returns them unless told otherwise), and nextPageToken is followed
# so that a folder holding more than one page of results is listed completely.
query = f"'{folder_id}' in parents and mimeType='text/csv' and trashed=false"
files = []
page_token = None
while True:
    response = drive_service.files().list(
        q=query,
        fields="nextPageToken, files(id, name)",
        pageToken=page_token,
    ).execute()
    files.extend(response.get('files', []))
    page_token = response.get('nextPageToken')
    if not page_token:
        break

if not files:
    print("No CSV files found in the folder.")
else:
    print(f"Found {len(files)} CSV files in the folder:")
    for file in files:
        print(f"Name: {file['name']}, ID: {file['id']}")

# Download and parse each CSV. The per-file frames are collected in a list and
# concatenated once at the end: this avoids re-copying the accumulated data on
# every iteration and avoids concatenating onto an empty placeholder DataFrame.
frames = []
for file in files:
    file_name = file['name']
    file_id = file['id']

    # Download the file content
    request = drive_service.files().get_media(fileId=file_id)
    file_path = f"/content/{file_name}"
    with open(file_path, "wb") as f:
        f.write(request.execute())

    # Load the CSV into a DataFrame
    df = pd.read_csv(file_path)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty DataFrame when the
# folder had no CSV files.
combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Preview the combined DataFrame
print("Combined DataFrame:")
print(combined_data.head())

# Set up BigQuery client
project_id = "irm-fin-acct-dp-prod"
dataset_id = "Etl_alt_BQ"
table_id = "NA_Debits_Credits_Clean"

client = bigquery.Client(project=project_id)

# Auto-detect schema and load the combined DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

# Load the combined data to BigQuery. WRITE_TRUNCATE replaces the table, so the
# load is only attempted when at least one CSV was found; otherwise the existing
# table is left untouched.
if files:
    job = client.load_table_from_dataframe(combined_data, f"{project_id}.{dataset_id}.{table_id}", job_config=job_config)
    job.result()  # Wait for the job to complete

    print(f"Data successfully loaded into {project_id}.{dataset_id}.{table_id}")
else:
    print(f"Skipped BigQuery load: no CSV files found, {project_id}.{dataset_id}.{table_id} left unchanged")


Found 1 CSV files in the folder:
Name: NA Debits & Credits.csv, ID: 12gKwFbLAM5yW1PGJKE8Zvoeo7NZ63eq2
Combined DataFrame:
     Cycle Transaction Date        Country   Cust ID      Oracle ID  \
0   Debits       2023-09-25         Canada     80519   07731.080519   
1  Credits       2023-05-09       Thailand  RE336818       30000275   
2  Credits       2023-01-31  United States     A3708   01322.0A3708   
3  Credits       2023-03-08  United States     IS475   07330.0IS475   
4  Credits       2023-02-01        Ireland     IC469  8251100.IC469   

                         Customer Name    Amount  
0   WORKPLACE SAFETY & INSURANCE BOARD  10834.80  
1                    TILLEKE & GIBBINS -49048.80  
2   MEDICAL INFORMATION MANAGEMENT SOL     -2.92  
3  MMG-MCLAREN MED GRP BAY INTERNAL ME   -208.28  
4  SOUTH INFIRMARY VICTORIA-ECG/HOLTER    -20.35  
Data successfully loaded into irm-fin-acct-dp-prod.Etl_alt_BQ.NA_Debits_Credits_Clean


In [None]:
from google.colab import auth
from googleapiclient.discovery import build
from google.cloud import bigquery
import pandas as pd

# Purpose of this cell: pull every CSV from one Google Drive folder, stack them
# into a single DataFrame and replace the BigQuery table NA_Customer_Names_Clean
# with the result.

# Authenticate with Google Colab
auth.authenticate_user()

# Set up Google Drive API
drive_service = build('drive', 'v3')

# Specify the folder ID
folder_id = "12dSomeFN1jHJnMUsyKdGnyAcGO78t68v"  # Your folder ID

# Search for CSV files in the folder. Trashed files are filtered out explicitly
# (files.list returns them unless told otherwise), and nextPageToken is followed
# so that a folder holding more than one page of results is listed completely.
query = f"'{folder_id}' in parents and mimeType='text/csv' and trashed=false"
files = []
page_token = None
while True:
    response = drive_service.files().list(
        q=query,
        fields="nextPageToken, files(id, name)",
        pageToken=page_token,
    ).execute()
    files.extend(response.get('files', []))
    page_token = response.get('nextPageToken')
    if not page_token:
        break

if not files:
    print("No CSV files found in the folder.")
else:
    print(f"Found {len(files)} CSV files in the folder:")
    for file in files:
        print(f"Name: {file['name']}, ID: {file['id']}")

# Download and parse each CSV. The per-file frames are collected in a list and
# concatenated once at the end: this avoids re-copying the accumulated data on
# every iteration and avoids concatenating onto an empty placeholder DataFrame.
frames = []
for file in files:
    file_name = file['name']
    file_id = file['id']

    # Download the file content
    request = drive_service.files().get_media(fileId=file_id)
    file_path = f"/content/{file_name}"
    with open(file_path, "wb") as f:
        f.write(request.execute())

    # Load the CSV into a DataFrame
    df = pd.read_csv(file_path)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty DataFrame when the
# folder had no CSV files.
combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Preview the combined DataFrame
print("Combined DataFrame:")
print(combined_data.head())

# Set up BigQuery client
project_id = "irm-fin-acct-dp-prod"
dataset_id = "Etl_alt_BQ"
table_id = "NA_Customer_Names_Clean"

client = bigquery.Client(project=project_id)

# Auto-detect schema and load the combined DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

# Load the combined data to BigQuery. WRITE_TRUNCATE replaces the table, so the
# load is only attempted when at least one CSV was found; otherwise the existing
# table is left untouched.
if files:
    job = client.load_table_from_dataframe(combined_data, f"{project_id}.{dataset_id}.{table_id}", job_config=job_config)
    job.result()  # Wait for the job to complete

    print(f"Data successfully loaded into {project_id}.{dataset_id}.{table_id}")
else:
    print(f"Skipped BigQuery load: no CSV files found, {project_id}.{dataset_id}.{table_id} left unchanged")


Found 1 CSV files in the folder:
Name: NA Customer Names.csv, ID: 12dd79_Gx9dgB4gzcmqRsL5Dngql_cZLZ
Combined DataFrame:
         Country   Cust ID         Oracle ID  \
0      Australia  ON400467  9082113.17067066   
1  United States     J7317      07213.0J7317   
2  United States     CF869      01222.0CF869   
3         France     FY524   10683418.SFCORE   
4          Chile     CSF57         656775009   

                         Customer Name  
0                     TRANSOCEAN GROUP  
1  NEW JERSEY STATE NURSES ASSOCIATION  
2     ACC CAPITAL HOLDINGS CORPORATION  
3                  BIOTRIAL RENNES SAS  
4  COOPERATIVA AHORRO Y CRED FINANCOOP  
Data successfully loaded into irm-fin-acct-dp-prod.Etl_alt_BQ.NA_Customer_Names_Clean


In [None]:
from google.colab import auth
from googleapiclient.discovery import build
from google.cloud import bigquery
import pandas as pd

# Purpose of this cell: pull every CSV from one Google Drive folder, normalise
# the column names, stack the files into a single DataFrame, fill missing
# values, and replace the BigQuery table NA_Fixed_Billing_Clean with the result.

# Authenticate with Google Colab
auth.authenticate_user()

# Set up Google Drive API
drive_service = build('drive', 'v3')

# Specify the folder ID
folder_id = "1BuuL3rW-lpJZTAiMdP4oPqw4JQGaqBx3"  # Your folder ID

# Search for CSV files in the folder. Trashed files are filtered out explicitly
# (files.list returns them unless told otherwise), and nextPageToken is followed
# so that a folder holding more than one page of results is listed completely.
query = f"'{folder_id}' in parents and mimeType='text/csv' and trashed=false"
files = []
page_token = None
while True:
    response = drive_service.files().list(
        q=query,
        fields="nextPageToken, files(id, name)",
        pageToken=page_token,
    ).execute()
    files.extend(response.get('files', []))
    page_token = response.get('nextPageToken')
    if not page_token:
        break

if not files:
    print("No CSV files found in the folder.")
else:
    print(f"Found {len(files)} CSV files in the folder:")
    for i, file in enumerate(files):
        print(f"{i + 1}. Name: {file['name']}, ID: {file['id']}")

# Download and parse each CSV. The per-file frames are collected in a list and
# concatenated once at the end: this avoids re-copying the accumulated data on
# every iteration and avoids concatenating onto an empty placeholder DataFrame.
frames = []
for file in files:
    file_name = file['name']
    file_id = file['id']

    # Download the file content
    request = drive_service.files().get_media(fileId=file_id)
    file_path = f"/content/{file_name}"
    with open(file_path, "wb") as f:
        f.write(request.execute())

    # Load the CSV into a DataFrame
    df = pd.read_csv(file_path, low_memory=False)

    # Sanitize column names: spaces become underscores, parentheses are dropped
    df.columns = [col.replace(" ", "_").replace("(", "").replace(")", "") for col in df.columns]

    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty DataFrame when the
# folder had no CSV files. The concatenation already produces the union of all
# columns (cells for a column absent from a given file come back as NaN), so no
# separate reindex step is required.
combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Fill missing values per dtype: "Unknown" for string (object) columns, 0 for
# numeric columns. Columns of any other dtype are left as they are.
for column in combined_data.columns:
    if combined_data[column].dtype == 'object':  # String columns
        combined_data[column] = combined_data[column].fillna("Unknown")
    elif pd.api.types.is_numeric_dtype(combined_data[column]):  # Numeric columns
        combined_data[column] = combined_data[column].fillna(0)

# Preview combined DataFrame
print("Combined DataFrame:")
print(combined_data.head())
print(f"Total rows in combined DataFrame: {len(combined_data)}")

# Set up BigQuery client
project_id = "irm-fin-acct-dp-prod"
dataset_id = "Etl_alt_BQ"
table_id = "NA_Fixed_Billing_Clean"

client = bigquery.Client(project=project_id)

# Auto-detect schema and load the combined DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

# Load the combined data to BigQuery. WRITE_TRUNCATE replaces the table, so the
# load is only attempted when at least one CSV was found; otherwise the existing
# table is left untouched.
if files:
    job = client.load_table_from_dataframe(combined_data, f"{project_id}.{dataset_id}.{table_id}", job_config=job_config)
    job.result()  # Wait for the job to complete

    print(f"Data successfully loaded into {project_id}.{dataset_id}.{table_id}")
else:
    print(f"Skipped BigQuery load: no CSV files found, {project_id}.{dataset_id}.{table_id} left unchanged")


Found 5 CSV files in the folder:
1. Name: NA_Fixed_billing_NA.csv, ID: 12V-AhcuHeNVpY3y9hl1KwKR9HsKfSs1P
2. Name: NA_Fixed_billing_EMEA.csv, ID: 12BESccM2eYLZ4YIxYxsZGmnPofNYUKcc
3. Name: NA_Fixed_billing_APAC.csv, ID: 12VPUDkUATluXtlqXJ4a3Yudk6VMxCV6I
4. Name: NA_Fixed_billing_LATAM.csv, ID: 12OwvtU8gIJKMz3pR9tD5X5A1cur2swaR
5. Name: NA_Fixed_billing_INDIA.csv, ID: 12FIaxS28SCiXTEJB2j6n6RLeKFsF1qlR
Combined DataFrame:
  billing_system invoicing_country_code calendar_date        Date    INV_#  \
0            SKP                    USA    2024-10-31  2024-10-31  JWRJ166   
1            SKP                    USA    2024-08-31  2024-08-31  JSZK971   
2            SKP                    USA    2024-03-31  2024-03-31  JJDB650   
3            SKP                    USA    2024-06-30  2024-06-30  JNZT886   
4            SKP                    USA    2024-11-30  2024-11-30  JYKW202   

  Year_&_Month        Country Cluster   Region Cust_ID  ...  Cycle  \
0      2024-10  United States    East 