In [1]:
import os
import shutil
import subprocess
import tempfile
from google.cloud import storage


In [2]:
# Variables
# Local checkout of the repository whose files are uploaded. The path is
# machine-specific, so it can be overridden with the STOCK_REPO_PATH
# environment variable; the original location remains the default.
LOCAL_REPO = os.environ.get(
    "STOCK_REPO_PATH",
    "/home/manormanore/Documents/Git_Hub/StockPricePrediction/",
)
BUCKET_NAME = "stock_price_prediction_dataset"  # destination GCS bucket
BUCKET_PATH = "Codefiles"  # object-name prefix inside the bucket
PROJECT_ID = "striped-graph-440017-d7"  # GCP project passed to the storage client

# Initialize GCS client using Application Default Credentials (ADC)
client = storage.Client(project=PROJECT_ID)
bucket = client.get_bucket(BUCKET_NAME)


In [3]:

# Stage the repository's code files (.py, .h5, .ipynb) in a temporary
# directory, then upload the staged tree to the bucket under BUCKET_PATH.
# Relies on LOCAL_REPO, BUCKET_NAME, BUCKET_PATH and bucket being defined
# by an earlier cell.
CODE_SUFFIXES = (".py", ".h5", ".ipynb")

with tempfile.TemporaryDirectory() as temp_dir:
    # List tracked files (--cached) plus untracked files that are not ignored
    # (--others --exclude-standard).
    #   -z          NUL-separated, unquoted paths, so names containing
    #               non-ASCII or special characters come through verbatim.
    #   check=True  a failing git call raises CalledProcessError instead of
    #               silently producing an empty list (and a false "uploaded").
    result = subprocess.run(
        ["git", "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
        cwd=LOCAL_REPO,
        capture_output=True,
        text=True,
        check=True,
    )
    files = [path for path in result.stdout.split("\0") if path]

    # Filter for .py, .h5, and .ipynb files and copy them to the temp directory
    for file in files:
        if not file.endswith(CODE_SUFFIXES):
            continue
        src_path = os.path.join(LOCAL_REPO, file)
        if not os.path.isfile(src_path):
            # --cached lists index entries even when the file has been
            # deleted from the working tree; there is nothing to copy.
            print(f"Skipping {file}: not present in the working tree")
            continue
        dest_path = os.path.join(temp_dir, file)
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        shutil.copy2(src_path, dest_path)

    # Upload the temp directory contents to GCP Bucket
    for root, _, staged_names in os.walk(temp_dir):
        for staged_name in staged_names:
            local_path = os.path.join(root, staged_name)
            relative_path = os.path.relpath(local_path, temp_dir)
            # Object names use "/" as the separator regardless of the host
            # OS, so normalise whatever os.path.join produced.
            blob_path = os.path.join(BUCKET_PATH, relative_path).replace(os.sep, "/")
            blob = bucket.blob(blob_path)
            blob.upload_from_filename(local_path)
            print(f"Uploaded {relative_path} to gs://{BUCKET_NAME}/{blob_path}")

print("Selected .py, .h5, and .ipynb files uploaded to GCP bucket, excluding .gitignore entries.")


Uploaded GCP/filesbucket.ipynb to gs://stock_price_prediction_dataset/Codefiles/GCP/filesbucket.ipynb
Uploaded GCP/synclocal.ipynb to gs://stock_price_prediction_dataset/Codefiles/GCP/synclocal.ipynb
Uploaded GCP/GCPresorce.py to gs://stock_price_prediction_dataset/Codefiles/GCP/GCPresorce.py
Uploaded src/linear_regression.ipynb to gs://stock_price_prediction_dataset/Codefiles/src/linear_regression.ipynb
Uploaded src/XGBoost.ipynb to gs://stock_price_prediction_dataset/Codefiles/src/XGBoost.ipynb
Uploaded src/data_preprocessing.ipynb to gs://stock_price_prediction_dataset/Codefiles/src/data_preprocessing.ipynb
Uploaded src/SVM.ipynb to gs://stock_price_prediction_dataset/Codefiles/src/SVM.ipynb
Uploaded src/Feature Engineering.ipynb to gs://stock_price_prediction_dataset/Codefiles/src/Feature Engineering.ipynb
Uploaded src/PROJECT_DATA_CLEANING.ipynb to gs://stock_price_prediction_dataset/Codefiles/src/PROJECT_DATA_CLEANING.ipynb
Uploaded src/FeatureEng_and_ModelBiasDetn.ipynb to gs://

In [None]:
# Destination prefix and project for the upload cell that follows.
# NOTE(review): the recorded output of the CSV/PNG upload shows objects under
# "Data/...", not "Codefiles/..." — confirm which prefix is intended here.
BUCKET_PATH = "Codefiles"
PROJECT_ID = "striped-graph-440017-d7"

# Initialize GCS client using Application Default Credentials (ADC)
# BUCKET_NAME is not assigned in this cell; it must already be defined in the
# kernel (the earlier "Variables" cell sets it) before this cell is run.
client = storage.Client(project=PROJECT_ID)
bucket = client.get_bucket(BUCKET_NAME)

In [3]:

# Stage the repository's data files (.csv, .png) in a temporary directory,
# then upload the staged tree to the bucket under BUCKET_PATH.
# Relies on LOCAL_REPO, BUCKET_NAME, BUCKET_PATH and bucket being defined
# by an earlier cell.
DATA_SUFFIXES = (".csv", ".png")

with tempfile.TemporaryDirectory() as temp_dir:
    # List tracked files (--cached) plus untracked files that are not ignored
    # (--others --exclude-standard).
    #   -z          NUL-separated, unquoted paths, so names containing
    #               non-ASCII or special characters come through verbatim.
    #   check=True  a failing git call raises CalledProcessError instead of
    #               silently producing an empty list (and a false "uploaded").
    result = subprocess.run(
        ["git", "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
        cwd=LOCAL_REPO,
        capture_output=True,
        text=True,
        check=True,
    )
    files = [path for path in result.stdout.split("\0") if path]

    # Filter for .csv and .png files and copy them to the temp directory
    for file in files:
        if not file.endswith(DATA_SUFFIXES):
            continue
        src_path = os.path.join(LOCAL_REPO, file)
        if not os.path.isfile(src_path):
            # --cached lists index entries even when the file has been
            # deleted from the working tree; there is nothing to copy.
            print(f"Skipping {file}: not present in the working tree")
            continue
        dest_path = os.path.join(temp_dir, file)
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        shutil.copy2(src_path, dest_path)

    # Upload the temp directory contents to GCP Bucket
    for root, _, staged_names in os.walk(temp_dir):
        for staged_name in staged_names:
            local_path = os.path.join(root, staged_name)
            relative_path = os.path.relpath(local_path, temp_dir)
            # Object names use "/" as the separator regardless of the host
            # OS, so normalise whatever os.path.join produced.
            blob_path = os.path.join(BUCKET_PATH, relative_path).replace(os.sep, "/")
            blob = bucket.blob(blob_path)
            blob.upload_from_filename(local_path)
            print(f"Uploaded {relative_path} to gs://{BUCKET_NAME}/{blob_path}")

print("Selected CSV and PNG files uploaded to GCP bucket, excluding .gitignore entries.")


Uploaded data/final_dataset.csv to gs://stock_price_prediction_dataset/Data/data/final_dataset.csv
Uploaded data/ADS_Index.csv to gs://stock_price_prediction_dataset/Data/data/ADS_Index.csv
Uploaded data/merged_original_dataset.csv to gs://stock_price_prediction_dataset/Data/data/merged_original_dataset.csv
Uploaded data/fama_french.csv to gs://stock_price_prediction_dataset/Data/data/fama_french.csv
Uploaded data/formatted_datasets/ads_index.csv to gs://stock_price_prediction_dataset/Data/data/formatted_datasets/ads_index.csv
Uploaded data/formatted_datasets/sp500.csv to gs://stock_price_prediction_dataset/Data/data/formatted_datasets/sp500.csv
Uploaded data/formatted_datasets/historical_data.csv to gs://stock_price_prediction_dataset/Data/data/formatted_datasets/historical_data.csv
Uploaded data/formatted_datasets/fama_french.csv to gs://stock_price_prediction_dataset/Data/data/formatted_datasets/fama_french.csv
Uploaded data/formatted_datasets/fred_variables.csv to gs://stock_price_