In [1]:
# %pip install -q kaggle  # uncomment to install the Kaggle API client into this kernel's environment

In [2]:
import os
import shutil
import kaggle
import json
from kaggle.api.kaggle_api_extended import KaggleApi

In [3]:
def kaggle_config(kaggle_credentials):
    """Make sure ``~/.kaggle/kaggle.json`` exists, creating it if necessary.

    An existing ``kaggle.json`` is never overwritten; in that case the
    credentials passed in are ignored and a message is printed.

    Args:
        kaggle_credentials: JSON-serialisable object written verbatim to
            ``kaggle.json`` (the caller in this notebook passes a dict with
            ``"username"`` and ``"key"``).

    Returns:
        None. Progress and errors are reported with ``print``; a failure to
        write the file is reported but not raised.
    """
    # Path to the .kaggle directory inside the current user's home.
    kaggle_dir = os.path.join(os.path.expanduser('~'), '.kaggle')

    # Ensure the .kaggle directory exists, create it if not.
    if not os.path.exists(kaggle_dir):
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        os.makedirs(kaggle_dir, exist_ok=True)
        print(f"Created directory: {kaggle_dir}")
    else:
        print(f"Directory already exists: {kaggle_dir}")

    # Path to the kaggle.json file.
    kaggle_json = os.path.join(kaggle_dir, 'kaggle.json')

    # Never clobber credentials that are already in place.
    if os.path.exists(kaggle_json):
        print("kaggle.json already exists. Using existing credentials.")
        return

    print("Creating kaggle.json file with provided credentials...")
    try:
        # Serialise first so that a non-serialisable payload cannot leave a
        # truncated kaggle.json behind (which later runs would treat as valid).
        payload = json.dumps(kaggle_credentials)

        # Create the file as owner read/write only from the start, instead of
        # creating it with default permissions and tightening them afterwards.
        fd = os.open(kaggle_json, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            f.write(payload)

        # The mode passed to os.open is filtered by the umask and ignored for
        # pre-existing files, so set the final permissions explicitly as well.
        os.chmod(kaggle_json, 0o600)
        print("kaggle.json created successfully.")
    except Exception as e:
        # Best effort: report the problem and let the caller carry on.
        print(f"Error writing kaggle.json: {str(e)}")


In [4]:
def download_kaggle_datasets(target_dir='../data', datasets=None):
    """Download and unzip Kaggle datasets into ``target_dir``.

    Args:
        target_dir: Directory the datasets are extracted into. It is created
            if it does not exist.
        datasets: Optional iterable of ``(label, kaggle_identifier)`` pairs.
            ``label`` is only used in progress messages. Defaults to the
            S&P500 and Red Wine Quality datasets.

    Returns:
        None. Progress and errors are reported with ``print``. A failed
        download is reported and the remaining datasets are still attempted;
        if authentication fails nothing is downloaded.
    """
    if datasets is None:
        datasets = (
            ("S&P500", "camnugent/sandp500"),
            ("Red Wine", "uciml/red-wine-quality-cortez-et-al-2009"),
        )

    # Initialize Kaggle API
    api = KaggleApi()

    # The Kaggle client locates the credentials itself (per its documentation:
    # ~/.kaggle/kaggle.json or the KAGGLE_USERNAME / KAGGLE_KEY variables).
    try:
        api.authenticate()
    except Exception as e:
        print(f"Authentication failed: {str(e)}")
        return

    # Make sure the data directory exists.
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)
        print(f"Created directory: {target_dir}")
    else:
        print(f"Directory already exists: {target_dir}")

    failed = []
    for label, identifier in datasets:
        print(f"Downloading {label} dataset...")
        try:
            # 'path' specifies where to save the dataset;
            # 'unzip=True' automatically extracts the downloaded zip file.
            api.dataset_download_files(identifier, path=target_dir, unzip=True)
            print(f"{label} dataset downloaded successfully.")
        except Exception as e:
            print(f"Failed to download {label} dataset: {str(e)}")
            failed.append(label)

    # Only claim overall success when every download actually succeeded.
    if failed:
        print(f"Finished with errors; could not download: {', '.join(failed)}.")
    else:
        print(f"Datasets downloaded successfully to the '{target_dir}' directory.")



In [5]:
if __name__ == "__main__":

    try:
        # SECURITY: never commit Kaggle credentials to the notebook. Any key
        # that was ever stored in this cell should be treated as compromised
        # and regenerated from the Kaggle account settings page.
        # Credentials are taken from the environment instead.
        kaggle_username = os.environ.get("KAGGLE_USERNAME")
        kaggle_key = os.environ.get("KAGGLE_KEY")

        if kaggle_username and kaggle_key:
            # Configure environment to be able to log in and download files from Kaggle.
            kaggle_config({"username": kaggle_username, "key": kaggle_key})
        else:
            # Without both variables there is nothing to write; authentication
            # then depends on a kaggle.json that is already in place.
            print("KAGGLE_USERNAME / KAGGLE_KEY not set; relying on an existing kaggle.json.")

        # Now we can download the files.
        download_kaggle_datasets()
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        

Directory already exists: /root/.kaggle
Directory already exists: ../data
Downloading S&P500 dataset...
Dataset URL: https://www.kaggle.com/datasets/camnugent/sandp500
S&P500 dataset downloaded successfully.
Downloading Red Wine dataset...
Dataset URL: https://www.kaggle.com/datasets/uciml/red-wine-quality-cortez-et-al-2009
Red Wine dataset downloaded successfully.
Datasets downloaded successfully to the 'data' directory.
