In [18]:
import io
import json
import os
from datetime import datetime

import pandas as pd
import requests
from dateutil.relativedelta import relativedelta
from minio import Minio

In [24]:
# Function to Setup MinIO client

def set_up_minio(connection_str, access_key, secret_key, bucket_name, secure=False):
    """
    Create a MinIO client and make sure the target bucket exists.

    Args:
        connection_str: Endpoint handed to ``Minio`` unchanged (e.g. "localhost:9000").
        access_key: Access key forwarded to the ``Minio`` constructor.
        secret_key: Secret key forwarded to the ``Minio`` constructor.
        bucket_name: Bucket to check; it is created when ``bucket_exists``
            reports that it is missing.
        secure: Forwarded as ``Minio(secure=...)``. Defaults to ``False``, the
            value that used to be hard-coded here, so existing callers are
            unaffected; pass ``True`` for endpoints served over TLS.

    Returns:
        The constructed ``Minio`` client.
    """
    client = Minio(
        connection_str,
        access_key=access_key,
        secret_key=secret_key,
        secure=secure,
    )

    # Ensure bucket exists so later uploads do not fail on a missing bucket.
    if not client.bucket_exists(bucket_name):
        client.make_bucket(bucket_name)

    return client


In [25]:
# Function to get data from the api and pass to Minio
def get_data(base_url, client, bucket_name, company_name, start_date, end_date,
             api_key=None, timeout=30):
    """
    Download hourly intraday data for every month between start_date and
    end_date (inclusive, both in 'YYYY-MM' format) and upload each month to MinIO.

    Each month is stored through ``upload_to_minio`` under the path
    ``"<company_name>/<YYYY-MM>"`` so that different companies no longer
    overwrite one another's objects for the same month.

    Args:
        base_url: URL queried with ``requests.get``.
        client: Object handed to ``upload_to_minio`` as the MinIO client.
        bucket_name: Target bucket name handed to ``upload_to_minio``.
        company_name: Ticker symbol sent as the ``symbol`` query parameter.
        start_date: First month to download, 'YYYY-MM'.
        end_date: Last month to download, 'YYYY-MM'.
        api_key: API key sent as ``apikey``. When ``None`` the
            ``ALPHAVANTAGE_API_KEY`` environment variable is used, falling
            back to the key that was previously hard-coded.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the run.

    Returns:
        None. Failures are reported with ``print`` and never raised: a failed
        month is skipped, and invalid dates abort the whole call.
    """
    # Top-level response key that carries the data for interval='60min'.
    series_key = "Time Series (60min)"

    try:
        # Parse dates
        start = datetime.strptime(start_date, "%Y-%m")
        end = datetime.strptime(end_date, "%Y-%m")

        if start > end:
            raise ValueError("Start date must not be after end date.")

        if api_key is None:
            # NOTE(review): the literal fallback is the legacy embedded key, kept
            # only so existing runs keep working. Rotate it and drop the fallback.
            api_key = os.environ.get("ALPHAVANTAGE_API_KEY", 'IFBBUTAZ8NWO52MF')

        current = start
        while current <= end:
            current_month = current.strftime("%Y-%m")
            # Advance first so every `continue` below still moves to the next month.
            current += relativedelta(months=1)

            params = {
                'outputsize': 'full',
                'extended_hours': 'false',
                'apikey': api_key,
                'interval': '60min',
                'function': 'TIME_SERIES_INTRADAY',
                'symbol': company_name,
                'month': current_month
            }

            try:
                response = requests.get(base_url, params=params, timeout=timeout)
                response.raise_for_status()
                payload = response.json()
            except Exception as e:
                print(f"Download failed for {company_name} {current_month}: {e}")
                continue

            # A successful HTTP response may still carry only a message (for
            # example an error or rate-limit notice) instead of data; storing it
            # would make the month look present to later runs.
            if not isinstance(payload, dict) or series_key not in payload:
                print(f"Skipping {company_name} {current_month}: response has no '{series_key}' entry")
                continue

            try:
                upload_to_minio(client, payload, bucket_name, f"{company_name}/{current_month}")
            except Exception as e:
                print(f"Upload failed for {current_month}: {e}")

    except Exception as e:
        print(f"Error: {e}")

In [26]:
# Function to upload the json data to a minio bucket
def upload_to_minio(client,data,bucket_name,file_path):
    """
    Serialise ``data`` as indented UTF-8 JSON and store it via ``client.put_object``.

    The object is written to bucket ``bucket_name`` under the key
    ``"<bucket_name>/<file_path>"`` with content type ``application/json``.
    """
    # Encode once so the same bytes provide both the stream and its length.
    payload = json.dumps(data, indent=2).encode("utf-8")
    destination = f"{bucket_name}/{file_path}"

    client.put_object(
        bucket_name,
        destination,
        io.BytesIO(payload),
        length=len(payload),
        content_type="application/json",
    )


In [30]:
# Main function that combines all the steps

def main(company_list, connection_str, access_key, secret_key, bucket_name, start_date, end_date):
    """
    Download the months of [start_date, end_date] that are not in the bucket yet.

    The bucket is inspected through ``get_current_file``, which reports only the
    earliest and latest stored month. Months before the earliest and after the
    latest are downloaded for every company; holes between those two months are
    not detected. When the bucket holds no month at all, the full range is
    downloaded.

    Args:
        company_list: Iterable of ticker symbols passed one by one to ``get_data``.
        connection_str, access_key, secret_key, bucket_name: Forwarded to
            ``set_up_minio``.
        start_date: First requested month, 'YYYY-MM'.
        end_date: Last requested month, 'YYYY-MM'.

    Returns:
        The string "Done!".

    Raises:
        ValueError: If start_date or end_date does not match '%Y-%m'.
    """
    base_url = 'https://www.alphavantage.co/query'

    # Parse dates
    start = datetime.strptime(start_date, "%Y-%m")
    end = datetime.strptime(end_date, "%Y-%m")

    # Create a MinIO connection
    client = set_up_minio(connection_str, access_key, secret_key, bucket_name)

    # Get current data range if files already exist
    min_date, max_date = get_current_file(client, bucket_name)

    if min_date and max_date:
        # Month just before / just after what is already stored. Plain
        # `month - 1` / `month + 1` is invalid for January and December, so the
        # shift is done with year roll-over.
        before_existing = _shift_month(min_date[0], min_date[1], -1)
        after_existing = _shift_month(max_date[0], max_date[1], 1)

        ranges = []
        # Inclusive comparisons: a gap of exactly one month must still be fetched.
        if before_existing >= start:
            ranges.append((start_date, before_existing.strftime("%Y-%m")))
        if after_existing <= end:
            ranges.append((after_existing.strftime("%Y-%m"), end_date))
    else:
        # No file exists, get full range for all companies
        ranges = [(start_date, end_date)]

    for range_start, range_end in ranges:
        for company_name in company_list:
            get_data(base_url, client, bucket_name, company_name, range_start, range_end)
    return "Done!"


def _shift_month(year, month, delta):
    """Return the first day of the month lying ``delta`` months from (year, month)."""
    index = year * 12 + (month - 1) + delta
    return datetime(index // 12, index % 12 + 1, 1)


In [35]:
def get_current_file(client, bucket_name):
    """
    Find the earliest and latest month already stored in a bucket.

    Every object returned by ``client.list_objects(bucket_name, recursive=True)``
    is considered. Its base name, cut at the first '.', must look like
    'YYYY-MM' with a month from 1 to 12; any other object is ignored instead
    of aborting the scan.

    Args:
        client: Object exposing ``list_objects(bucket_name, recursive=True)``
            whose items have an ``object_name`` attribute.
        bucket_name: Bucket to scan.

    Returns:
        ``(min, max)`` as ``(year, month)`` integer tuples, or ``([], [])``
        when no object name matches.
    """
    months = []
    for obj in client.list_objects(bucket_name, recursive=True):
        # Drop any key prefix and extension, e.g. "a/b/2023-04.json" -> "2023-04".
        stem = os.path.basename(obj.object_name).split('.')[0]
        parts = stem.split('-')
        if len(parts) != 2 or not all(part.isdecimal() for part in parts):
            continue
        year, month = int(parts[0]), int(parts[1])
        if year >= 1 and 1 <= month <= 12:
            months.append((year, month))

    # Both placeholders are falsy so callers can test `if min_date and max_date`.
    if not months:
        return ([], [])
    return min(months), max(months)

In [32]:
# Provide the inputs

company_list = ['AAPL', 'IBM', 'MSFT']
connection_str = "localhost:9000"
# Credentials are read from the environment when available so they do not have
# to live in the notebook.
# NOTE(review): the literal fallbacks are the previously hard-coded values, kept
# so existing local runs keep working; remove them once the variables are set.
access_key = os.environ.get("MINIO_ACCESS_KEY", "mariam")
secret_key = os.environ.get("MINIO_SECRET_KEY", "mariam123")
bucket_name = "stock-rawdata"
start_date = '2023-01'
# Download up to and including the current month.
end_date = datetime.now().strftime("%Y-%m")

In [36]:
# Run the download for every configured company over the configured date range.
# main() returns the string "Done!" after all get_data calls have been issued.
main(company_list, connection_str, access_key, secret_key, bucket_name, start_date, end_date)

'Done!'

In [None]:
# Build one row per timestamp from a single API payload and tag it with its symbol.
# NOTE(review): `data` is not assigned in any visible cell — presumably a payload
# loaded interactively; confirm and load it explicitly before this cell.
df = (
    pd.DataFrame(data['Time Series (60min)'])
    .T                # timestamps become the index, price fields the columns
    .reset_index()    # move the timestamps into a regular 'index' column
    .assign(company=data['Meta Data']['2. Symbol'])
    .rename(columns={
        'index': 'date',
        '1. open': 'open_price',
        '2. high': 'high_price',
        '3. low': 'low_price',
        '4. close': 'close_price',
        '5. volume': 'volume',
    })
)

In [None]:
# Display the frame built in the previous cell.
df

In [None]:
#df.to_csv('StockData.csv')

In [None]:
import os

In [None]:
# Display the current working directory of the running Python process.
os.getcwd()