In [2]:
import pandas as pd
import os

In [3]:
# Function to import CSV files and process the data
def import_and_process_files(folder_path):
    files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
    dataframes = {}
    
    # Import each file into a dataframe
    for file in files:
        df = pd.read_csv(os.path.join(folder_path, file), parse_dates=['Date'])
        df.set_index('Date', inplace=True)
        dataframes[file] = df
    
    return dataframes

# Function to calculate weekly returns and add start of week column
def calculate_weekly_returns(dataframes):
    processed_dataframes = {}
    for name, df in dataframes.items():
        # Resample to weekly data and calculate percentage return
        weekly_df = df.resample('W').last()
        weekly_df['Weekly Return (%)'] = weekly_df['Close'].pct_change() * 100
        weekly_df['Start of Week'] = weekly_df.index - pd.to_timedelta(6, unit='d')
        
        # Keep only the 'Start of Week' and 'Weekly Return (%)' columns
        weekly_df = weekly_df[['Start of Week', 'Weekly Return (%)']]
        
        processed_dataframes[name] = weekly_df
    return processed_dataframes

# Function to trim dataframes based on the youngest asset
def trim_dataframes_to_youngest_asset(dataframes):
    # Find the most recent start date among all dataframes
    most_recent_start_date = max(df.index[0] for df in dataframes.values())
    
    # Trim each dataframe to start from the most recent start date
    trimmed_dataframes = {name: df[df.index >= most_recent_start_date] for name, df in dataframes.items()}
    return trimmed_dataframes

# Function to filter dataframes to include only selected assets
def filter_assets(dataframes, selected_assets):
    return {name: df for name, df in dataframes.items() if name in selected_assets}

# Function to trim dataframes based on a user-defined start and end date in YYYY-MM format
def trim_dataframes_by_date_range(dataframes, start_date, end_date):
    # Convert to first day of the month
    start_date = pd.to_datetime(start_date + '-01')
    end_date = pd.to_datetime(end_date + '-01') + pd.offsets.MonthEnd(0)
    
    trimmed_dataframes = {name: df[(df.index >= start_date) & (df.index <= end_date)] for name, df in dataframes.items()}
    return trimmed_dataframes


In [4]:
# Folder path
folder_path = "/Users/robengin/Desktop/Unipd/GitHub/Opt_Project/OLD Project/CRYPTO"

# Import and process the files
crypto_data = import_and_process_files(folder_path)

# Calculate weekly returns and add start of week column
processed_data = calculate_weekly_returns(crypto_data)

# Print the start dates of each crypto asset
for name, df in processed_data.items():
    print(f"{name}: Start Date = {df.index[0].date()}")

# Trim dataframes to the most recent start date
trimmed_data = trim_dataframes_to_youngest_asset(processed_data)

# Display the processed data for verification
for name, df in trimmed_data.items():
    print(f"\n{name} Processed Data:")
    print(df.head())


UNI-USD.csv: Start Date = 2020-09-20
BTC-USD.csv: Start Date = 2014-09-21
SUSHI-EUR.csv: Start Date = 2020-08-30
XRP-USD.csv: Start Date = 2017-11-12
ADA-USD.csv: Start Date = 2017-11-12
MATIC-USD.csv: Start Date = 2019-04-28
DOT-USD.csv: Start Date = 2020-08-23
ETH-EUR.csv: Start Date = 2017-11-12

UNI-USD.csv Processed Data:
           Start of Week  Weekly Return (%)
Date                                       
2020-09-20    2020-09-14                NaN
2020-09-27    2020-09-21          -8.834454
2020-10-04    2020-09-28         -23.258680
2020-10-11    2020-10-05          -7.677589
2020-10-18    2020-10-12          -3.553955

BTC-USD.csv Processed Data:
           Start of Week  Weekly Return (%)
Date                                       
2020-09-20    2020-09-14           5.952442
2020-09-27    2020-09-21          -1.490198
2020-10-04    2020-09-28          -0.980825
2020-10-11    2020-10-05           6.697531
2020-10-18    2020-10-12           0.871189

SUSHI-EUR.csv Processed D

In [5]:
# Filter to include only selected assets (example: 'BTC-USD.csv' and 'ETH-EUR.csv')
selected_assets = ['BTC-USD.csv', 'ETH-EUR.csv']
filtered_data = filter_assets(trimmed_data, selected_assets)

In [6]:
# Trim dataframes based on a user-defined start and end date (example: '2018-01-01' to '2020-12-31')
start_date = '2012-01'
end_date = '2020-12'
date_trimmed_data = trim_dataframes_by_date_range(filtered_data, start_date, end_date)