### Downloading the Price datasets

BTC: https://www.investing.com/crypto/bitcoin/historical-data <br>
SNP500: https://www.investing.com/indices/us-spx-500-historical-data <br>
VIX: https://www.investing.com/indices/volatility-s-p-500-historical-data <br>
GOLD: https://www.investing.com/commodities/gold-historical-data

In [13]:
import pandas as pd

# Read the raw price-history CSV exports, one DataFrame per asset.
btc, etc, sol = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/price/btc.csv',
        'data/price/etc.csv',
        'data/price/sol.csv',
    )
)

#### Creating functions for pre-processing
# 1. Removing the thousands-separator commas from the price columns
def comma_form(column):
    """Convert a price column to numeric, dropping thousands separators.

    Parameters
    ----------
    column : pandas.Series
        Prices given either as strings such as ``'69,349.9'`` or as values
        that are already numeric.

    Returns
    -------
    pandas.Series
        Numeric Series; entries that cannot be parsed become ``NaN``.
    """
    # Numeric Series have no ``.str`` accessor, so only string-like columns
    # go through the comma removal; numeric input is passed straight on.
    if not pd.api.types.is_numeric_dtype(column):
        # regex=False: the comma is a literal character, not a pattern.
        column = column.str.replace(',', '', regex=False)
    return pd.to_numeric(column, errors='coerce')

# 2. Converting the abbreviated Volume strings (K/M/B suffixes) to numbers
def convert_volume(volume):
    """Convert an abbreviated volume string such as ``'56.45K'`` to a float.

    Parameters
    ----------
    volume : str or number
        A string optionally ending in ``K`` (thousand), ``M`` (million) or
        ``B`` (billion), or a value that is not a string.

    Returns
    -------
    float or original value
        The scaled number for string input. Blank strings and ``'-'`` give
        ``NaN``. Non-string input (e.g. an existing float or ``NaN``) is
        returned unchanged.

    Raises
    ------
    ValueError
        If the numeric part of the string cannot be parsed as a float.
    """
    # Non-strings are assumed to be numeric already and are passed through.
    if not isinstance(volume, str):
        return volume

    text = volume.strip().replace(',', '')
    # Treat an empty cell or a lone dash as a missing value instead of
    # failing on float('').
    if text in ('', '-'):
        return float('nan')

    multipliers = {'K': 10**3, 'M': 10**6, 'B': 10**9}
    factor = multipliers.get(text[-1].upper())
    if factor is None:
        # No recognised suffix: keep every character. Dropping the last one
        # here would silently turn '123' into 12.0.
        return float(text)
    return float(text[:-1]) * factor
    
# 3. Adjusting the Change % Column 
def percent(percent):
    """Turn percentage strings such as ``'-2.04%'`` into fractions.

    Parameters
    ----------
    percent : pandas.Series
        String Series whose values carry a ``%`` sign.

    Returns
    -------
    pandas.Series
        The numeric values divided by 100 (``'50%'`` becomes ``0.5``).
        ``pd.to_numeric`` is called with its default error handling, so
        values that are still not parseable after removing ``%`` raise.
    """
    without_sign = percent.str.replace('%', '')
    as_number = pd.to_numeric(without_sign)
    return as_number / 100

# 4. Date Selection
def date_filter(df, start_date_str, end_date_str):
    """Return the rows of ``df`` whose ``Date`` falls inside a date range.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Date`` column comparable to datetime values.
    start_date_str, end_date_str : str
        Range bounds, parsed day-first (``'05/01/2024'`` is 5 January 2024).
        Both bounds are inclusive.

    Returns
    -------
    pandas.DataFrame
        The matching rows, selected with ``df.loc``; the original index
        labels are kept.
    """
    lower, upper = (
        pd.to_datetime(bound, dayfirst=True)
        for bound in (start_date_str, end_date_str)
    )
    # Series.between is inclusive on both ends by default.
    in_range = df['Date'].between(lower, upper)
    return df.loc[in_range]

### BTC PRICES

In [9]:
# 1. Parse the Date strings into datetime values
btc['Date'] = pd.to_datetime(btc['Date'])

# 2. Strip the commas from the four price columns and make them numeric
btc[['Price', 'Open', 'High', 'Low']] = (
    btc[['Price', 'Open', 'High', 'Low']].apply(comma_form)
)

# 3. Build a numeric Volume column from the abbreviated 'Vol.' strings
btc['Volume'] = btc['Vol.'].apply(convert_volume)

# 4. Store the daily change as a fraction under a code-friendly name
btc['pct_change'] = percent(btc['Change %'])

# 5. Drop the raw columns that the two new columns replace
btc = btc.drop(columns=['Vol.', 'Change %'])

# Display the cleaned frame
btc

Unnamed: 0,Date,Price,Open,High,Low,Volume,pct_change
0,2024-06-08,69349.9,69347.0,69572.1,69222.4,56450.0,0.0000
1,2024-06-07,69347.9,70793.4,71956.5,68620.7,82620.0,-0.0204
2,2024-06-06,70791.5,71083.6,71616.1,70178.7,49790.0,-0.0041
3,2024-06-05,71083.7,70550.9,71744.4,70397.1,67060.0,0.0076
4,2024-06-04,70549.2,68808.0,71034.2,68564.3,75690.0,0.0253
...,...,...,...,...,...,...,...
885,2022-01-05,43425.9,45833.1,47019.4,42535.1,83740.0,-0.0526
886,2022-01-04,45837.3,46435.7,47505.4,45602.1,55590.0,-0.0128
887,2022-01-03,46430.2,47293.9,47556.0,45704.0,41060.0,-0.0186
888,2022-01-02,47311.8,47738.7,47944.9,46718.2,27020.0,-0.0089


#### ETC PRICES

In [10]:
# 1. Parse the Date strings into datetime values
etc['Date'] = pd.to_datetime(etc['Date'])

# 2. Strip the commas from the four price columns and make them numeric
etc[['Price', 'Open', 'High', 'Low']] = (
    etc[['Price', 'Open', 'High', 'Low']].apply(comma_form)
)

# 3. Build a numeric Volume column from the abbreviated 'Vol.' strings
etc['Volume'] = etc['Vol.'].apply(convert_volume)

# 4. Store the daily change as a fraction under a code-friendly name
etc['pct_change'] = percent(etc['Change %'])

# 5. Drop the raw columns that the two new columns replace
etc = etc.drop(columns=['Vol.', 'Change %'])

# Display the cleaned frame
etc

Unnamed: 0,Date,Price,Open,High,Low,Volume,pct_change
0,2024-06-08,3687.34,3678.36,3709.44,3670.34,353040.0,0.0024
1,2024-06-07,3678.37,3812.95,3840.88,3608.44,384780.0,-0.0353
2,2024-06-06,3812.95,3865.15,3878.28,3765.23,251820.0,-0.0135
3,2024-06-05,3865.14,3810.35,3885.15,3778.13,288180.0,0.0144
4,2024-06-04,3810.35,3767.10,3831.41,3743.11,247960.0,0.0115
...,...,...,...,...,...,...,...
1250,2021-01-05,1099.52,1042.48,1131.56,976.91,3250000.0,0.0548
1251,2021-01-04,1042.40,977.76,1158.27,894.24,5190000.0,0.0692
1252,2021-01-03,974.97,774.54,1008.49,769.57,4020000.0,0.2588
1253,2021-01-02,774.50,729.00,787.26,715.15,2250000.0,0.0622


#### SOL PRICES

In [14]:
# 1. Parse the Date strings into datetime values
sol['Date'] = pd.to_datetime(sol['Date'])

# NOTE(review): unlike the BTC and ETC cells, the price columns are not passed
# through comma_form here - presumably the SOL prices contain no thousands
# separators and are already numeric after read_csv; confirm against the CSV.

# 2. Build a numeric Volume column from the abbreviated 'Vol.' strings
sol['Volume'] = sol['Vol.'].apply(convert_volume)

# 3. Store the daily change as a fraction under a code-friendly name
sol['pct_change'] = percent(sol['Change %'])

# 4. Drop the raw columns that the two new columns replace
sol = sol.drop(columns=['Vol.', 'Change %'])

# Display the cleaned frame
sol

Unnamed: 0,Date,Price,Open,High,Low,Volume,pct_change
0,2024-06-08,161.119,162.504,163.716,158.516,5150000.0,-0.0085
1,2024-06-07,162.504,170.114,172.583,155.049,5110000.0,-0.0447
2,2024-06-06,170.106,173.491,174.371,167.754,3070000.0,-0.0195
3,2024-06-05,173.491,171.784,175.565,171.207,3800000.0,0.0100
4,2024-06-04,171.778,164.898,171.859,164.446,3290000.0,0.0413
...,...,...,...,...,...,...,...
1249,2021-01-05,2.155,2.489,2.489,2.093,,-0.1339
1250,2021-01-04,2.489,2.161,2.489,1.945,,0.1518
1251,2021-01-03,2.161,1.796,2.295,1.796,,0.2027
1252,2021-01-02,1.796,1.837,1.986,1.733,,-0.0222
