<a href="https://colab.research.google.com/github/Okitrader/freecodecamp_agloML_crypto/blob/main/FCC_Algo_Proj01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

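All packages needed:
pandas, numpy, matplotlib, statsmodels, pandas_datareader, pandas_ta, yfinance, scikit-learn (imported as `sklearn`), PyPortfolioOpt, requests, tqdm. `datetime` and `warnings` are also used, but they are part of the Python standard library and do not need to be installed.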

In [1]:
!pip install pandas warnings pandas_ta numpy matplotlib statsmodels pandas-datareader datetime yfinance scikit-learn PyPortfolioOpt


[0m

In [2]:
!pip install pandas_ta requests tqdm

Collecting pandas_ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pandas_ta
  Building wheel for pandas_ta (setup.py) ... [?25l[?25hdone
  Created wheel for pandas_ta: filename=pandas_ta-0.3.14b0-py3-none-any.whl size=218907 sha256=a0f32349446d1cb19492fb0108f16ea26c288b345b415b79595a4ab6effd483a
  Stored in directory: /root/.cache/pip/wheels/69/00/ac/f7fa862c34b0e2ef320175100c233377b4c558944f12474cf0
Successfully built pandas_ta
Installing collected packages: pandas_ta
Successfully installed pandas_ta-0.3.14b0


In [3]:
from statsmodels.regression.rolling import RollingOLS
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime as dt
import pandas_ta
import warnings
warnings.filterwarnings('ignore')


## Create a list of the top cryptocurrencies, ranked by market cap on CoinMarketCap

In [5]:
# Import necessary libraries
import requests
import pandas as pd
from google.colab import userdata

# Retrieve the CoinMarketCap API key from the Colab secrets store
api_key = userdata.get('CMCKey')

if api_key is None:
    print("API key not found. Please check that the secret name is correct and that the notebook has access to it.")
else:
    print("API key retrieved successfully.")

# Number of top-ranked cryptocurrencies to retrieve
TOP_CRYPTO_LIMIT = 125  # Change this number to fetch a different number of top cryptocurrencies

# Seconds to wait for CoinMarketCap before giving up. Without an explicit
# timeout `requests` waits indefinitely and the Timeout handler below could
# never be triggered by a stalled server.
REQUEST_TIMEOUT = 30

# CoinMarketCap "latest listings" endpoint and its query parameters
url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
parameters = {
    'start': '1',
    'limit': str(TOP_CRYPTO_LIMIT),  # The limit is sent as a string
    'convert': 'USD'
}
headers = {
    'Accepts': 'application/json',
    'X-CMC_PRO_API_KEY': api_key,
}

# Make the API request and build the `top_cryptos` DataFrame from the response
try:
    response = requests.get(url, headers=headers, params=parameters, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # Raises HTTPError for an unsuccessful status code
    data = response.json()

    # Keep only the rank and ticker symbol of every listing
    cryptos = [{'cmc_rank': entry['cmc_rank'], 'symbol': entry['symbol']} for entry in data['data']]

    # One row per cryptocurrency, columns: cmc_rank, symbol
    top_cryptos = pd.DataFrame(cryptos)

    # Verify that the ranks are within the top limit set
    if top_cryptos['cmc_rank'].max() <= TOP_CRYPTO_LIMIT and top_cryptos['cmc_rank'].min() >= 1:
        print(f"Successfully retrieved top {TOP_CRYPTO_LIMIT} cryptocurrencies by cmc_rank.")
    else:
        print(f"Some cryptocurrencies may not be in the top {TOP_CRYPTO_LIMIT} by cmc_rank.")

    # Show the first and last rows as a quick sanity check
    print(top_cryptos.head())
    print(top_cryptos.tail())
except requests.exceptions.HTTPError as errh:
    print("Http Error:", errh)
except requests.exceptions.ConnectionError as errc:
    print("Error Connecting:", errc)
except requests.exceptions.Timeout as errt:
    print("Timeout Error:", errt)
except requests.exceptions.RequestException as err:
    print("Oops: Something Else", err)

API key retrieved successfully.
Successfully retrieved top 125 cryptocurrencies by cmc_rank.
   cmc_rank symbol
0         1    BTC
1         2    ETH
2         3   USDT
3         4    BNB
4         5    XRP
     cmc_rank symbol
120       121    CVX
121       122    SFP
122       123  TFUEL
123       124    HNT
124       125    JST


## Next we create a DataFrame for our OHLCV data
We are not using the adjusted close as in the video examples, so the formulas below are adapted to work with the plain close price.

In [7]:
import datetime
import pprint as pp

from google.colab import userdata # Ensure this contains your CoinMarketCap API key
from tqdm import tqdm

# Never truncate columns when a wide DataFrame is displayed
pd.set_option('display.max_columns', None)

# CoinMarketCap API key, read from the Colab user data
api_key = userdata.get('CMCKey')

# Function to fetch and process historical cryptocurrency data for each symbol
def fetch_historical_crypto_data(start_date=None, end_date=None, days_back=29,
                                 symbols=None, timeout=30):
    """
    Download daily OHLCV quotes (in USD) from CoinMarketCap for a set of symbols.

    Parameters:
    start_date (str | datetime.date | datetime.datetime | None): First day of the
        window. Date objects are formatted as 'YYYY-MM-DD'; strings are sent
        unchanged. Defaults to `days_back` days before `end_date`.
    end_date (str | datetime.date | datetime.datetime | None): Last day of the
        window. Defaults to the current date and time.
    days_back (int): Window length in days, used only when `start_date` is None.
    symbols (iterable of str | None): Ticker symbols to download. Defaults to the
        'symbol' column of the notebook-level `top_cryptos` DataFrame.
    timeout (float): Seconds to wait for each HTTP request before giving up.

    Returns:
    DataFrame: Indexed by ('timestamp', 'symbol') with the columns
        open, high, low, close and volume. Timestamps are kept exactly as
        returned by the API. The frame is empty (but has the same index levels
        and columns) when no quotes could be fetched.

    Symbols whose request fails are reported with a printed message and
    skipped, so one bad symbol does not abort the whole download.
    """
    # Default to current date if end_date is not specified
    if end_date is None:
        end_date = datetime.datetime.now()

    # Default to `days_back` days before end_date; pd.to_datetime lets this
    # work when end_date was supplied as a string as well.
    if start_date is None:
        start_date = pd.to_datetime(end_date) - datetime.timedelta(days=days_back)

    # Format date-like objects; datetime.datetime is a subclass of datetime.date,
    # so this covers both. Strings are passed through untouched.
    if isinstance(start_date, datetime.date):
        start_date = start_date.strftime('%Y-%m-%d')
    if isinstance(end_date, datetime.date):
        end_date = end_date.strftime('%Y-%m-%d')

    if symbols is None:
        symbols = top_cryptos['symbol']

    base_url = 'https://pro-api.coinmarketcap.com/v2/cryptocurrency/ohlcv/historical'
    headers = {
        'Accepts': 'application/json',
        'X-CMC_PRO_API_KEY': api_key,
    }
    all_data = []

    # Using tqdm for progress display
    for symbol in tqdm(symbols, desc="Processing", unit="symbol"):
        # The symbol goes through `params` so that it is URL-encoded properly
        parameters = {
            'symbol': symbol,
            'time_start': start_date,
            'time_end': end_date,
            'convert': 'USD'
        }

        try:
            response = requests.get(base_url, headers=headers, params=parameters, timeout=timeout)
            # Surface HTTP errors (bad key, rate limit, ...) instead of
            # silently producing no rows for this symbol.
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body
            print(f"Error fetching data for {symbol}: {e}")
            continue

        # `data['data']` can be missing or null; treat both as "no quotes"
        quotes = (data.get('data') or {}).get(symbol, [])
        for quote in quotes:
            for q in quote.get('quotes', []):
                usd_data = q['quote']['USD']
                all_data.append({
                    'symbol': symbol,
                    'timestamp': usd_data['timestamp'],
                    'open': usd_data['open'],
                    'high': usd_data['high'],
                    'low': usd_data['low'],
                    'close': usd_data['close'],
                    'volume': usd_data['volume']
                })

    # Naming the columns up front keeps set_index working when nothing was fetched
    columns = ['symbol', 'timestamp', 'open', 'high', 'low', 'close', 'volume']
    top_OHLCV = pd.DataFrame(all_data, columns=columns).set_index(['timestamp', 'symbol'])

    # Ensure the column names are lowercase
    top_OHLCV.columns = top_OHLCV.columns.str.lower()

    # Round the volume column to 6 decimal places
    if not top_OHLCV.empty:
        top_OHLCV['volume'] = top_OHLCV['volume'].round(6)

    # Return the final DataFrame
    return top_OHLCV

# Download the OHLCV history for every symbol in top_cryptos
top_OHLCV = fetch_historical_crypto_data()

# Pretty-print the first and the last rows of the result
for preview in (top_OHLCV.head(), top_OHLCV.tail()):
    pp.pprint(preview)

Processing: 100%|██████████| 125/125 [00:41<00:00,  2.99symbol/s]

                                         open          high           low  \
timestamp                symbol                                             
2023-10-14T23:59:59.999Z BTC     26866.203245  26968.999218  26814.586586   
2023-10-15T23:59:59.999Z BTC     26858.011726  27289.170319  26817.894010   
2023-10-16T23:59:59.999Z BTC     27162.628229  29448.139037  27130.473478   
2023-10-17T23:59:59.999Z BTC     28522.098166  28618.752390  28110.186117   
2023-10-18T23:59:59.999Z BTC     28413.530808  28889.009589  28174.252551   

                                        close        volume  
timestamp                symbol                              
2023-10-14T23:59:59.999Z BTC     26861.706203  5.388117e+09  
2023-10-15T23:59:59.999Z BTC     27159.652919  7.098202e+09  
2023-10-16T23:59:59.999Z BTC     28519.466679  2.783388e+10  
2023-10-17T23:59:59.999Z BTC     28415.748140  1.487253e+10  
2023-10-18T23:59:59.999Z BTC     28328.341152  1.272413e+10  
                          




## Calculate features and technical indicators for each Crypto:
- Garman-Klass Volatility
- RSI
- Bollinger Bands
- ATR
- MACD
- Dollar Volume

The updated Garman-Klass volatility formula for continuous markets like currencies, using Close instead of Adjusted Close, is given by:

$$\text{Garman-Klass Volatility} = \frac{\left(\ln(\text{High}) - \ln(\text{Low})\right)^2}{2} - \left(2\ln(2) - 1\right)\left(\ln(\text{Close}) - \ln(\text{Open})\right)^2$$



In [9]:
# Garman-Klass volatility, built from the high/low and close/open log ranges
log_high_low = np.log(top_OHLCV['high']) - np.log(top_OHLCV['low'])
log_close_open = np.log(top_OHLCV['close']) - np.log(top_OHLCV['open'])
top_OHLCV['garman_klass_vol'] = (log_high_low**2) / 2 - (2 * np.log(2) - 1) * (log_close_open**2)

# 20-period RSI, computed separately for each symbol (index level 1)
close_by_symbol = top_OHLCV.groupby(level=1)['close']
top_OHLCV['rsi'] = close_by_symbol.transform(
    lambda closes: pandas_ta.rsi(close=closes, length=20)
)

top_OHLCV


Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,garman_klass_vol,rsi
timestamp,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-10-14T23:59:59.999Z,BTC,26866.203245,26968.999218,26814.586586,26861.706203,5.388117e+09,0.000016,
2023-10-15T23:59:59.999Z,BTC,26858.011726,27289.170319,26817.894010,27159.652919,7.098202e+09,0.000104,
2023-10-16T23:59:59.999Z,BTC,27162.628229,29448.139037,27130.473478,28519.466679,2.783388e+10,0.002442,
2023-10-17T23:59:59.999Z,BTC,28522.098166,28618.752390,28110.186117,28415.748140,1.487253e+10,0.000155,
2023-10-18T23:59:59.999Z,BTC,28413.530808,28889.009589,28174.252551,28328.341152,1.272413e+10,0.000310,
...,...,...,...,...,...,...,...,...
2023-11-06T23:59:59.999Z,FIL,4.116601,4.379466,4.074109,4.336670,1.589070e+08,0.001564,76.271740
2023-11-07T23:59:59.999Z,FIL,4.336750,4.343247,4.094123,4.243912,1.584706e+08,0.001564,71.516642
2023-11-08T23:59:59.999Z,FIL,4.243969,4.466300,4.223579,4.445552,1.253345e+08,0.000729,75.072715
2023-11-09T23:59:59.999Z,FIL,4.444945,4.680075,4.261902,4.495716,2.588597e+08,0.004331,75.861893


### Now calculate the Bollinger Bands

In [11]:
import numpy as np

def compute_bollinger_bands(df, length=20, num_std=None):
    """
    Calculate Bollinger Bands for a given DataFrame.

    The bands are added to `df` in place as the columns `bb_mid_<length>`,
    `bb_high_<length>` and `bb_low_<length>`, and `df` is returned.

    When `df` has a MultiIndex containing a 'symbol' level, the rolling
    statistics are computed separately for each symbol so that one symbol's
    prices never leak into another symbol's bands. Otherwise the whole
    'close' column is treated as a single series.

    Parameters:
    df (DataFrame): DataFrame with 'close' prices.
    length (int): The number of periods for the moving average.
    num_std (float | None): The number of standard deviations from the moving
        average. When None it is chosen from `length`: 2 for length 20,
        2.1 for longer windows and 1.9 for shorter ones. An explicit value
        is always used as given.

    Returns:
    DataFrame: The same DataFrame object with the Bollinger Band columns added.
    """

    # Pick the band width from the window length only when the caller did not
    # choose one explicitly.
    if num_std is None:
        if length > 20:
            num_std = 2.1  # Increasing for length > 20
        elif length < 20:
            num_std = 1.9  # Decreasing for length < 20
        else:
            num_std = 2

    close = df['close']

    if df.index.nlevels > 1 and 'symbol' in df.index.names:
        # Stacked multi-symbol frame: roll within each symbol, keeping row order
        by_symbol = close.groupby(level='symbol', sort=False)
        middle = by_symbol.transform(
            lambda prices: prices.rolling(window=length, min_periods=1).mean()
        )
        rolling_std = by_symbol.transform(
            lambda prices: prices.rolling(window=length, min_periods=1).std()
        )
    else:
        window = close.rolling(window=length, min_periods=1)
        middle = window.mean()
        rolling_std = window.std()

    # Middle band plus/minus the scaled standard deviation; the first row of
    # each series has no standard deviation yet, so its outer bands are NaN.
    df[f'bb_mid_{length}'] = middle
    df[f'bb_high_{length}'] = middle + (rolling_std * num_std)
    df[f'bb_low_{length}'] = middle - (rolling_std * num_std)

    return df

# Add the 20-period Bollinger Band columns to the OHLCV frame, then display it
top_OHLCV = compute_bollinger_bands(df=top_OHLCV, length=20)
top_OHLCV

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,garman_klass_vol,rsi,bb_mid_20,bb_high_20,bb_low_20
timestamp,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-10-14T23:59:59.999Z,BTC,26866.203245,26968.999218,26814.586586,26861.706203,5.388117e+09,0.000016,,26861.706203,,
2023-10-15T23:59:59.999Z,BTC,26858.011726,27289.170319,26817.894010,27159.652919,7.098202e+09,0.000104,,27010.679561,27432.039848,26589.319274
2023-10-16T23:59:59.999Z,BTC,27162.628229,29448.139037,27130.473478,28519.466679,2.783388e+10,0.002442,,27513.608600,29281.099384,25746.117816
2023-10-17T23:59:59.999Z,BTC,28522.098166,28618.752390,28110.186117,28415.748140,1.487253e+10,0.000155,,27739.143485,29441.065398,26037.221572
2023-10-18T23:59:59.999Z,BTC,28413.530808,28889.009589,28174.252551,28328.341152,1.272413e+10,0.000310,,27856.983018,29422.271094,26291.694943
...,...,...,...,...,...,...,...,...,...,...,...
2023-11-06T23:59:59.999Z,FIL,4.116601,4.379466,4.074109,4.336670,1.589070e+08,0.001564,76.271740,3.737999,4.385094,3.090904
2023-11-07T23:59:59.999Z,FIL,4.336750,4.343247,4.094123,4.243912,1.584706e+08,0.001564,71.516642,3.791424,4.419091,3.163757
2023-11-08T23:59:59.999Z,FIL,4.243969,4.466300,4.223579,4.445552,1.253345e+08,0.000729,75.072715,3.854006,4.480419,3.227594
2023-11-09T23:59:59.999Z,FIL,4.444945,4.680075,4.261902,4.495716,2.588597e+08,0.004331,75.861893,3.918046,4.531099,3.304993
