# Predicting Crypto with LLMs

## Libraries

In [12]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import yfinance as yf
from statsmodels.tsa.arima.model import ARIMA
from langchain_community.llms import Ollama

## Pull Crypto data

In [13]:
# Pull stock data from yfinance for the past month
def pull_stocks(ticker):
    end_date = datetime.today()
    start_date = end_date - timedelta(days=30)
    stock_data = yf.Ticker(ticker)
    stock_df = stock_data.history(start=start_date, end=end_date)
    stock_df.index = stock_df.index.tz_localize(None)  # Ensure stock data is timezone-naive
    stock_df = stock_df.reset_index()
    stock_df['Date'] = stock_df['Date'].dt.strftime('%Y-%m-%d')

    stock_df['pct_change'] = stock_df['Close'].pct_change()

    stock_df = stock_df[stock_df['pct_change'].notna()==True]

    stock_df = stock_df[['Date','pct_change']]

    actual_final = stock_df.tail(1)

    # stock_df = stock_df.iloc[:-1]

    return stock_df, actual_final

btc, btc_final = pull_stocks('BTC-USD')
eth, eth_final = pull_stocks('ETH-USD')
xrp, xrp_final = pull_stocks('XRP-USD')



## Run ARIMA

In [14]:
def arima(timeseries_df):
    # Ensure 'Date' is the index and in datetime format
    timeseries_df.set_index('Date', inplace=True)
    timeseries_df.index = pd.to_datetime(timeseries_df.index)

    # Remove the last row (assumed to be NaN)
    timeseries_df = timeseries_df[:-1]

    # Convert percentage strings to float if necessary
    if timeseries_df['pct_change'].dtype == 'object':
        timeseries_df['pct_change'] = timeseries_df['pct_change'].str.rstrip('%').astype('float') / 100.0

    # Fit ARIMA model
    model = ARIMA(timeseries_df['pct_change'].dropna(), order=(1, 1, 1))
    results = model.fit()

    # Predict the next day's percentage change
    forecast = results.forecast(steps=1)
    predicted_pct_change = forecast.values[0]

    print(f"Predicted percentage change for next day: {predicted_pct_change:.6f}")

arima(btc)
arima(eth)
arima(xrp)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Predicted percentage change for next day: 0.006258
Predicted percentage change for next day: 0.015279


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Predicted percentage change for next day: 0.030995




## Prepare data for LLM

In [15]:
from io import StringIO

def convert_to_csv_string(timeseries):

    timeseries = timeseries.reset_index()

    timeseries['pct_change'] = np.round(timeseries['pct_change'], 6)

    # Remove final row
    timeseries = timeseries.iloc[:-1]

    # Convert DataFrame to CSV string
    csv_buffer = StringIO()
    timeseries.to_csv(csv_buffer, index=False)
    csv_string = csv_buffer.getvalue()

    return csv_string


btc_for_llm = convert_to_csv_string(btc)
eth_for_llm = convert_to_csv_string(eth)
xrp_for_llm = convert_to_csv_string(xrp)


In [16]:
print(btc_for_llm)

Date,pct_change
2024-11-19,0.019893
2024-11-20,0.021612
2024-11-21,0.044152
2024-11-22,0.005004
2024-11-23,-0.012327
2024-11-24,0.002419
2024-11-25,-0.050111
2024-11-26,-0.011997
2024-11-27,0.043237
2024-11-28,-0.003231
2024-11-29,0.018913
2024-11-30,-0.010388
2024-12-01,0.008613
2024-12-02,-0.01454
2024-12-03,0.001428
2024-12-04,0.028816
2024-12-05,-0.022021
2024-12-06,0.034445
2024-12-07,2.6e-05
2024-12-08,0.013137
2024-12-09,-0.037569
2024-12-10,-0.007772
2024-12-11,0.046523
2024-12-12,-0.011169
2024-12-13,0.014156
2024-12-14,-0.00085
2024-12-15,0.028861
2024-12-16,0.016597
2024-12-17,0.001046



## Run LLM

Note: This requires having an active local Ollama server connection running and installing the llama3, mistral, and gemma3 models

In [17]:
## Note: Change the date of the prediction/forecast for your own use (here, 2024-06-28)

def predict_timeseries(timeseries):
    output = llm.invoke(f"""
        You are a large language model with time series forecasting capabilities.
        Predict the percent change for the day immediately after the end of the provided time series (2024-06-28).
        Use only your model capabilities, not any other method.
        The data is in the format of a csv file.
        The dataset includes:
        - Date
        - Percent change in the cryptocurrency from the previous day
        Provide only the forecasted percent change for 2024-06-28 as a point estimate. 
        Do not include any other text or context, just the one value:
        {timeseries}
    """)
    return output.strip()


In [18]:
llm = Ollama(model="mistral", temperature=0)

print(predict_timeseries(btc_for_llm))
print(predict_timeseries(eth_for_llm))
print(predict_timeseries(xrp_for_llm))

2024-06-28,0.032785
2024-06-28,-0.012345 (This is a point estimate based on the provided time series data using autoregressive integrated moving average (ARIMA) model for forecasting.)
2024-06-29, 0.012345 (This is a point estimate for the percent change on 2024-06-28 based on the provided time series data)


In [17]:
llm = Ollama(model="llama3", temperature=0)

print(predict_timeseries(btc_for_llm))
print(predict_timeseries(eth_for_llm))
print(predict_timeseries(xrp_for_llm))


0.011345
0.011345
0.005211


## Actual values for predicted day

In [20]:
btc_final

Unnamed: 0,Date,pct_change
30,2024-12-18,-0.020022


In [21]:
eth_final

Unnamed: 0,Date,pct_change
30,2024-12-18,-0.007754


In [22]:
xrp_final

Unnamed: 0,Date,pct_change
30,2024-12-18,-0.01709
