# TrendVisor

The aim of this project is to fine-tune an image2text model using a dataset of price charts such that it predicts the next day trend.

Step 1: fetch historical prices 1m price charts for 24h and classify each image as:
- bullish: if the next day candle is green
- bearish: if the next day canbdle is bearish
- bullish_plus : if the newt day price increase is higher than 5%
- bearish_plus: if the next day price decrease is higher than 5%

Step 2: convert these data into image, normalize using min and max value, include normalized volume

Step 3: train the model

Step 4: use the fine-tuned model to predict the next day trend

In [3]:
import requests
import pandas as pd 

# step 1: fetch the data, generate an image and classify it
def fetch_market_data(symbol, interval, limit):
    """
    Retrieve cryptocurrency price data from the Binance API and convert it into a Pandas DataFrame.

    Parameters:
    - symbol (str): The trading symbol of the cryptocurrency you want to fetch data for, e.g., 'BTC' for Bitcoin.
    - interval (str): The time interval for the candlestick data, e.g., '1h' for 1-hour intervals.
    - limit (int): The maximum number of data points to retrieve.

    Returns:
    - df (pd.DataFrame): A Pandas DataFrame containing cryptocurrency price data with columns:
        - timestamp (datetime64): Timestamp of the data point.
        - open (float64): Opening price of the candlestick.
        - close (float64): Closing price of the candlestick.
        - high (float64): Highest price during the candlestick interval.
        - low (float64): Lowest price during the candlestick interval.
        - volume (float64): Trading volume during the candlestick interval.

    Example:
    ```
    import pandas as pd
    import requests

    symbol = 'BTC'  # Example: Bitcoin
    interval = '1h'  # Example: 1-hour intervals
    limit = 1000  # Example: Retrieve 1000 data points

    df = scrap(symbol, interval, limit)
    ```

    Note:
    - This function uses the Binance API to fetch historical cryptocurrency price data in candlestick format.
    - The 'symbol' parameter should be provided without the 'USDT' suffix (e.g., 'BTC' instead of 'BTCUSDT').
    - The 'interval' parameter specifies the time interval for each data point ('1h' for 1-hour intervals).
    - The 'limit' parameter limits the number of data points retrieved.
    - The returned DataFrame contains timestamped price data in a structured format suitable for analysis and visualization.

    Dependencies:
    - pandas
    - requests

    API Reference:
    - Binance API documentation: https://binance-docs.github.io/apidocs/spot/en/#kline-candlestick-data
    """
    # Define the endpoint and parameters
    endpoint = 'https://api.binance.com/api/v1/klines'
    symbol = symbol + 'USDT'

    params = {
        'symbol': symbol,
        'interval': interval,
        'limit': limit,
    }

    # Make the API request
    response = requests.get(endpoint, params=params)
    data = response.json()

    # Convert the data to a Pandas DataFrame
    df = pd.DataFrame(data, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

    # Select and convert columns to floats
    df = df[['timestamp', 'open', 'close', 'high', 'low', 'volume']].astype({'open': float, 'close': float, 'high': float, 'low': float, 'volume': float})

    return df

In [11]:
import requests
import random
from datetime import datetime, timedelta

def fetch_random_ohlcv_data_v1(symbols, start_date, end_date, n):
    base_url = 'https://api.binance.com/api/v1/klines'

    # Initialize a list to store the fetched data
    ohlcv_data = []

    # Convert start_date and end_date to datetime objects
    start_datetime = datetime.strptime(start_date, '%Y-%m-%d')
    end_datetime = datetime.strptime(end_date, '%Y-%m-%d')

    # Calculate the maximum time difference (in days) between start and end dates
    max_timedelta = end_datetime - start_datetime

    for _ in range(n):
        # Choose a random symbol from the list
        symbol = random.choice(symbols)

        # Choose a random date within the specified range
        random_timedelta = timedelta(days=random.randint(0, max_timedelta.days))
        random_date = start_datetime + random_timedelta

        # Calculate the end date for the 48-hour period
        end_of_period = random_date + timedelta(minutes=1000)

        # Convert dates to milliseconds
        start_timestamp = int(random_date.timestamp() * 1000)
        end_timestamp = int(end_of_period.timestamp() * 1000)

        # Prepare query parameters
        params = {
            'symbol': symbol,
            'interval': '1m',
            'startTime': start_timestamp,
            'endTime': end_timestamp,
            'limit': 1000  
        }

        # Send a GET request to Binance API v1
        response = requests.get(base_url, params=params)

        if response.status_code == 200:
            ohlcv = response.json()
            ohlcv_data.append({'symbol': symbol, 'date': random_date, 'ohlcv': ohlcv})
        else:
            print(f"Failed to fetch data for symbol {symbol} on date {random_date}")

    return ohlcv_data

# Example usage:
symbols = ['BTCUSDT', 'ETHUSDT', 'XRPUSDT']  # Note: Symbols without slashes
start_date = '2019-01-01'
end_date = '2023-09-01'
n = 10  # Number of times to fetch data

result = fetch_random_ohlcv_data_v1(symbols, start_date, end_date, n)
for entry in result:
    print(f"Symbol: {entry['symbol']}, Date: {entry['date']}, length : {len(entry['ohlcv'])}, OHLCV Data: {entry['ohlcv']}")


Symbol: XRPUSDT, Date: 2020-06-29 00:00:00, length : 1000, OHLCV Data: [[1593381600000, '0.17746000', '0.17761000', '0.17724000', '0.17730000', '59537.20000000', 1593381659999, '10562.19352100', 38, '38287.90000000', '6791.00587800', '0'], [1593381660000, '0.17730000', '0.17730000', '0.17707000', '0.17707000', '43327.20000000', 1593381719999, '7677.08821200', 21, '0.00000000', '0.00000000', '0'], [1593381720000, '0.17706000', '0.17710000', '0.17704000', '0.17705000', '21906.50000000', 1593381779999, '3879.18849100', 14, '11505.70000000', '2037.65947000', '0'], [1593381780000, '0.17706000', '0.17714000', '0.17695000', '0.17695000', '61777.50000000', 1593381839999, '10939.15937100', 33, '40240.80000000', '7126.26476500', '0'], [1593381840000, '0.17696000', '0.17705000', '0.17695000', '0.17700000', '4283.10000000', 1593381899999, '758.21376400', 8, '1129.60000000', '199.99322200', '0'], [1593381900000, '0.17699000', '0.17704000', '0.17679000', '0.17681000', '24237.60000000', 1593381959999

In [12]:
import os
import matplotlib.pyplot as plt
import numpy as np

def plot_candlestick_charts(result, threshold):
    # Create the "train" folder if it doesn't exist
    if not os.path.exists("train"):
        os.makedirs("train")

    for idx, entry in enumerate(result):
        symbol = entry['symbol']
        ohlcv = entry['ohlcv'][:500]  # Use only the first 500 points

        last_close=float(entry['ohlcv'][-1][4])
        point_500_close=float(entry['ohlcv'][499][4])

        # Extract OHLCV data
        #timestamps = [entry[0] for entry in ohlcv]
        time_steps = range(1,501)
        open_prices = np.array([float(entry[1]) for entry in ohlcv])
        high_prices = np.array([float(entry[2]) for entry in ohlcv])
        low_prices = np.array([float(entry[3]) for entry in ohlcv])
        close_prices = np.array([float(entry[4]) for entry in ohlcv])
        volumes = np.array([float(entry[5]) for entry in ohlcv])

        # Normalize prices and volumes
        max_price = np.max(high_prices)
        min_price = np.min(low_prices)
        max_volume = np.max(volumes)
        min_volume = np.min(volumes)

        normalized_open = (open_prices - min_price) / (max_price - min_price)
        normalized_high = (high_prices - min_price) / (max_price - min_price)
        normalized_low = (low_prices - min_price) / (max_price - min_price)
        normalized_close = (close_prices - min_price) / (max_price - min_price)
        normalized_volume = (volumes - min_volume) / (max_volume - min_volume)

        # Calculate label (bullish, bearish, very bullish, or very bearish)
        price_change_percentage = ((point_500_close - last_close) / last_close) * 100

        if price_change_percentage > threshold:
            label = "Very Bullish"
        elif price_change_percentage < -threshold:
            label = "Very Bearish"
        elif price_change_percentage > 0:
            label = "Bullish"
        else:
            label = "Bearish"
        print(f"{symbol} - {point_500_close} - {last_close} - {label}")
        # Create a candlestick chart with volume
        fig, ax1 = plt.subplots(figsize=(10, 6))

        # Candlestick plot
        ax1.plot(time_steps, normalized_low, color='black', linewidth=1)
        ax1.vlines(time_steps, normalized_low, normalized_high, color='black', linewidth=1)
        ax1.fill_between(time_steps, normalized_open, normalized_close, where=normalized_open > normalized_close, facecolor='white', edgecolor='black', linewidth=1)
        ax1.fill_between(time_steps, normalized_open, normalized_close, where=normalized_open <= normalized_close, facecolor='black', edgecolor='black', linewidth=1)

        # Volume plot
        ax2 = ax1.twinx()
        ax2.fill_between(time_steps, 0, normalized_volume, color='gray', alpha=0.5)

        #plt.xlabel('Time Steps')
        #plt.ylabel('Price')
        plt.grid(True)

        #plt.title(f'{symbol} Candlestick Chart ({label})')

        # Save the chart with label and ID in the "train" folder
        filename = os.path.join("train", f'{label}-{idx}.png')
        plt.savefig(filename)
        plt.close()

# Example usage:
threshold = 5.0  # Adjust the threshold as needed
plot_candlestick_charts(result, threshold)



TypeError: unsupported operand type(s) for -: 'str' and 'str'