In [1]:
import numpy as np
import pandas as pd
import requests
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from datetime import datetime, timedelta

In [9]:
def fetch_binance_data(symbol, interval, limit):
    url = "https://api.binance.com/api/v3/klines"
    params = {'symbol': symbol, 'interval': interval, 'limit': limit}
    response = requests.get(url, params=params)
    data = response.json()
    df = pd.DataFrame(data, columns=['open_time', 'open', 'high', 'low', 'close', 'volume',
                                     'close_time', 'quote_asset_volume', 'number_of_trades',
                                     'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'])
    df['close'] = df['close'].astype(float)
    df['open'] = df['open'].astype(float)
    df['high'] = df['high'].astype(float)
    df['low'] = df['low'].astype(float)
    df['volume'] = df['volume'].astype(float)
    df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
    df['close_time'] = pd.to_datetime(df['close_time'], unit='ms')
    return df

In [11]:
def compute_rsi(series, period=14):
    delta = series.diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

def create_features(df):
    df['SMA_50'] = df['close'].rolling(window=50).mean()
    df['SMA_200'] = df['close'].rolling(window=200).mean()
    df['RSI'] = compute_rsi(df['close'], 14)
    df['EMA_12'] = df['close'].ewm(span=12, adjust=False).mean()
    df['EMA_26'] = df['close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = df['EMA_12'] - df['EMA_26']
    df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
    df['Bollinger_High'] = df['close'].rolling(window=20).mean() + 2 * df['close'].rolling(window=20).std()
    df['Bollinger_Low'] = df['close'].rolling(window=20).mean() - 2 * df['close'].rolling(window=20).std()
    df['volume_change'] = df['volume'].pct_change()
    
    # Lag features
    for i in range(1, 11):
        df[f'close_lag_{i}'] = df['close'].shift(i)
    
    df.dropna(inplace=True)  # Remove rows with NaN values

In [12]:
def create_feature_label_data(df):
    df['future_close'] = df['close'].shift(-7)  # Predict price 7 days ahead
    df.dropna(inplace=True)  # Remove rows with NaN values

    features = df[['SMA_50', 'SMA_200', 'RSI', 'EMA_12', 'EMA_26', 'MACD', 'MACD_signal', 'Bollinger_High', 'Bollinger_Low', 'volume_change'] + [f'close_lag_{i}' for i in range(1, 11)]]
    labels = df['future_close']
    return features, labels

def build_and_train_model(features, labels):
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)
    
    X_train, X_test, y_train, y_test = train_test_split(scaled_features, labels, test_size=0.2, random_state=42)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"Model Mean Squared Error: {mse:.2f}")
    print(f"Model Accuracy (R² score): {r2:.2%}")
    
    return model, scaler


In [13]:
def predict_future_prices(model, scaler, df, conversion_rate, symbol, features):
    today = df['close_time'].max()
    next_hour_start = today + timedelta(hours=1)  # Start from the next hour

    future_dates = pd.date_range(start=next_hour_start, periods=168, freq='h')
    future_df = pd.DataFrame(index=future_dates)
    
    # Use the last available features for the prediction
    features_count = len(features.columns)
    
    last_features = df[['SMA_50', 'SMA_200', 'RSI', 'EMA_12', 'EMA_26', 'MACD', 'MACD_signal', 'Bollinger_High', 'Bollinger_Low', 'volume_change'] + [f'close_lag_{i}' for i in range(1, 11)]].iloc[-1].values
    last_features = np.reshape(last_features, (1, features_count))  # Reshape for prediction
    
    future_predictions = []
    
    for _ in range(168):  # Predict for the next 168 hours
        # Predict the price for the next hour
        scaled_last_features = scaler.transform(pd.DataFrame(last_features, columns=features.columns))
        prediction = model.predict(scaled_last_features)
        future_predictions.append(prediction[0])
        
        # Update last_features for next prediction
        new_feature = np.zeros((1, features_count))
        last_features = np.roll(last_features, shift=-1, axis=1)  # Roll features left
        last_features[0, -1] = prediction[0]  # Update last feature with the new prediction

    future_df['predicted_close'] = future_predictions
    future_df['predicted_close_inr'] = future_df['predicted_close'] * conversion_rate

    # Find best times to buy and sell
    min_index = future_df['predicted_close'].idxmin()
    max_index = future_df['predicted_close'].idxmax()

    best_time_to_buy = future_df.loc[min_index].name
    best_time_to_sell = future_df.loc[max_index].name
    best_price_to_buy = future_df.loc[min_index, 'predicted_close']
    best_price_to_sell = future_df.loc[max_index, 'predicted_close']
    best_price_to_buy_inr = best_price_to_buy * conversion_rate
    best_price_to_sell_inr = best_price_to_sell * conversion_rate

    print(f"\nBest time to buy {symbol}: {best_time_to_buy} at price ${best_price_to_buy:.8f} ({best_price_to_buy_inr:.2f} INR)")
    print(f"Best time to sell {symbol}: {best_time_to_sell} at price ${best_price_to_sell:.8f} ({best_price_to_sell_inr:.2f} INR)")

    return future_df


In [14]:
def get_conversion_rate():
    url = "https://api.exchangerate-api.com/v4/latest/USD"
    response = requests.get(url)
    data = response.json()
    conversion_rate = data['rates'].get('INR', 1)  # Default to 1 if INR rate is not found
    return conversion_rate


In [15]:
def main():
    symbol = input("Enter the cryptocurrency symbol (e.g., BTCUSDT): ")
    df = fetch_binance_data(symbol, '1h', 1000)
    create_features(df)

    features, labels = create_feature_label_data(df)
    model, scaler = build_and_train_model(features, labels)

    conversion_rate = get_conversion_rate()
    future_df = predict_future_prices(model, scaler, df, conversion_rate, symbol, features)

    # Print the predicted prices in a tabular format
    print("\nPredicted Prices for {} in the next 168 hours:".format(symbol))

    pd.set_option('display.max_rows', None)
    pd.set_option('display.width', 1000)
    
    print(future_df[['predicted_close', 'predicted_close_inr']].to_string(index=True, header=True, float_format=lambda x: '{:.8f}'.format(x)))

    pd.reset_option('display.max_rows')
    pd.reset_option('display.width')


In [16]:
if __name__ == "__main__":
    main()

Model Mean Squared Error: 1283.30
Model Accuracy (R² score): 95.49%

Best time to buy ETHUSDT: 2024-07-30 11:59:59.999000 at price $3305.73600000 (276921.50 INR)
Best time to sell ETHUSDT: 2024-07-31 12:59:59.999000 at price $3434.40060000 (287699.74 INR)

Predicted Prices for ETHUSDT in the next 168 hours:
                         predicted_close  predicted_close_inr
2024-07-30 11:59:59.999    3305.73600000      276921.50472000
2024-07-30 12:59:59.999    3382.18100000      283325.30237000
2024-07-30 13:59:59.999    3392.73900000      284209.74603000
2024-07-30 14:59:59.999    3336.36980000      279487.69814600
2024-07-30 15:59:59.999    3382.56110000      283357.14334700
2024-07-30 16:59:59.999    3397.53240000      284611.28914800
2024-07-30 17:59:59.999    3390.31820000      284006.95561400
2024-07-30 18:59:59.999    3366.02160000      281971.62943200
2024-07-30 19:59:59.999    3364.11380000      281811.81302600
2024-07-30 20:59:59.999    3395.21210000      284416.91761700
2024-07-3