In [None]:
!pip install ccxt pandas numpy scikit-learn tensorflow pycoingecko


In [None]:
import ccxt
import pandas as pd
import os
from datetime import datetime

# Exchange client used by every request in this cell; swap in ccxt.binanceus()
# to pull from Binance US instead.
exchange = ccxt.kraken()

# Output directory for the per-symbol CSV files. exist_ok=True keeps the cell
# idempotent on re-runs and removes the check-then-create race of a separate
# os.path.exists() test.
data_folder = 'crypto_datasets'
os.makedirs(data_folder, exist_ok=True)

# Define a function to fetch and save historical data for a symbol
def fetch_and_save_data(symbol, timeframe='1d', since=None, limit=500):
    """Download OHLCV candles for one symbol and write them to a CSV file.

    Relies on the module-level ``exchange`` client and ``data_folder`` path.
    Every exception raised inside is caught and reported with ``print``, so a
    failing symbol never interrupts the caller's loop.

    Args:
        symbol: Market symbol such as ``'BTC/USD'``; ``/`` is replaced by ``_``
            to build the file name.
        timeframe: Candle interval forwarded to ``fetch_ohlcv``.
        since: Start time forwarded to ``fetch_ohlcv``.
        limit: Maximum number of candles requested.

    Returns:
        None. The result is the CSV file and the printed status line.
    """
    try:
        print(f"Fetching data for {symbol}...")
        candles = exchange.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=limit)

        # Nothing to write when the exchange returns an empty list.
        if not candles:
            print(f"No data found for {symbol}")
            return

        frame = pd.DataFrame(
            candles,
            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
        )
        # Raw timestamps are epoch milliseconds; store them as datetimes.
        frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')

        csv_file = os.path.join(data_folder, f"{symbol.replace('/', '_')}.csv")
        frame.to_csv(csv_file, index=False)
        print(f"Data saved for {symbol} in {csv_file}")
    except Exception as e:
        print(f"Error fetching data for {symbol}: {str(e)}")

# Load the exchange's market table and keep only the USD-quoted symbols.
markets = exchange.load_markets()
symbols = [name for name, info in markets.items() if info['quote'] == 'USD']

# Download candles from 1 Jan 2023 onwards, one CSV per symbol.
since = exchange.parse8601('2023-01-01T00:00:00Z')
for symbol in symbols:
    fetch_and_save_data(symbol, since=since)

print("Data fetching complete.")


In [None]:
import ccxt
import pandas as pd
import os
from datetime import datetime

# Output directory for the per-exchange CSV files. exist_ok=True keeps the cell
# idempotent on re-runs and avoids the check-then-create race of a separate
# os.path.exists() test.
data_folder = 'crypto_data_all'
os.makedirs(data_folder, exist_ok=True)

# Function to fetch and save data for all symbols of an exchange
def fetch_and_save_data(exchange):
    """Download daily OHLCV candles for every market on ``exchange``, one CSV each.

    Files are written to the module-level ``data_folder`` as
    ``<exchange.id>_<BASE>_<QUOTE>.csv``. Errors for an individual symbol are
    caught and printed so the remaining symbols are still processed.

    Args:
        exchange: A ccxt exchange client providing ``load_markets``,
            ``fetch_ohlcv`` and ``id``.

    Returns:
        None.
    """
    markets = exchange.load_markets()
    for symbol in markets:
        # Market keys without a '/' are not BASE/QUOTE pairs; skip them.
        if '/' not in symbol:
            continue

        try:
            # One request for up to 1000 daily candles.
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe='1d', limit=1000)

            # An empty response would otherwise be saved as a header-only CSV.
            if not ohlcv:
                print(f"No data found for {symbol}")
                continue

            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

            # Raw timestamps are epoch milliseconds; store them as datetimes.
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

            # Build the path with os.path.join instead of a hand-written '/'.
            filename = os.path.join(data_folder, f'{exchange.id}_{symbol.replace("/", "_")}.csv')
            df.to_csv(filename, index=False)
            print(f"Saved {symbol} data to {filename}")

        except Exception as e:
            print(f"Error fetching data for {symbol}: {str(e)}")

# One client per supported exchange, keyed by the name used in progress output.
exchanges = dict(kraken=ccxt.kraken(), binanceus=ccxt.binanceus())

# Download every market from each exchange in turn.
for exchange_name, exchange in exchanges.items():
    print(f"Fetching data from {exchange_name}...")
    fetch_and_save_data(exchange)

print("Data fetching complete!")


In [None]:
import ccxt
import pandas as pd
import os
import time
import logging
from datetime import datetime

# ===========================
# Configuration Parameters
# ===========================

# Symbols to download, in the order they are processed by main().
# NOTE(review): the list is a truncated placeholder (see the "..." comment) and
# also contains fiat/fiat pairs such as "USD/CAD" — confirm that is intended.
symbols = [
    "1INCH/EUR", "1INCH/USD", "AAVE/ETH", "AAVE/EUR", "AAVE/GBP", "AAVE/USD",
    "AAVE/BTC", "ACA/EUR", "ACA/USD", "ACH/EUR", "ACH/USD", "ADA/AUD",
    "ADA/ETH", "ADA/EUR", "ADA/GBP", "ADA/USD", "ADA/USDT", "ADA/BTC",
    # ... (Include all other symbols here)
    "USD/CAD", "USD/JPY"
]

# ccxt exchange ids, tried in this order for each symbol; the first exchange
# that lists the symbol and returns data wins.
exchange_priority = ['binanceus', 'kraken']

# Candle interval passed to fetch_ohlcv
timeframe = '1d'  # Daily data; you can change to '1h', '1m', etc.

# Number of candles requested per symbol
limit = 1000  # NOTE(review): per-request caps differ by exchange — confirm 1000 is honoured

# Folder to save CSV files
data_folder = 'crypto_data'

# Logging configuration: INFO and above go to `log_file`.
# Note: logging.basicConfig does nothing if the root logger already has
# handlers configured (which some notebook environments do).
log_file = 'data_fetching.log'
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# ===========================
# Helper Functions
# ===========================

def initialize_exchanges():
    """Create a ccxt client for each id in ``exchange_priority`` and load its markets.

    An exchange that fails to construct or to load its markets is logged as an
    error and left out of the result.

    Returns:
        dict: exchange id -> initialised ccxt client.
    """
    ready = {}
    for ex_id in exchange_priority:
        try:
            client = getattr(ccxt, ex_id)()
            client.load_markets()
        except Exception as e:
            logging.error(f"Error initializing exchange {ex_id}: {e}")
        else:
            ready[ex_id] = client
            logging.info(f"Initialized exchange: {ex_id}")
    return ready

def create_data_folder(folder):
    """Make sure ``folder`` exists, creating it (and parents) when it is missing.

    A log line is written only when the folder is actually created.

    Args:
        folder: Directory path to create.
    """
    if os.path.exists(folder):
        return
    os.makedirs(folder)
    logging.info(f"Created data folder: {folder}")

def fetch_ohlcv_data(exchange, symbol, timeframe, limit):
    """Request OHLCV candles for ``symbol`` from ``exchange``.

    Args:
        exchange: Client exposing ``fetch_ohlcv`` and ``id``.
        symbol: Market symbol to request.
        timeframe: Candle interval forwarded to ``fetch_ohlcv``.
        limit: Maximum number of candles forwarded to ``fetch_ohlcv``.

    Returns:
        Whatever ``fetch_ohlcv`` returned, or ``None`` when it raised (the
        failure is logged as a warning rather than propagated).
    """
    try:
        candles = exchange.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)
    except Exception as e:
        logging.warning(f"Failed to fetch {symbol} from {exchange.id}: {e}")
        return None
    else:
        logging.info(f"Fetched data for {symbol} from {exchange.id}")
        return candles

def save_to_csv(data, symbol, folder):
    """Write OHLCV rows to ``<folder>/<symbol with '/' -> '_'>.csv``.

    Args:
        data: Rows of ``[timestamp, open, high, low, close, volume]`` with the
            timestamp in epoch milliseconds.
        symbol: Market symbol used to derive the file name.
        folder: Existing directory that receives the file.
    """
    column_names = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    frame = pd.DataFrame(data, columns=column_names)
    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')

    # '/' is not valid inside a file name, so BTC/USD becomes BTC_USD.csv.
    filepath = os.path.join(folder, f"{symbol.replace('/', '_')}.csv")
    frame.to_csv(filepath, index=False)
    logging.info(f"Saved data to {filepath}")

def main():
    """Download every configured symbol from the first exchange that can serve it.

    For each entry in ``symbols`` the exchanges are tried in
    ``exchange_priority`` order. The first one that lists the symbol and
    returns a non-empty result has its candles saved to ``data_folder``; if
    none does, an error is logged for that symbol.
    """
    exchanges = initialize_exchanges()
    create_data_folder(data_folder)

    for symbol in symbols:
        logging.info(f"Processing symbol: {symbol}")

        for ex_id in exchange_priority:
            client = exchanges.get(ex_id)
            # Exchanges that failed to initialise are absent from the dict.
            if client is None:
                continue

            if symbol not in client.symbols:
                logging.info(f"{symbol} not available on {ex_id}")
                continue

            logging.info(f"{symbol} found on {ex_id}")
            candles = fetch_ohlcv_data(client, symbol, timeframe, limit)
            if not candles:
                # Fetch failed or came back empty: fall through to the next exchange.
                continue

            save_to_csv(candles, symbol, data_folder)
            # rateLimit is in milliseconds; pause before the next request.
            time.sleep(client.rateLimit / 1000)
            break
        else:
            # The loop finished without a successful save.
            logging.error(f"{symbol} not found on any supported exchanges.")

    logging.info("Data fetching process completed.")

if __name__ == "__main__":
    # Time the whole run and record the start, end and duration in the log.
    logging.info("Data fetching script started.")
    start_time = datetime.now()
    main()
    end_time = datetime.now()
    duration = end_time - start_time
    logging.info(f"Script finished. Duration: {duration}")


In [None]:
import ccxt
import os
import pandas as pd
from datetime import datetime

# One client per exchange queried in this cell.
kraken = ccxt.kraken()
binance_us = ccxt.binanceus()

# Output directory for the CSV files. exist_ok=True keeps the cell idempotent
# on re-runs and avoids the check-then-create race of a separate
# os.path.exists() test.
data_folder = 'crypto_data_Current data'
os.makedirs(data_folder, exist_ok=True)

# Function to save OHLCV data to CSV
def save_data_to_csv(exchange, symbol, since, timeframe='1d'):
    """Fetch OHLCV candles for ``symbol`` and save them under ``data_folder``.

    The file name is derived from the symbol alone (``/`` -> ``-``), so saving
    the same symbol from two different exchanges overwrites the earlier file.
    Any exception is caught and printed instead of being raised.

    Args:
        exchange: Client exposing ``fetch_ohlcv``.
        symbol: Market symbol to download.
        since: Start time forwarded to ``fetch_ohlcv``.
        timeframe: Candle interval forwarded to ``fetch_ohlcv``.
    """
    try:
        candles = exchange.fetch_ohlcv(symbol, timeframe, since=since)
        if not candles:
            print(f"No data available for {symbol}")
            return

        frame = pd.DataFrame(
            candles,
            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
        )
        # Raw timestamps are epoch milliseconds; store them as datetimes.
        frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')

        file_path = os.path.join(data_folder, f"{symbol.replace('/', '-')}.csv")
        frame.to_csv(file_path, index=False)
        print(f"Saved data for {symbol} to {file_path}")
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")

# Function to get crypto symbols (excluding fiat pairs)
def get_crypto_symbols(exchange):
    """Return the exchange's ``BASE/QUOTE`` symbols whose quote is not a listed fiat currency.

    Market keys without a ``/`` are ignored. A pair is dropped when the part
    after the ``/`` is one of USD, EUR, JPY, GBP, AUD, CAD or CHF, so for
    example ``BTC/USD`` is excluded while ``BTC/USDT`` is kept.

    Args:
        exchange: Client exposing ``load_markets``.

    Returns:
        list[str]: Matching symbols in the order ``load_markets`` yields them.
    """
    fiat_quotes = ('USD', 'EUR', 'JPY', 'GBP', 'AUD', 'CAD', 'CHF')
    kept = []
    for market in exchange.load_markets():
        if '/' not in market:
            continue
        _, quote = market.split('/')
        if quote in fiat_quotes:
            continue
        kept.append(market)
    return kept

# Candle interval and start date shared by both exchanges.
timeframe = '1d'  # Daily OHLCV data
# Parse the start date with the `kraken` client created in this cell. The
# previous bare `exchange` name is never defined here, so it only worked when
# an earlier cell had leaked that variable into the kernel.
since = kraken.parse8601('2023-01-01T00:00:00Z')  # Move earlier to request older data

# Fetch and save data for Kraken
print("Fetching data from Kraken...")
kraken_symbols = get_crypto_symbols(kraken)
for symbol in kraken_symbols:
    save_data_to_csv(kraken, symbol, since, timeframe)

# Fetch and save data for Binance US
print("Fetching data from Binance US...")
binance_us_symbols = get_crypto_symbols(binance_us)
for symbol in binance_us_symbols:
    save_data_to_csv(binance_us, symbol, since, timeframe)

print("Data extraction complete.")


To generate and extract cryptocurrency market data for all available symbols from Kraken and Binance US using the `ccxt` library and save them into CSV files, we will follow the steps outlined below:

### Process Outline:
1. **Initialize the exchange**: Connect to Kraken or Binance US using the `ccxt` library.
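2. **Retrieve available symbols**: Fetch all available trading symbols and keep only pairs whose quote currency is not fiat. Any market quoted in USD, EUR, JPY, GBP, AUD, CAD or CHF is skipped, so `BTC/USD` is excluded while `BTC/USDT` is kept.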
3. **Fetch historical data**: Extract OHLCV (Open, High, Low, Close, Volume) data for each symbol with a single `fetch_ohlcv` request starting at the `since` date; the exchange decides how many candles that one request returns.
4. **Save data to CSV**: For each symbol, save the historical data to a CSV file.
5. **Organize datasets**: Store the data in a structured folder for further model building.

### Full Code Implementation:

```python
import ccxt
import os
import pandas as pd
from datetime import datetime

# One client per exchange queried by this script.
kraken = ccxt.kraken()
binance_us = ccxt.binanceus()

# Output directory for the CSV files. exist_ok=True makes the script safe to
# re-run and avoids the check-then-create race of a separate
# os.path.exists() test.
data_folder = 'crypto_data'
os.makedirs(data_folder, exist_ok=True)

# Function to save OHLCV data to CSV
def save_data_to_csv(exchange, symbol, since, timeframe='1d'):
    """Fetch OHLCV candles for ``symbol`` and save them under ``data_folder``.

    The file name is derived from the symbol alone (``/`` -> ``-``), so saving
    the same symbol from two different exchanges overwrites the earlier file.
    Any exception is caught and printed instead of being raised.
    """
    try:
        candles = exchange.fetch_ohlcv(symbol, timeframe, since=since)
        if not candles:
            print(f"No data available for {symbol}")
            return

        frame = pd.DataFrame(
            candles,
            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
        )
        # Raw timestamps are epoch milliseconds; store them as datetimes.
        frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')

        file_path = os.path.join(data_folder, f"{symbol.replace('/', '-')}.csv")
        frame.to_csv(file_path, index=False)
        print(f"Saved data for {symbol} to {file_path}")
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")

# Function to get crypto symbols (excluding fiat pairs)
def get_crypto_symbols(exchange):
    """Return ``BASE/QUOTE`` symbols whose quote is not USD, EUR, JPY, GBP, AUD, CAD or CHF.

    Market keys without a ``/`` are ignored; ``BTC/USD`` is dropped while
    ``BTC/USDT`` is kept.
    """
    fiat_quotes = ('USD', 'EUR', 'JPY', 'GBP', 'AUD', 'CAD', 'CHF')
    kept = []
    for market in exchange.load_markets():
        if '/' not in market:
            continue
        _, quote = market.split('/')
        if quote in fiat_quotes:
            continue
        kept.append(market)
    return kept

# Candle interval and start date shared by both exchanges.
timeframe = '1d'  # Daily OHLCV data
# Parse the start date with the `kraken` client defined above; a bare
# `exchange` name does not exist in this script and would raise NameError.
since = kraken.parse8601('2023-01-01T00:00:00Z')  # Move earlier to request older data

# Fetch and save data for Kraken
print("Fetching data from Kraken...")
kraken_symbols = get_crypto_symbols(kraken)
for symbol in kraken_symbols:
    save_data_to_csv(kraken, symbol, since, timeframe)

# Fetch and save data for Binance US
print("Fetching data from Binance US...")
binance_us_symbols = get_crypto_symbols(binance_us)
for symbol in binance_us_symbols:
    save_data_to_csv(binance_us, symbol, since, timeframe)

print("Data extraction complete.")
```

### Detailed Explanation:
1. **Exchange Initialization**:
   - We initialize two exchanges: Kraken (`ccxt.kraken()`) and Binance US (`ccxt.binanceus()`).
  
2. **Folder Creation**:
   - A folder named `crypto_data` is created to store the CSV files for each cryptocurrency symbol.

3. **Fetching Symbols**:
   - The `get_crypto_symbols()` function keeps only markets whose quote currency is not fiat (USD, EUR, JPY, GBP, AUD, CAD, CHF); fiat-quoted pairs such as `BTC/USD` are dropped along with fiat/fiat pairs.

4. **Data Fetching**:
   - The `fetch_ohlcv()` function is used to retrieve historical OHLCV data (Open, High, Low, Close, Volume) for each symbol.
   - Data is fetched for the daily (`1d`) timeframe since January 1, 2023, but you can modify this range to get more data as needed.

5. **Data Saving**:
   - The retrieved data is stored in a CSV file with the symbol name as the file name (e.g., `BTC-USDT.csv`).
   - Each file is saved in the `crypto_data` folder.

6. **Error Handling**:
   - The code includes error handling to catch any issues with data fetching or symbol errors.

### How It Works:
1. The script connects to both Kraken and Binance US exchanges and loads all available markets (symbols).
2. It then filters the symbols to only keep cryptocurrency pairs, excluding fiat currency pairs.
3. For each symbol, it fetches OHLCV data (historical price and volume) and saves it in a CSV file.
4. The data for each cryptocurrency symbol is saved in a structured folder, which can later be used for model training.

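For each remaining symbol, this script saves the daily candles returned by one request starting on January 1, 2023. To cover a longer history, move the `since` date earlier and call `fetch_ohlcv` repeatedly, advancing `since` past the last returned timestamp each time. The saved CSV files can later be used for price prediction models.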

In [None]:
!pip install ccxt pandas numpy matplotlib seaborn scikit-learn tensorflow


In [None]:
# Install required libraries
# !pip install ccxt pandas numpy matplotlib seaborn scikit-learn tensorflow

import ccxt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Step 1: Data Gathering from Kraken (using BTC/USD pair)
def fetch_ohlcv_data(symbol, timeframe='1d', since=None):
    """Fetch OHLCV candles for ``symbol`` from Kraken as a DataFrame.

    A new ``ccxt.kraken()`` client is created on every call.

    Args:
        symbol: Market symbol, e.g. ``'BTC/USD'``.
        timeframe: Candle interval forwarded to ``fetch_ohlcv``.
        since: Start time forwarded to ``fetch_ohlcv``.

    Returns:
        pandas.DataFrame with columns timestamp (datetime), open, high, low,
        close and volume.
    """
    client = ccxt.kraken()  # or ccxt.binanceus()
    candles = client.fetch_ohlcv(symbol, timeframe=timeframe, since=since)
    frame = pd.DataFrame(
        candles,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
    )
    # Raw timestamps are epoch milliseconds.
    return frame.assign(timestamp=pd.to_datetime(frame['timestamp'], unit='ms'))

# Download the BTC/USD candles and put them in chronological order.
data = fetch_ohlcv_data('BTC/USD').sort_values('timestamp')

# Scale the closing prices into [0, 1]; `scaler` is kept so values can be
# mapped back to price units with inverse_transform.
scaler = MinMaxScaler(feature_range=(0, 1))
close_column = data[['close']].to_numpy()
data['scaled_close'] = scaler.fit_transform(close_column)

# Prepare the training data (last 60 days to predict the next day)
def create_dataset(data, time_step=60):
    """Turn a 2-D series into sliding-window samples and next-step targets.

    Each sample is the ``time_step`` values of column 0 that precede a row,
    and its target is column 0 of that row.

    Args:
        data: 2-D array; only column 0 is read.
        time_step: Window length.

    Returns:
        tuple: ``(X, y)`` arrays. Both are empty when ``len(data) <= time_step``.
    """
    ends = range(time_step, len(data))
    windows = [data[end - time_step:end, 0] for end in ends]
    targets = [data[end, 0] for end in ends]
    return np.array(windows), np.array(targets)

# Window length (rows of history per sample) and chronological training share.
time_step = 60
split_ratio = 0.8

close_values = data['close'].values.reshape(-1, 1)
n_samples = len(close_values) - time_step
if n_samples <= 0:
    raise ValueError(
        f"Need more than {time_step} candles to build a dataset, got {len(close_values)}"
    )
split = int(n_samples * split_ratio)

# Refit the scaler on only the rows that the training windows and targets cover
# (the first time_step + split rows). Fitting on the whole series, as was done
# when `scaled_close` was first computed, leaks the test period's min/max into
# the training inputs.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_values[:time_step + split])
scaled_data = scaler.transform(close_values)
data['scaled_close'] = scaled_data.ravel()

# Build the sliding windows and reshape for the LSTM: (samples, timesteps, features)
X, y = create_dataset(scaled_data, time_step=time_step)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

# Chronological split: the earliest windows train, the latest ones test.
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Step 3: two stacked 50-unit LSTM layers, each followed by 20% dropout, and a
# single-unit dense head that outputs the scaled close price.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    Dense(units=1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Step 4: train for 10 epochs, reporting loss on the held-out windows each epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# Step 5: predict on the held-out windows and convert both the predictions and
# the true targets from the scaled range back to price units.
predicted_prices = scaler.inverse_transform(model.predict(X_test))
real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

# Actual vs predicted close price over the test period.
plt.figure(figsize=(14, 5))
plt.plot(real_prices, color='blue', label='Actual BTC/USD Price')
plt.plot(predicted_prices, color='red', label='Predicted BTC/USD Price')
plt.title('BTC/USD Price Prediction')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

# Step 6: error metrics in price units.
mse = mean_squared_error(real_prices, predicted_prices)
rmse = np.sqrt(mse)
print(f'Mean Squared Error: {mse}')
print(f'Root Mean Squared Error: {rmse}')



In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Step 1: predict on the test windows and undo the scaling for both series.
predicted_prices = scaler.inverse_transform(model.predict(X_test))
real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

# Step 2: regression metrics, all computed in price units.
mse = mean_squared_error(real_prices, predicted_prices)   # mean squared error
rmse = np.sqrt(mse)                                       # root mean squared error
mae = mean_absolute_error(real_prices, predicted_prices)  # mean absolute error
r2 = r2_score(real_prices, predicted_prices)              # coefficient of determination

# Step 3: report.
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R²) Score: {r2}")


In [None]:
# Import necessary libraries
import ccxt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Fetch OHLCV Data from Kraken or Binance US
def fetch_ohlcv_data(symbol, timeframe='1d', since=None):
    """Fetch OHLCV candles for ``symbol`` from Kraken as a DataFrame.

    A new ``ccxt.kraken()`` client is created on every call. The returned
    frame has columns timestamp (datetime), open, high, low, close, volume.
    """
    client = ccxt.kraken()  # Change to binanceus() if needed
    candles = client.fetch_ohlcv(symbol, timeframe=timeframe, since=since)
    frame = pd.DataFrame(
        candles,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
    )
    # Raw timestamps are epoch milliseconds.
    return frame.assign(timestamp=pd.to_datetime(frame['timestamp'], unit='ms'))

# Download the BTC/USD candles and put them in chronological order.
data = fetch_ohlcv_data('BTC/USD').sort_values('timestamp')

# Scale the closing prices into [0, 1]; `scaler` is kept so values can be
# mapped back to price units with inverse_transform.
scaler = MinMaxScaler(feature_range=(0, 1))
close_column = data[['close']].to_numpy()
data['scaled_close'] = scaler.fit_transform(close_column)

# Prepare the data
def create_dataset(data, time_step=60):
    """Build sliding-window samples and next-step targets from column 0 of ``data``.

    Sample ``k`` holds the ``time_step`` values preceding row
    ``time_step + k``; its target is the value at that row. Both returned
    arrays are empty when ``len(data) <= time_step``.
    """
    ends = range(time_step, len(data))
    windows = [data[end - time_step:end, 0] for end in ends]
    targets = [data[end, 0] for end in ends]
    return np.array(windows), np.array(targets)

# Window length (rows of history per sample) and chronological training share.
time_step = 60
split_ratio = 0.8

close_values = data['close'].values.reshape(-1, 1)
n_samples = len(close_values) - time_step
if n_samples <= 0:
    raise ValueError(
        f"Need more than {time_step} candles to build a dataset, got {len(close_values)}"
    )
split = int(n_samples * split_ratio)

# Refit the scaler on only the rows that the training windows and targets cover
# (the first time_step + split rows). Fitting on the whole series, as was done
# when `scaled_close` was first computed, leaks the test period's min/max into
# the training inputs.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_values[:time_step + split])
scaled_data = scaler.transform(close_values)
data['scaled_close'] = scaled_data.ravel()

# Build the sliding windows and reshape for the LSTM: (samples, timesteps, features)
X, y = create_dataset(scaled_data, time_step=time_step)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

# Chronological split: the earliest windows train, the latest ones test.
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Two stacked 50-unit LSTM layers, each followed by 20% dropout, and a
# single-unit dense head that outputs the scaled close price.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    Dense(units=1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train for 10 epochs, reporting loss on the held-out windows each epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# Predict on the held-out windows and convert both series back to price units.
predicted_prices = scaler.inverse_transform(model.predict(X_test))
real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

# Actual vs predicted close price over the test period.
plt.figure(figsize=(14, 5))
plt.plot(real_prices, color='blue', label='Actual BTC/USD Price')
plt.plot(predicted_prices, color='red', label='Predicted BTC/USD Price')
plt.title('BTC/USD Price Prediction')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

# Regression metrics in price units.
mse = mean_squared_error(real_prices, predicted_prices)
rmse = np.sqrt(mse)
mae = mean_absolute_error(real_prices, predicted_prices)
explained_var = explained_variance_score(real_prices, predicted_prices)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Explained Variance Score: {explained_var * 100}%')

# "Accuracy" here is an ad-hoc figure: 100 minus the RMSE expressed as a
# percentage of the mean actual price. It is not a classification accuracy.
mean_price = np.mean(real_prices)
accuracy = 100 - (rmse / mean_price) * 100
print(f'Model Accuracy: {accuracy:.2f}%')

# Per-sample signed error (actual - predicted), drawn as a one-column heatmap.
errors = real_prices - predicted_prices
plt.figure(figsize=(10,6))
sns.heatmap(errors, annot=False, cmap="coolwarm")
plt.title('Error Distribution Heatmap')
plt.show()

# Predicted vs actual scatter; the red line is the perfect-prediction diagonal.
plt.figure(figsize=(6,6))
plt.scatter(real_prices, predicted_prices, c='blue')
plt.plot(real_prices, real_prices, color='red', linewidth=2)
plt.title('Actual vs Predicted Prices')
plt.xlabel('Actual Prices')
plt.ylabel('Predicted Prices')
plt.show()


In [None]:
# prompt: Generate the code to display the accuracy of this model like print the accuracy of this model

# Regression metrics for the current `real_prices` / `predicted_prices`,
# all in price units.
mse = mean_squared_error(real_prices, predicted_prices)   # mean squared error
rmse = np.sqrt(mse)                                       # root mean squared error
mae = mean_absolute_error(real_prices, predicted_prices)  # mean absolute error
r2 = r2_score(real_prices, predicted_prices)              # coefficient of determination

# Report the four metrics.
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R²) Score: {r2}")


In [None]:
# prompt: Only I need accuracy in percentage how much it is predicted correctly

# RMSE of the current predictions, in price units.
mse = mean_squared_error(real_prices, predicted_prices)
rmse = np.sqrt(mse)

# Ad-hoc "accuracy": 100 minus the RMSE expressed as a percentage of the mean
# actual price. It is not a classification accuracy.
mean_price = np.mean(real_prices)
accuracy = 100 - (rmse / mean_price) * 100
print(f'Model Accuracy: {accuracy:.2f}%')


In [None]:
# Import necessary libraries
import ccxt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Fetch OHLCV Data from Kraken or Binance US
def fetch_ohlcv_data(symbol, timeframe='1d', since=None):
    """Fetch OHLCV candles for ``symbol`` from Kraken as a DataFrame.

    A new ``ccxt.kraken()`` client is created on every call. The returned
    frame has columns timestamp (datetime), open, high, low, close, volume.
    """
    client = ccxt.kraken()  # Change to binanceus() if needed
    candles = client.fetch_ohlcv(symbol, timeframe=timeframe, since=since)
    frame = pd.DataFrame(
        candles,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
    )
    # Raw timestamps are epoch milliseconds.
    return frame.assign(timestamp=pd.to_datetime(frame['timestamp'], unit='ms'))

# Download the XMR/USDT candles and put them in chronological order.
data = fetch_ohlcv_data('XMR/USDT').sort_values('timestamp')

# Scale the closing prices into [0, 1]; `scaler` is kept so values can be
# mapped back to price units with inverse_transform.
scaler = MinMaxScaler(feature_range=(0, 1))
close_column = data[['close']].to_numpy()
data['scaled_close'] = scaler.fit_transform(close_column)

# Prepare the data
def create_dataset(data, time_step=60):
    """Build sliding-window samples and next-step targets from column 0 of ``data``.

    Sample ``k`` holds the ``time_step`` values preceding row
    ``time_step + k``; its target is the value at that row. Both returned
    arrays are empty when ``len(data) <= time_step``.
    """
    ends = range(time_step, len(data))
    windows = [data[end - time_step:end, 0] for end in ends]
    targets = [data[end, 0] for end in ends]
    return np.array(windows), np.array(targets)

# Window length (rows of history per sample) and chronological training share.
time_step = 60
split_ratio = 0.8

close_values = data['close'].values.reshape(-1, 1)
n_samples = len(close_values) - time_step
if n_samples <= 0:
    raise ValueError(
        f"Need more than {time_step} candles to build a dataset, got {len(close_values)}"
    )
split = int(n_samples * split_ratio)

# Refit the scaler on only the rows that the training windows and targets cover
# (the first time_step + split rows). Fitting on the whole series, as was done
# when `scaled_close` was first computed, leaks the test period's min/max into
# the training inputs.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_values[:time_step + split])
scaled_data = scaler.transform(close_values)
data['scaled_close'] = scaled_data.ravel()

# Build the sliding windows and reshape for the LSTM: (samples, timesteps, features)
X, y = create_dataset(scaled_data, time_step=time_step)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

# Chronological split: the earliest windows train, the latest ones test.
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Two stacked 50-unit LSTM layers, each followed by 20% dropout, and a
# single-unit dense head that outputs the scaled close price.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    Dense(units=1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train for 10 epochs, reporting loss on the held-out windows each epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# Predict on the held-out windows and convert both series back to price units.
predicted_prices = model.predict(X_test)
predicted_prices = scaler.inverse_transform(predicted_prices)

real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

# Actual vs predicted close price over the test period. The legend labels now
# name XMR/USDT, the pair this cell loads, instead of the BTC/USD text left
# over from the cell it was copied from.
plt.figure(figsize=(14, 5))
plt.plot(real_prices, color='blue', label='Actual XMR/USDT Price')
plt.plot(predicted_prices, color='red', label='Predicted XMR/USDT Price')
plt.title('XMR/USDT Price Prediction')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

# Regression metrics in price units.
mse = mean_squared_error(real_prices, predicted_prices)
rmse = np.sqrt(mse)
mae = mean_absolute_error(real_prices, predicted_prices)
explained_var = explained_variance_score(real_prices, predicted_prices)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Explained Variance Score: {explained_var * 100}%')

# "Accuracy" here is an ad-hoc figure: 100 minus the RMSE expressed as a
# percentage of the mean actual price. It is not a classification accuracy.
accuracy = 100 - (rmse / np.mean(real_prices)) * 100
print(f'Model Accuracy: {accuracy:.2f}%')

# Per-sample signed error (actual - predicted), drawn as a one-column heatmap.
errors = real_prices - predicted_prices
plt.figure(figsize=(10,6))
sns.heatmap(errors, annot=False, cmap="coolwarm")
plt.title('Error Distribution Heatmap')
plt.show()

# Predicted vs actual scatter; the red line is the perfect-prediction diagonal.
plt.figure(figsize=(6,6))
plt.scatter(real_prices, predicted_prices, c='blue')
plt.plot(real_prices, real_prices, color='red', linewidth=2)
plt.title('Actual vs Predicted Prices')
plt.xlabel('Actual Prices')
plt.ylabel('Predicted Prices')
plt.show()


In [None]:

# Regression metrics for the current `real_prices` / `predicted_prices`,
# all in price units.
mse = mean_squared_error(real_prices, predicted_prices)   # mean squared error
rmse = np.sqrt(mse)                                       # root mean squared error
mae = mean_absolute_error(real_prices, predicted_prices)  # mean absolute error
r2 = r2_score(real_prices, predicted_prices)              # coefficient of determination

# Report the four metrics.
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R²) Score: {r2}")


In [None]:
# prompt: Only I need accuracy in percentage how much it is predicted correctly

# RMSE of the current predictions, in price units.
mse = mean_squared_error(real_prices, predicted_prices)
rmse = np.sqrt(mse)

# Ad-hoc "accuracy": 100 minus the RMSE expressed as a percentage of the mean
# actual price. It is not a classification accuracy.
mean_price = np.mean(real_prices)
accuracy = 100 - (rmse / mean_price) * 100
print(f'Model Accuracy: {accuracy:.2f}%')
