In [1]:
import os
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from datetime import timedelta


os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

crypto_symbols = ['ADA-USD', 'BTC-USD', 'BNB-USD', 'DOGE-USD','ETC-USD']  
start_date = '2020-01-01'
end_date = '2025-08-11'

folder_path = r'C:\CRYPTO\datareq'
os.makedirs(folder_path, exist_ok=True) 

def download_crypto_data():
    for crypto_symbol in crypto_symbols:
        print(f"Downloading data for {crypto_symbol}...")
        try:
            crypto_data = yf.download(crypto_symbol, start=start_date, end=end_date)
            crypto_data.reset_index(inplace=True)
            csv_file_path = os.path.join(folder_path, f'{crypto_symbol}.csv')
            crypto_data.to_csv(csv_file_path, index=False)
            print(f'Data for {crypto_symbol} saved to {csv_file_path}')
        except Exception as e:
            print(f"Error downloading {crypto_symbol}: {e}")

def process_and_train():
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.csv'):
            file_path = os.path.join(folder_path, file_name)
            print(f"\nProcessing {file_name}...\n")
            try:
                ADA = pd.read_csv(file_path, encoding='utf-8')

                print(ADA.head())
                print(ADA.dtypes)

                if 'Date' in ADA.columns:
                    ADA['date'] = pd.to_datetime(ADA['Date'], errors='coerce')
                else:
                    raise ValueError(f"'Date' column not found in {file_name}")

                necessary_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
                for col in necessary_columns:
                    ADA[col] = pd.to_numeric(ADA[col], errors='coerce')# convert to numeric values

                ADA = ADA.dropna(subset=['Close', 'Open', 'High', 'Low', 'Volume']) #drop null values

                ADA['MA5'] = ADA['Close'].rolling(window=5).mean()
                ADA['MA10'] = ADA['Close'].rolling(window=10).mean()
                ADA['MA20'] = ADA['Close'].rolling(window=20).mean()

                ADA['Next_Close'] = ADA['Close'].shift(-1)
                ADA = ADA.dropna() 

                ADA_features = ADA[['Open', 'Low', 'High', 'Close', 'MA5', 'MA10', 'MA20']]
                ADA_target = ADA['Next_Close']

                ADA_scaler = MinMaxScaler()
                ADA_features_scaled = ADA_scaler.fit_transform(ADA_features)

                ADA_target_scaler = MinMaxScaler()
                ADA_target_scaled = ADA_target_scaler.fit_transform(ADA_target.values.reshape(-1, 1))

                def create_sequences(features, target, n_steps):
                    X, y = [], []
                    for i in range(len(features) - n_steps):
                        X.append(features[i:i + n_steps])
                        y.append(target[i + n_steps])
                    return np.array(X), np.array(y)

                n_steps = 30
                X_ADA, y_ADA = create_sequences(ADA_features_scaled, ADA_target_scaled, n_steps)
                X_train_ADA, X_test_ADA, y_train_ADA, y_test_ADA = train_test_split(X_ADA, y_ADA, test_size=0.2, random_state=42)

                ADA_LSTM_model = Sequential([
                    LSTM(units=128, return_sequences=True, input_shape=(X_train_ADA.shape[1], X_train_ADA.shape[2])),
                    Dropout(0.1),
                    LSTM(units=64),
                    Dropout(0.1),
                    Dense(units=32, activation='relu'),
                    Dense(units=1)
                ])

                optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
                ADA_LSTM_model.compile(optimizer=optimizer, loss='mean_squared_error')

                ADA_LSTM_history = ADA_LSTM_model.fit(X_train_ADA, y_train_ADA, epochs=50, batch_size=64, validation_split=0.2)

                y_test_pred_scaled = ADA_LSTM_model.predict(X_test_ADA)

                y_test_pred = ADA_target_scaler.inverse_transform(y_test_pred_scaled)
                y_test_actual = ADA_target_scaler.inverse_transform(y_test_ADA)

                r2 = r2_score(y_test_actual, y_test_pred)
                print(f"R² Score on test data for {file_name}: {r2:.4f}")

                end_index = len(ADA_features_scaled) - 1
                start_index = end_index - n_steps + 1
                sequence = ADA_features_scaled[start_index:end_index + 1]
                sequence = np.expand_dims(sequence, axis=0)

                ADA_predicted_scaled = ADA_LSTM_model.predict(sequence)
                ADA_predicted_scaled_reshaped = ADA_predicted_scaled.reshape(-1, 1)
                ADA_predicted = ADA_target_scaler.inverse_transform(ADA_predicted_scaled_reshaped)

                last_date = ADA['date'].max()
                next_date = last_date + timedelta(days=1)

                predicted_price = ADA_predicted[0][0]
                print(f'Predicted closing price for {next_date.strftime("%d-%b-%Y")} in {file_name}: {predicted_price:.2f}')

            except Exception as e:
                print(f"Error processing {file_name}: {e}")

download_crypto_data()  
process_and_train()

Downloading data for ADA-USD...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['ADA-USD']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


Data for ADA-USD saved to C:\CRYPTO\datareq\ADA-USD.csv
Downloading data for BTC-USD...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BTC-USD']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


Data for BTC-USD saved to C:\CRYPTO\datareq\BTC-USD.csv
Downloading data for BNB-USD...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BNB-USD']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


Data for BNB-USD saved to C:\CRYPTO\datareq\BNB-USD.csv
Downloading data for DOGE-USD...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['DOGE-USD']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


Data for DOGE-USD saved to C:\CRYPTO\datareq\DOGE-USD.csv
Downloading data for ETC-USD...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['ETC-USD']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


Data for ETC-USD saved to C:\CRYPTO\datareq\ETC-USD.csv

Processing ADA-USD.csv...

   Date Adj Close    Close     High      Low     Open   Volume
0   NaN   ADA-USD  ADA-USD  ADA-USD  ADA-USD  ADA-USD  ADA-USD
Date         float64
Adj Close     object
Close         object
High          object
Low           object
Open          object
Volume        object
dtype: object
Error processing ADA-USD.csv: Found array with 0 sample(s) (shape=(0, 7)) while a minimum of 1 is required by MinMaxScaler.

Processing ADA-USD_predictions.csv...

                        Date  Predicted_Close  R2_Score
0  2024-11-21 00:00:00+00:00              0.8    0.9856
Date                object
Predicted_Close    float64
R2_Score           float64
dtype: object
Error processing ADA-USD_predictions.csv: 'Open'

Processing BNB-USD.csv...

   Date Adj Close    Close     High      Low     Open   Volume
0   NaN   BNB-USD  BNB-USD  BNB-USD  BNB-USD  BNB-USD  BNB-USD
Date         float64
Adj Close     object
Close        