<a href="https://colab.research.google.com/github/Sidhtang/AI-planet-assignment/blob/main/crypto_price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install yfinance



In [4]:
!pip install ta

Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=46ae143a1fbd82c8e8e63d797c7c43b97cc63b1a09d11f2fd441e2de466c9475
  Stored in directory: /root/.cache/pip/wheels/5f/67/4f/8a9f252836e053e532c6587a3230bc72a4deb16b03a829610b
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


In [5]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from ta.trend import SMAIndicator, EMAIndicator
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands

# 1. Enhanced Data Collection
def get_crypto_data(symbol, start_date, end_date):
    data = yf.download(symbol, start=start_date, end=end_date)
    return data

# 2. Improved Feature Engineering
def engineer_features(data, sequence_length):
    df = data.copy()

    # Add technical indicators
    df['SMA'] = SMAIndicator(close=df['Close'], window=14).sma_indicator()
    df['EMA'] = EMAIndicator(close=df['Close'], window=14).ema_indicator()
    df['RSI'] = RSIIndicator(close=df['Close'], window=14).rsi()
    bb = BollingerBands(close=df['Close'], window=20, window_dev=2)
    df['BB_upper'] = bb.bollinger_hband()
    df['BB_lower'] = bb.bollinger_lband()

    # Calculate returns
    df['Returns'] = df['Close'].pct_change()

    # Use all features
    features = ['Close', 'Volume', 'SMA', 'EMA', 'RSI', 'BB_upper', 'BB_lower', 'Returns']
    df = df[features].dropna()

    # Normalize the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df)

    # Create sequences
    X, y = [], []
    for i in range(len(scaled_data) - sequence_length):
        X.append(scaled_data[i:(i + sequence_length), :])
        y.append(1 if scaled_data[i + sequence_length, 0] > scaled_data[i + sequence_length - 1, 0] else 0)

    return np.array(X), np.array(y), scaler

# 3. Improved Model Creation
def create_model(input_shape):
    model = Sequential([
        LSTM(100, return_sequences=True, input_shape=input_shape),
        BatchNormalization(),
        Dropout(0.3),
        LSTM(100, return_sequences=True),
        BatchNormalization(),
        Dropout(0.3),
        LSTM(100, return_sequences=False),
        BatchNormalization(),
        Dropout(0.3),
        Dense(50, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# 4. Improved Model Training
def train_model(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = create_model((X.shape[1], X.shape[2]))

    # Callbacks for better training
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)

    history = model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.1,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )

    return model, X_test, y_test, history

# 5. Model Evaluation (unchanged)
def evaluate_model(model, X_test, y_test):
    y_pred = (model.predict(X_test) > 0.5).astype("int32")
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    return accuracy, report

# Main execution
if __name__ == "__main__":
    # Example usage
    symbol = "BTC-USD"
    start_date = "2018-01-01"  # Extended date range for more data
    end_date = "2023-12-31"
    sequence_length = 60  # Number of previous days to use for prediction

    # Get data
    data = get_crypto_data(symbol, start_date, end_date)

    # Engineer features
    X, y, scaler = engineer_features(data, sequence_length)

    # Train model
    model, X_test, y_test, history = train_model(X, y)

    # Evaluate model
    accuracy, report = evaluate_model(model, X_test, y_test)

    print(f"Model Accuracy: {accuracy}")
    print("Classification Report:")
    print(report)

    # Make a prediction for the next day
    last_sequence = X[-1:]
    prediction = model.predict(last_sequence)

    print(f"Prediction for next day: {'Up' if prediction[0,0] > 0.5 else 'Down'}")

[*********************100%***********************]  1 of 1 completed
  super().__init__(**kwargs)


Epoch 1/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 27ms/step - accuracy: 0.5360 - loss: 0.8413 - val_accuracy: 0.4024 - val_loss: 0.6997 - learning_rate: 0.0010
Epoch 2/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.4863 - loss: 0.8255 - val_accuracy: 0.4556 - val_loss: 0.7001 - learning_rate: 0.0010
Epoch 3/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.4850 - loss: 0.7956 - val_accuracy: 0.4379 - val_loss: 0.6941 - learning_rate: 0.0010
Epoch 4/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.5132 - loss: 0.7677 - val_accuracy: 0.5030 - val_loss: 0.6989 - learning_rate: 0.0010
Epoch 5/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.4952 - loss: 0.7539 - val_accuracy: 0.5266 - val_loss: 0.7054 - learning_rate: 0.0010
Epoch 6/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m