In [None]:
# Importing required libraries

import datetime as dt
from itertools import cycle

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Plotting libraries

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns  # used for the feature-correlation heatmap
from plotly.subplots import make_subplots

# Model-building libraries

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential

In [None]:
# Reading the dataset

# Relative path: the CSV is looked up in the current working directory.
path = "./BTC-USD.csv"
df = pd.read_csv(path)

# Understanding Data

In [None]:
# df.shape is a (rows, columns) tuple
print('Dataset Shape:', df.shape)

In [None]:
# Preview the first rows of the raw data
df.head()

In [None]:
# Preview the last rows of the raw data
df.tail()

In [None]:
# DataFrame.info() prints its report itself and returns None, so it is called
# on its own line; wrapping it in print() would append a stray "None".
print('Data Info:')
df.info()

In [None]:
# Summary statistics for the numeric columns
df.describe()

# Exploratory Data Analysis

In [None]:
# Parse the Date column as Year-Month-Day timestamps and promote it to the index
df = df.assign(Date=pd.to_datetime(df['Date'], format='%Y-%m-%d')).set_index('Date')

In [None]:
# Display the frame (pandas truncates long frames in the rendered output)
df

In [None]:
# Mean-aggregate the price history at three calendar frequencies:
# daily ('D'), month-end ('M') and year-end anchored on December ('A-DEC').
df_day, df_month, df_year = (
    df.resample(rule).mean() for rule in ('D', 'M', 'A-DEC')
)

In [None]:
# Plot 1: Line chart of Bitcoin closing prices over time
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df.index, df['Close'])
ax.set_title('Bitcoin Closing Prices over Time')
ax.set_xlabel('Time')
ax.set_ylabel('Closing Price')
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
plt.show()

In [None]:
# Plot 2: Open / High / Low / Close prices as four overlaid line series.
# This is a line plot, not a candlestick chart (which would draw one OHLC bar
# per day), so the title describes what is actually drawn.
plt.figure(figsize=(10, 5))
for column, color in (('Open', 'green'), ('Close', 'red'), ('High', 'blue'), ('Low', 'orange')):
    plt.plot(df.index, df[column], color=color, label=column)
plt.title('Bitcoin Open, High, Low and Close Prices over Time')
plt.xlabel('Time')
plt.ylabel('Price')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.show()

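* The four series overlap so closely at this scale that the individual lines are hard to tell apart.
* Open, High, Low and Close stay very close to one another throughout.
* All prices rise sharply in 2018 and again in 2021, with the 2018 peak lower than the 2021 peak.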

In [None]:
# Pairwise correlation between all columns, annotated to one decimal place
correlations = df.corr()
f, ax = plt.subplots(figsize=(9, 9))
sns.heatmap(data=correlations, annot=True, fmt='.1f', linewidths=0.5, ax=ax)
plt.show()

* Open, High, Low, Close, and Adj Close: These variables represent the opening, highest, lowest, closing, and adjusted closing prices of Bitcoin for each day. The heatmap shows a correlation of 1.0 between them (values are rounded to one decimal place), meaning they move in the same direction almost in lockstep. This makes sense because all of them are prices of the same asset on the same day, driven by the same market demand and supply.

* Volume: This variable represents the number of Bitcoins traded for each day. It has a moderate positive correlation of 0.7 with the other variables, meaning it tends to move in the same direction but not by the same amount. This indicates that there is some relationship between the trading volume and the price movements of Bitcoin, but it is not very strong or consistent. There may be other factors that affect the volume, such as news, events, or sentiment.

# LSTM Model

## First Step: Preparing Data for Training and Testing

* Here we hold out the most recent 1 year (365 days) of data as the test set and train on everything before it.

* Since the Bitcoin price has fluctuated drastically, from about 200 dollars in 2015 to 15,000 dollars in 2018 to 3,000 dollars in 2019 (these values are approximate), we will consider just 2 years to avoid this type of fluctuation in the data.

* As we want to predict Close Price of the Bitcoin so we are just Considering Close and Date.

In [None]:
# Keep only the Close column, with Date restored from the index as a column
closedf = df[['Close']].reset_index()
print("Shape of close dataframe:", closedf.shape)

In [None]:
# Daily and monthly Close-price series

# One row per calendar date. The Close column is selected explicitly:
# GroupBy.mean() and Resampler.mean() take `numeric_only` as their first
# positional argument, so passing ['Close'] there does not select a column.
# Grouping on the Date index already yields a Date-indexed frame, so no
# reset_index / to_datetime / set_index round trip is needed.
dfclose_day = df.groupby('Date')[['Close']].mean()

# Monthly mean of the daily Close price
dfclose_month = dfclose_day.resample('M').mean()


In [None]:
# Hold out the last `pred_days` (365) rows as the test set and train on
# everything before them
pred_days = 365
train = dfclose_day[:len(dfclose_day)-pred_days]
test = dfclose_day[len(dfclose_day)-pred_days:]
# Show both shapes as the cell output
train.shape,test.shape

In [None]:
# Training rows as a NumPy array of Close values
training_set = train.values
training_set.shape

In [None]:
# Scale the training prices and build one-step-ahead (day t -> day t+1) pairs.
# The scaler is fitted on `train.values` rather than on `training_set`, so
# re-running this cell never re-fits it on already-scaled data.
scaler = MinMaxScaler()
training_set = scaler.fit_transform(train.values)
X_train = training_set[:-1]  # inputs: every day except the last
y_train = training_set[1:]   # targets: the following day
# The LSTM input must be 3-D: (samples, timesteps, features)
X_train = np.reshape(X_train, (len(X_train), 1, 1))
X_train.shape

In [None]:
# Build the model: one LSTM layer followed by a small dense head
model = Sequential([
    LSTM(128, activation='relu', input_shape=(1, 1), return_sequences=True),
    Dense(32),
    Dropout(0.1),
    Dense(1),
])

model.compile(loss='mse', optimizer='adam')

In [None]:
# Print the layer-by-layer architecture of the model
model.summary()

In [None]:
# Train for 50 epochs; `history.history['loss']` is plotted in the next cell
history = model.fit(X_train,y_train,epochs=50)

In [None]:
# Training loss per epoch. The legend call is what makes the label visible.
plt.plot(history.history['loss'], label='train_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss (MSE)')
plt.legend()
plt.show()

The graph shows that the model learned quickly from the data and reduced the loss, which is the difference between the model’s predictions and the actual targets. However, after a few epochs, the model stopped improving and the loss remained almost constant. This means that the model has converged on the training data. Because only the training loss is plotted, this graph alone cannot tell us whether the model overfitted; that would require comparing it with a validation loss.

In [None]:
# Predict over the test period and convert back to price units
test_set = scaler.transform(test.values)
test_set = np.reshape(test_set, (len(test_set), 1, 1))
pred = model.predict(test_set)
pred = np.reshape(pred, (len(test_set), 1))
# Flatten to 1-D so the Series holds floats; wrapping the 2-D array in list()
# would give an object-dtype Series of one-element arrays.
pred = pd.Series(scaler.inverse_transform(pred).ravel(), index=test.index)

In [None]:
# Compare real and predicted Close prices over the test period
fig, ax = plt.subplots()
real_line, = ax.plot(test, label='real_price')
predicted_line, = ax.plot(pred, label='predict_price')
ax.legend(handles=[real_line, predicted_line], loc='upper left')
ax.tick_params(axis='x', labelrotation=90)
ax.set_ylabel('Close Price')
ax.set_xlabel('Date')

In [None]:
# def predict_stock_prices_for_years(model, scaler, df, target_year):
#     # Filter the dataframe for years up to the target year
#     df_years = df[df.index.year <= target_year]

#     # Extract the Close prices
#     df_close_years = df_years[['Close']]

#     # Use the same scaler used for training data
#     test_set = scaler.transform(df_close_years.values)
#     test_set = np.reshape(test_set, (len(test_set), 1, 1))

#     # Make predictions
#     pred = model.predict(test_set)
#     pred = np.reshape(pred, (len(test_set), 1))
#     pred = list(scaler.inverse_transform(pred))
#     pred = pd.Series(pred, index=df_close_years.index)

#     # Plot the results
#     fig, ax = plt.subplots()
#     for year in range(df_years.index.year.min(), target_year + 1):
#         year_data = df_close_years[df_close_years.index.year == year]
#         pred_data = pred[pred.index.year == year]
#         line1, = ax.plot(year_data.index, year_data['Close'], label=f'Real Price {year}')
#         line2, = ax.plot(pred_data.index, pred_data, label=f'Predicted Price {year}')

#     ax.legend(loc='upper left')
#     plt.xticks(rotation='vertical')
#     plt.ylabel('Close Price')
#     plt.xlabel('Date')
#     plt.title(f'Stock Price Prediction up to {target_year}')

#     plt.show()

In [None]:
def predict_and_plot_stock_prices_for_years(model, scaler, df, target_year):
    """Forecast and plot a daily Close price for every day of ``target_year``.

    The forecast is autoregressive: starting from the last Close dated before
    ``target_year``, each predicted (scaled) price is fed back in as the input
    for the following day.

    Parameters
    ----------
    model
        Fitted model whose ``predict`` accepts an array of shape ``(n, 1, 1)``
        and returns a 3-D array (the result is indexed as ``pred[-1, 0, 0]``).
        Each sample is assumed to be predicted independently of the others in
        the batch -- TODO confirm if a stateful model is ever passed.
    scaler
        Fitted scaler providing ``transform`` and ``inverse_transform``; it
        should be the one used to scale the model's training data.
    df : pandas.DataFrame
        Price history with a ``DatetimeIndex`` and a ``'Close'`` column.
    target_year : int
        Calendar year to forecast.

    Returns
    -------
    numpy.ndarray
        Predicted prices in original units, shape ``(days_in_year, 1)``.

    Raises
    ------
    ValueError
        If ``df`` has no rows dated before ``target_year``.
    """
    # One entry per calendar day of the target year; pandas handles leap years
    dates = pd.date_range(start=f'{target_year}-01-01', end=f'{target_year}-12-31')

    # Close prices strictly before the target year
    history = df.loc[df.index.year < target_year, ['Close']]
    if history.empty:
        raise ValueError(f'No data before {target_year} to start the forecast from.')

    # With a single timestep per sample, only the most recent observation
    # influences the next prediction, so the model is called on that one
    # sample instead of on the whole history at every step.
    current = scaler.transform(history.values)[-1:].reshape(1, 1, 1)

    scaled_predictions = []
    for _ in range(len(dates)):
        # The prediction becomes the input for the following day
        current = np.reshape(model.predict(current)[-1], (1, 1, 1))
        scaled_predictions.append(current[0, 0, 0])

    # Rescale predictions back to original price units
    predictions = scaler.inverse_transform(np.array(scaled_predictions).reshape(-1, 1))

    # Plotting
    plt.figure(figsize=(12, 6))
    plt.plot(dates, predictions, label='Predicted Prices', color='blue')
    plt.title(f'Bitcoin Price Prediction for {target_year}')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.xticks(rotation=45)
    # `mdates` is matplotlib.dates: one major tick per month, labelled Y-M-D
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
    plt.legend()
    plt.grid(True)
    plt.show()

    return predictions

In [None]:
# Year to forecast. The variable is now the value actually passed to the
# function; previously it was set to 2025 while a literal 2024 was passed.
target_year = 2024

# Use the function to make predictions
predictions_for_target_year = predict_and_plot_stock_prices_for_years(model, scaler, df, target_year)


In [None]:
# Reload the raw CSV and index it by the parsed Date column
btc_data = pd.read_csv('BTC-USD.csv')
btc_data = btc_data.assign(Date=pd.to_datetime(btc_data['Date'])).set_index('Date')

# Per-column count of missing values
missing_values = btc_data.isnull().sum()

# If any values were missing they could be forward-filled, e.g.:
# btc_data.fillna(method='ffill', inplace=True)

# Closing-price series used for the rest of the analysis
btc_close = btc_data['Close']

# Preview the first rows
btc_close.head()


In [None]:
# Count the NaN entries in the 'Close' series and report the total
missing_in_close = btc_close.isnull().sum()
print(f"Missing values in 'Close': {missing_in_close}")


In [None]:
import matplotlib.pyplot as plt

# Closing price over the full history
ax = btc_close.plot(figsize=(10, 6))
ax.set_title('Bitcoin Closing Price Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Closing Price (USD)')
plt.show()


Creating a Dataset Suitable for LSTM

In [None]:
import numpy as np

def create_dataset(dataset, time_step=1):
    """Turn a series into sliding-window samples for supervised learning.

    Parameters
    ----------
    dataset : numpy.ndarray
        2-D array; only column 0 is used.
    time_step : int, default 1
        Number of consecutive values in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` of shape ``(n_samples, time_step)`` where row ``i`` is
        ``dataset[i:i + time_step, 0]``, and ``y`` of shape ``(n_samples,)``
        where ``y[i]`` is ``dataset[i + time_step, 0]``, the value right after
        the window. ``n_samples`` is ``len(dataset) - time_step``, so the last
        value of the series is used as a target. Both arrays are empty when
        the series is too short for a single window.
    """
    n_samples = len(dataset) - time_step
    if n_samples <= 0:
        # Keep the 2-D shape so a later reshape to (samples, steps, 1) still works
        return np.empty((0, time_step)), np.empty((0,))

    dataX = [dataset[i:i + time_step, 0] for i in range(n_samples)]
    dataY = [dataset[i + time_step, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)

# Each sample X holds `time_step` consecutive prices; y is the price that follows
time_step = 100
close_column = btc_close.to_numpy().reshape(-1, 1)
X, y = create_dataset(close_column, time_step)


Splitting Data into Training and Testing Sets

In [None]:
from sklearn.model_selection import train_test_split

# Chronological split: the first 80% of the windows train the model and the
# last 20% test it. shuffle=False keeps the time order, so no test-period
# prices end up inside training windows and the test set can be plotted
# against time.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Reshape to (samples, timesteps, features) for the LSTM layers
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

**Building and Training the LSTM Model**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Two stacked LSTM layers followed by a small dense head
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(100, 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1),
])

# Mean-squared-error loss with the Adam optimizer
model.compile(loss='mean_squared_error', optimizer='adam')

# A single epoch with one sample per batch
model.fit(X_train, y_train, epochs=1, batch_size=1)

In [None]:
# Print the layer-by-layer architecture of the stacked-LSTM model
model.summary() 

**Making Predictions and Plotting the Results**

In [None]:
# Predict on the held-out windows
predictions = model.predict(X_test)

# Overlay the actual and predicted values
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test, label='Real Bitcoin Price')
ax.plot(predictions, label='Predicted Bitcoin Price')
ax.set_title('Bitcoin Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Bitcoin Price (USD)')
ax.legend()
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale the 'Close' prices to values between 0 and 1.
# NOTE(review): the scaler is fitted on the whole series, so the test period's
# min/max influence how the training data is scaled.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_close = scaler.fit_transform(btc_close.values.reshape(-1, 1))

# Generate the sliding-window dataset for the LSTM model
time_step = 100
X, y = create_dataset(scaled_close, time_step)

# Chronological split: the first 80% of the windows train the model and the
# last 20% test it. shuffle=False keeps the time order, so the test set is the
# most recent period and can be plotted against time.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Reshape to (samples, timesteps, features) for the LSTM layers
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Define the LSTM model architecture: two stacked LSTM layers and a dense head
# (no dropout layers are used). The input length follows `time_step`.
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

# Compile and train the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, batch_size=1, epochs=1)

# Predict, then map the scaled outputs back to prices
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)

# Despite its name, `y_test_scaled` holds the test targets converted back to
# original price units
y_test_scaled = scaler.inverse_transform(y_test.reshape(-1, 1))

plt.figure(figsize=(10,6))
plt.plot(y_test_scaled, label='Real Bitcoin Price')
plt.plot(predictions, label='Predicted Bitcoin Price')
plt.title('Bitcoin Price Prediction')
plt.xlabel('Time')
plt.ylabel('Bitcoin Price (USD)')
plt.legend()
plt.show()


In [None]:
# `predictions` and `y_test_scaled` are already on the original price scale.

# Target i from create_dataset is the close at row i + time_step, so the dates
# of all targets come straight from the price index. Reading them from the
# index (rather than subtracting a day count from the last date) stays correct
# when the index has gaps.
target_dates = btc_close.index[time_step:time_step + len(y)]

# Assumes the test set is the last len(y_test) targets in time order --
# TODO confirm: a shuffled train/test split has no meaningful date order.
prediction_dates = target_dates[-len(y_test):]

# Create a dataframe for the predictions with corresponding dates
predictions_df = pd.DataFrame(data=predictions, index=prediction_dates, columns=['Predicted'])
actual_df = pd.DataFrame(data=y_test_scaled, index=prediction_dates, columns=['Actual'])


In [None]:
# Monthly means of the daily predicted and actual prices
monthly_predictions = predictions_df['Predicted'].resample('M').mean()
monthly_actual = actual_df['Actual'].resample('M').mean()

# Scatter both monthly series on one set of axes
fig, ax = plt.subplots(figsize=(12, 7))
ax.scatter(monthly_actual.index, monthly_actual, color='blue', label='Actual Prices', alpha=0.5)
ax.scatter(monthly_predictions.index, monthly_predictions, color='red', label='Predicted Prices', alpha=0.5)
ax.set_title('Monthly Bitcoin Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Bitcoin Price (USD)')
ax.legend()
plt.show()


In [None]:
# Forecast from the day after the last observed close through the end of 2024.
# The start is read from the data (the seed window below is taken from the end
# of `btc_close`), so the dates stay aligned with the inputs.
start_date = btc_close.index[-1]         # last date with actual data
end_date = pd.to_datetime('2024-12-31')  # end of 2024

# Number of days to predict (never negative)
num_days = max((end_date - start_date).days, 0)

# Seed the forecast with the last `time_step` observed prices, scaled with the
# same scaler the model was trained with
input_seq = btc_close[-time_step:].values.reshape(-1, 1)
input_seq = scaler.transform(input_seq)

# List to hold the predictions (in price units)
future_predictions = []

# Autoregressive loop: each prediction is appended to the input sequence and
# becomes part of the window for the next day
for _ in range(num_days):
    # Most recent `time_step` values, shaped (1, time_step, 1) for the LSTM
    lstm_input = input_seq[-time_step:].reshape(1, time_step, 1)

    # Predict the next (scaled) price
    predicted_price = model.predict(lstm_input)

    # Store the prediction in price units
    future_predictions.append(scaler.inverse_transform(predicted_price)[0, 0])

    # Feed the scaled prediction back into the input sequence
    input_seq = np.append(input_seq, predicted_price, axis=0)

# The first prediction is for the day AFTER the last observed date
future_dates = pd.date_range(start=start_date + pd.Timedelta(days=1), periods=num_days, freq='D')

# Create a dataframe for the predictions with corresponding dates
future_predictions_df = pd.DataFrame(data=future_predictions, index=future_dates, columns=['Predicted'])

# Plot the future predictions
plt.figure(figsize=(12, 7))
plt.plot(future_predictions_df.index, future_predictions_df['Predicted'], color='red', label='Predicted Future Prices')
plt.title(f'Bitcoin Price Prediction for {end_date.year}')
plt.xlabel('Date')
plt.ylabel('Predicted Bitcoin Price (USD)')
plt.legend()
plt.show()


In [None]:
# Forecast horizon: through 31 December of this year
target_year = 2025  # You can change this to the year you want

# Step 1: historical closes from 2020 onwards
historical_data = btc_close[btc_close.index.year >= 2020]

# Days between the last observed close and the end of the target year
last_historical_date = historical_data.index[-1]
end_prediction_date = pd.Timestamp(year=target_year, month=12, day=31)
num_days = (end_prediction_date - last_historical_date).days

# Step 2: autoregressive forecast, seeded with the last `time_step` scaled closes
input_seq = scaler.transform(historical_data[-time_step:].values.reshape(-1, 1))

future_predictions = []
for _ in range(num_days):
    window = input_seq[-time_step:].reshape(1, time_step, 1)
    next_scaled = model.predict(window)
    # Feed the scaled prediction back in; store it in price units
    input_seq = np.append(input_seq, next_scaled, axis=0)
    future_predictions.append(scaler.inverse_transform(next_scaled)[0, 0])

# Predictions start the day after the last historical date
future_dates = pd.date_range(
    start=last_historical_date + pd.Timedelta(days=1),
    periods=num_days,
    freq='D',
)
future_predictions_df = pd.DataFrame(data=future_predictions, index=future_dates, columns=['Predicted'])

# Step 3: one frame with 'Actual' and 'Predicted' columns over the union of dates
combined_data = historical_data.to_frame(name='Actual').join(future_predictions_df, how='outer')

# Step 4: plot history and forecast together
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(combined_data.index, combined_data['Actual'], label='Historical Prices', color='blue')
ax.plot(combined_data.index, combined_data['Predicted'], label='Predicted Future Prices', color='red', linestyle='--')
ax.set_title(f'Bitcoin Price and Predictions from 2020 to {target_year}')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')
ax.legend()
plt.show()


In [None]:
import pandas as pd

def predict_price(input_date, historical=None, predictions=None):
    """Look up the price for a date: predicted if in the future, else actual.

    Parameters
    ----------
    input_date
        Anything ``pandas.Timestamp`` accepts (string, ``date``, ``datetime``).
    historical : pandas.Series or pandas.DataFrame, optional
        Date-indexed actual prices. A DataFrame must have an ``'Actual'``
        column. Defaults to the notebook-level ``historical_data``.
    predictions : pandas.DataFrame, optional
        Date-indexed frame with a ``'Predicted'`` column. Defaults to the
        notebook-level ``future_predictions_df``.

    Returns
    -------
    float or str
        The price, or the string ``"Date out of prediction range"`` when the
        date lies outside the covered range, or
        ``"No price available for this date"`` when the date falls inside the
        range but is in neither index.
    """
    if historical is None:
        historical = historical_data
    if predictions is None:
        predictions = future_predictions_df
    if isinstance(historical, pd.DataFrame):
        historical = historical['Actual']

    # Convert input_date to pandas.Timestamp for comparison
    input_date = pd.Timestamp(input_date)

    # Check if the input date is within the range covered by the data
    if input_date < historical.index[0] or input_date > predictions.index[-1]:
        return "Date out of prediction range"

    if input_date in predictions.index:
        # Prediction for a future date
        return predictions.loc[input_date, 'Predicted']

    if input_date in historical.index:
        # `historical` is a Series, so .loc already yields the scalar price
        return historical.loc[input_date]

    # Inside the overall range but missing from both indexes (a gap in the data)
    return "No price available for this date"


In [None]:
# Show where the actual data ends and how far the forecast extends
print("Last historical date:", historical_data.index[-1])
print("Last predicted date:", future_predictions_df.index[-1])


In [None]:
import tkinter as tk
from datetime import datetime

def get_date():
    """Button callback: read the entered date and show its price in the result label.

    Reads the text of the ``date_entry`` widget, parses it as ``YYYY-MM-DD``,
    looks the date up with ``predict_price`` and writes the outcome to
    ``result_label``. An unparseable date produces a format hint; a message
    string returned by ``predict_price`` is shown as-is rather than being
    presented as a price.
    """
    # Retrieve the date from the entry field, ignoring surrounding whitespace
    date_string = date_entry.get().strip()

    try:
        # Parse the date string into a date object
        entered_date = datetime.strptime(date_string, '%Y-%m-%d').date()
    except ValueError:
        result_label.config(text="Please enter a valid date in YYYY-MM-DD format.")
        return

    # Call the prediction function
    predicted_price = predict_price(entered_date)

    if isinstance(predicted_price, str):
        # predict_price returns a message string when it has no price to report
        result_label.config(text=predicted_price)
    else:
        result_label.config(text=f"The predicted price for {entered_date} is {predicted_price}")


# Set up the Tkinter window
window = tk.Tk()
window.title("Bitcoin Price Prediction")

# Set the window size and position it in the center of the screen
window.geometry("400x200")
window.eval('tk::PlaceWindow . center')

# Create a label, entry field, and button
label = tk.Label(window, text="Enter a date (YYYY-MM-DD):")
label.pack()

# Text box read by get_date()
date_entry = tk.Entry(window)
date_entry.pack()

# Clicking the button runs get_date()
submit_button = tk.Button(window, text="Get Prediction", command=get_date)
submit_button.pack()

# Create a label to display the result (written to by get_date())
result_label = tk.Label(window, text="")
result_label.pack()

# Start the Tkinter event loop; this call blocks until the window is closed
window.mainloop()
