In [11]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.tsa.stattools import adfuller
import plotly.graph_objects as go
from ipywidgets import interact, Dropdown
from plotly.subplots import make_subplots
import matplotlib.ticker as ticker
import statsmodels.api as sm
import matplotlib.dates as mdates
import plotly.express as px
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os
import warnings
# Load environment variables
_ = load_dotenv()

In [12]:
from dash import Input, Output
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import ipywidgets as widgets
from ipywidgets import interact, IntSlider, Dropdown, IntRangeSlider
import ipywidgets as widgets

from dash import dcc, html

In [15]:
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Configuration to suppress warnings
warnings.filterwarnings('ignore')

# Additional configurations for better control over visualizations (optional)
plt.style.use('ggplot')  # For ggplot-like style in plots
pd.options.display.max_columns = None  # Ensure all columns are visible when displaying DataFrames

## Load the Dateset

In [14]:
# Define the path to the dataset
file_path = 'RAW_Data/updated_data_715.csv'
# Load the dataset
df = pd.read_csv(file_path)

#### Interactive Time Series Plot

In [None]:
# Generate the interactive line plot
fig = px.line(df, x=df.index, y='pct_change', color='product_id', 
              title='Interactive Time Series of Percentage Changes',
              labels={'pct_change': 'Percentage Change (%)', 'product_id': 'Cryptocurrency'})

# Enhance the plot with a range slider for better navigation
fig.update_xaxes(rangeslider_visible=True)

# Update traces to be hidden by default
for trace in fig.data:
    trace.visible = 'legendonly'

fig.show()

### Distribution Analysis - Histograms

In [None]:
# Create a histogram of percentage changes to analyze the distribution
fig = px.histogram(df, x='pct_change', color='product_id',
                   marginal='box',  # adds a boxplot alongside the histogram
                   barmode='overlay',
                   nbins=100,  # adjust the number of bins for better resolution
                   title='Distribution of Percentage Changes by Cryptocurrency')

# Set all traces to be hidden by default, appear on legend click
for trace in fig.data:
    trace.visible = 'legendonly'

fig.update_layout(
    barmode='overlay',  # Overlays bars on top of each other
    bargap=0.1          # Sets gap between bars of adjacent location coordinates.
)

fig.show()

### Interactive Scatter Plot of Percentage Change vs. Volume

In [None]:
# Generate the scatter plot of percentage change vs volume
fig = px.scatter(df, x='volume', y='pct_change', color='product_id',
                 title='Volume vs. Percentage Change by Cryptocurrency',
                 labels={'volume': 'Trading Volume', 'pct_change': 'Percentage Change (%)'})

# Set all traces to be hidden by default, appear on legend click
for trace in fig.data:
    trace.visible = 'legendonly'

fig.show()

### Day of the Week Analysis

In [None]:
# Extract day of the week and map it to day names
df['day_of_week'] = df.index.dayofweek
day_map = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
df['day_name'] = df['day_of_week'].map(day_map)

# Calculate average percentage change by day of the week for each product
avg_pct_change_by_day = df.groupby(['product_id', 'day_of_week', 'day_name'])['pct_change'].mean().reset_index()

# Ensure the data is sorted by day of the week to avoid plotting issues
avg_pct_change_by_day.sort_values(by=['product_id', 'day_of_week'], inplace=True)

# Generate the interactive line plot
fig = px.line(avg_pct_change_by_day, x='day_name', y='pct_change', color='product_id',
              title='Average Percentage Change by Day of the Week',
              labels={'day_name': 'Day of the Week', 'pct_change': 'Average Percentage Change (%)'},
              category_orders={'day_name': ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']})

# Set all traces to be hidden by default, appear on legend click
for trace in fig.data:
    trace.visible = 'legendonly'

fig.show()


### Testing Whether the Data is Statioary
Stationarity in time series data means that the statistical properties of the series like mean, variance, and autocorrelation do not change over time. For financial time series data like cryptocurrency percentage changes, ensuring stationarity is essential for making reliable forecasts.

In [None]:
### Add Title


def test_stationarity(timeseries, window=12):
    # Calculate rolling statistics
    rolling_mean = timeseries.rolling(window=window).mean()
    rolling_std = timeseries.rolling(window=window).std()

    # Prepare Plotly graph
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=timeseries.index, y=timeseries, name='Original', line=dict(color='#4FBAF7')))
    fig.add_trace(go.Scatter(x=timeseries.index, y=rolling_mean, name='Rolling Mean', line=dict(color='red')))
    fig.add_trace(go.Scatter(x=timeseries.index, y=rolling_std, name='Rolling Std Dev', line=dict(color='black')))
    
    # Perform Dickey-Fuller test
    result = adfuller(timeseries.dropna(), autolag='AIC', maxlag=20)
    test_stat = result[0]
    p_value = result[1]
    critical_values = result[4]

    # Add Dickey-Fuller test results to the plot
    fig.update_layout(title=f'Dickey-Fuller Test: Test Statistic = {test_stat:.2f}, p-value = {p_value:.4f}',
                      xaxis_title='Time',
                      yaxis_title='Value',
                      showlegend=True)
    for key, value in critical_values.items():
        fig.add_hline(y=value, line_dash="dot",
                      annotation_text=f'Critical Value {key}', 
                      annotation_position="bottom right")
    
    fig.show()

# Dropdown to select cryptocurrency
dropdown = Dropdown(options=df['product_id'].unique())

# Function to update plot based on selected cryptocurrency
def update_plot(crypto):
    crypto_data = df[df['product_id'] == crypto]['pct_change']
    test_stationarity(crypto_data)

# Show dropdown and update plot based on selection
interact(update_plot, crypto=dropdown)

interactive(children=(Dropdown(description='crypto', options=('ADA-USD', 'SOL-USD', 'MATIC-USD', 'LINK-USD', '…

<function __main__.update_plot(crypto)>

#### Time Series Visualization with Trend Lines

In [None]:
# Create a subplot layout
fig = make_subplots(rows=len(df['product_id'].unique()), cols=1, shared_xaxes=True)

# Iterate over each unique cryptocurrency for plotting
for i, (label, df_product) in enumerate(df.groupby('product_id')):
    df_product = df_product.dropna(subset=['pct_change'])  # Drop NaN values for clean plotting
    if len(df_product) > 1:
        # Add line trace for percentage changes
        fig.add_trace(go.Scatter(x=df_product.index, y=df_product['pct_change'],
                                 mode='lines', name=f'{label} % Change'), row=i+1, col=1)
        
        # Add trend line using numpy's polyfit
        valid_dates = df_product.index
        coefficients = np.polyfit(range(len(df_product)), df_product['pct_change'], 1)
        trendline = np.polyval(coefficients, range(len(df_product)))
        fig.add_trace(go.Scatter(x=valid_dates, y=trendline,
                                 mode='lines', name=f'{label} Trend', line=dict(dash='dash')), row=i+1, col=1)

# Update layout to make it cleaner
fig.update_layout(height=900, width=1100, title_text="Percentage Change with Trend Lines by Cryptocurrency")
fig.update_xaxes(title_text="Time")
fig.update_yaxes(title_text="Percentage Change (%)", ticksuffix="%")

# Set all traces to be hidden by default, appear on legend click
for trace in fig.data:
    trace.visible = 'legendonly'

fig.show()


This segment creates a subplot for each cryptocurrency with its close prices over time, enhanced with trend lines to easily visualize the overall direction of price movements.

## Data Cleaning & Feature Engineering

In [None]:
# Print the column names of the DataFrame
print(df.columns)


Index(['low', 'high', 'open', 'close', 'volume', 'price_change',
       'average_price', 'volatility', 'product_id', 'pct_change',
       'day_of_week', 'day_name'],
      dtype='object')


In [None]:
# Check if the DataFrame is sorted by time
is_sorted = df.index.is_monotonic_increasing
print(f"The DataFrame is sorted by time: {is_sorted}")


The DataFrame is sorted by time: True


In [None]:
# Print the DataFrame's index to check if 'time' is the index
print(df.index)


DatetimeIndex(['2021-07-16', '2021-07-16', '2021-07-16', '2021-07-16',
               '2021-07-16', '2021-07-16', '2021-07-16', '2021-07-16',
               '2021-07-16', '2021-07-17',
               ...
               '2024-07-08', '2024-07-08', '2024-07-08', '2024-07-08',
               '2024-07-08', '2024-07-08', '2024-07-08', '2024-07-08',
               '2024-07-08', '2024-07-08'],
              dtype='datetime64[ns]', name='time', length=10764, freq=None)


In [None]:
print(df.index.dtype)
print("Data timeline:")
print("Start date:", df.index.min())
print("End date:", df.index.max())
print("Number of days:", (df.index.max() - df.index.min()).days)

datetime64[ns]
Data timeline:
Start date: 2021-07-16 00:00:00
End date: 2024-07-08 00:00:00
Number of days: 1088


In [None]:
# Adding lag features for close prices and volume
for lag in [1, 3, 7]:  # Example lags: previous day, 3 days ago, and a week ago
    df[f'close_lag_{lag}'] = df['close'].shift(lag)
    df[f'volume_lag_{lag}'] = df['volume'].shift(lag)

# Calculate Moving Average (MA)
df['close_MA10'] = df['close'].rolling(window=10).mean()

# Calculate Exponential Moving Average (EMA) for different periods
for window in [7, 14, 30]:
    df[f'close_ema_{window}'] = df['close'].ewm(span=window, adjust=False).mean()

# Calculate Relative Strength Index (RSI)
delta = df['close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss
df['RSI'] = 100 - (100 / (1 + rs))

# Calculate Moving Average Convergence Divergence (MACD)
exp1 = df['close'].ewm(span=12, adjust=False).mean()
exp2 = df['close'].ewm(span=26, adjust=False).mean()
df['MACD'] = exp1 - exp2
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Calculate Rolling Volatility
for window in [7, 14, 30]:
    df[f'volatility_{window}d'] = df['pct_change'].rolling(window=window).std() * np.sqrt(window)

# Calculate Market Momentum
for window in [1, 7, 30]:  # momentum over 1 day, 1 week, and 1 month
    df[f'momentum_{window}d'] = df['close'] / df['close'].shift(window) - 1

# Seasonal Indicators
df['day_of_week'] = df.index.dayofweek
df['month_of_year'] = df.index.month

# Clean up any resulting missing values before modeling
df.dropna(inplace=True)

# Define the split point for training and testing datasets in a time series manner
split_date = '2023-12-31'  # or '2024-01-01'
train = df[df.index < split_date]
test = df[df.index >= split_date]

# Ensure the test set comprises approximately 20% of the data
test_size = 0.2
train_size = 1 - test_size
train = train.tail(int(len(df) * train_size))

# Increase display settings for columns
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# Print detailed information about the DataFrames
print("Training and Test set sizes:", train.shape, test.shape)
print("\nTraining data range: {} to {}".format(train.index.min(), train.index.max()))
print("Testing data range: {} to {}".format(test.index.min(), test.index.max()))

# Print full information on a few rows to understand the dataset structure better
print("\nDetailed sample of Training Set:")
print(train.head())

print("\nDetailed sample of Testing Set:")
print(test.head())

Training and Test set sizes: (8587, 32) (1900, 32)

Training data range: 2021-08-15 00:00:00 to 2023-12-30 00:00:00
Testing data range: 2023-12-31 00:00:00 to 2024-07-08 00:00:00

Detailed sample of Training Set:
                   low        high        open       close        volume  price_change  average_price  volatility product_id  pct_change  day_of_week day_name  close_lag_1  volume_lag_1  close_lag_3  volume_lag_3  close_lag_7  volume_lag_7   close_MA10   close_ema_7  close_ema_14  close_ema_30        RSI         MACD  Signal_Line  volatility_7d  volatility_14d  volatility_30d  momentum_1d   momentum_7d  momentum_30d  month_of_year
time                                                                                                                                                                                                                                                                                                                                                            

In [None]:
# Check data type of index
print("Data type of index:", df.index.dtype)

# Display range of dates in the data
print("Start date:", df.index.min())
print("End date:", df.index.max())


Data type of index: datetime64[ns]
Start date: 2021-07-19 00:00:00
End date: 2024-07-08 00:00:00


In [None]:
check_max = train.reset_index()
check_max['time'].max()

Timestamp('2023-12-30 00:00:00')

In [None]:
check_min = test.reset_index()
check_min['time'].min()

Timestamp('2023-12-31 00:00:00')

The output confirms that the data has been successfully partitioned into training and testing sets, maintaining a clear chronological order, which is essential for time series analysis. Specifically, the training set encompasses data from August 5, 2021, to December 30, 2023, while the testing set begins immediately thereafter, spanning from December 31, 2023, to July 8, 2024. This split ensures that the temporal sequence is preserved, avoiding potential data leakage and making the setup suitable for subsequent time series forecasting models. The training set comprises 8,684 observations, and the testing set includes 1,900 observations, maintaining a suitable split that is ideal for robust model training and validation phases. This structured approach reflects a sound methodological practice in time series analysis, ensuring a reliable foundation for model development and evaluation.

In [None]:
# Check DataFrame to confirm changes
print(df.describe())
print("\nDataFrame info:\n", df.info())


                low          high          open         close        volume  price_change  average_price    volatility     pct_change   day_of_week   close_lag_1  volume_lag_1   close_lag_3  volume_lag_3   close_lag_7  volume_lag_7    close_MA10   close_ema_7  close_ema_14  close_ema_30           RSI          MACD   Signal_Line  volatility_7d  volatility_14d  volatility_30d    momentum_1d    momentum_7d   momentum_30d  month_of_year
count  10734.000000  10734.000000  10734.000000  10734.000000  1.073400e+04  10734.000000   10734.000000  10734.000000   10734.000000  10734.000000  10734.000000  1.073400e+04  10734.000000  1.073400e+04  10734.000000  1.073400e+04  10734.000000  10734.000000  10734.000000  10734.000000  10734.000000  10734.000000  10734.000000   10734.000000    10734.000000    10734.000000   10734.000000   10734.000000   10734.000000   10734.000000
mean    3939.548077   4122.555492   4033.659810   4035.929777  1.051920e+07      2.269967    4031.051784      7.365536    1361

## EDA

### Visualize Rolling Volatility

In [None]:

# Interactive plot for rolling volatility
fig = px.line(df.reset_index(), x='time', y='volatility_30d', color='product_id',
              labels={'volatility_30d': '30-Day Volatility'},
              title='30-Day Rolling Volatility for Each Cryptocurrency')

# Update to make individual lines selectable
fig.update_traces(visible="legendonly")  # Make all lines invisible by default; they appear when clicked in legend.

fig.update_xaxes(rangeslider_visible=True)
fig.show()

### Interactive Market Momentum

In [None]:
# Interactive plot for market momentum
fig_momentum = px.line(df.reset_index(), x='time', y='momentum_30d', color='product_id',
                       labels={'momentum_30d': '30-Day Momentum'},
                       title='30-Day Market Momentum for Each Cryptocurrency')

# Update to make individual lines selectable
fig_momentum.update_traces(visible="legendonly")  # Make all lines invisible by default; they appear when clicked in legend.

fig_momentum.update_xaxes(rangeslider_visible=True)
fig_momentum.show()

### Interactive Box Plot for Seasonal Patterns

In [None]:
# Mapping numerical days to day names
days = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
df['day_name'] = df['day_of_week'].map(days)

# Interactive box plot for day of the week effects, focusing on percentage change
fig_season = px.box(df.reset_index(), x='day_name', y='pct_change', color='product_id',
                    labels={'day_name': 'Day of the Week', 'pct_change': 'Percentage Change'},
                    title='Percentage Change Distribution by Day of Week for Each Cryptocurrency')

# Update the traces to be toggleable via the legend, this is a workaround since 'legendonly' isn't directly available
for trace in fig_season.data:
    trace.visible = "legendonly"  # Hide all traces; they appear when their legend entry is clicked.

fig_season.show()

### Interactive Exponential Moving Averages and Percentage Change

In [None]:
# Interactive plot for EMA and Percentage Changes
fig_ema = px.line(df.reset_index(), x='time', y=['pct_change', 'close_ema_30'], color='product_id',
                  labels={'value': 'Percentage', 'variable': 'Type'},
                  title='Percentage Changes and 30-Day Exponential Moving Averages')

# Update the traces to make individual lines selectable via the legend
fig_ema.update_traces(visible="legendonly")  # Make all lines invisible by default; they appear when clicked in legend.

fig_ema.update_xaxes(rangeslider_visible=True)
fig_ema.show()

In [None]:
# Data preparation
df_long = df.reset_index().melt(id_vars=['time', 'product_id'], value_vars=['pct_change', 'close_ema_30'],
                                var_name='Type', value_name='Value')

# Interactive plot for EMA and Percentage Changes
fig_ema = px.line(df_long, x='time', y='Value', color='product_id', line_dash='Type',
                  labels={'Value': 'Percentage', 'Type': 'Metric'},
                  title='Percentage Changes and 30-Day Exponential Moving Averages',
                  color_discrete_map={'pct_change': 'blue', 'close_ema_30': 'red'})

# Update the traces to make individual lines selectable via the legend
fig_ema.for_each_trace(lambda t: t.update(visible="legendonly"))

# Customization for a clearer legend and range slider
fig_ema.update_traces(mode='lines')
fig_ema.update_xaxes(rangeslider_visible=True)
fig_ema.update_layout(legend_title_text='Cryptocurrency and Metric')

fig_ema.show()

### Analysis of Percentage Changes and 30-Day EMAs Across Cryptocurrencies

In [None]:
# Data preparation for plotting
df_eda = df.reset_index().melt(id_vars=['time', 'product_id'], value_vars=['pct_change', 'close_ema_30'],
                                var_name='Metric', value_name='Value')

# Interactive plot for Percentage Changes and 30-Day Exponential Moving Averages
fig_ema = px.line(df_eda, x='time', y='Value', color='product_id', line_dash='Metric',
                  labels={'Value': 'Percentage', 'Metric': 'Metric Type'},
                  title='Analysis of Percentage Changes and 30-Day EMAs Across Cryptocurrencies',
                  color_discrete_map={'pct_change': 'blue', 'close_ema_30': 'red'})

# Update the traces to make individual lines selectable via the legend
fig_ema.for_each_trace(lambda t: t.update(visible="legendonly"))

# Customize plot to enhance clarity
fig_ema.update_traces(mode='lines')
fig_ema.update_xaxes(rangeslider_visible=True)
fig_ema.update_layout(legend_title_text='Cryptocurrency and Metric')

fig_ema.show()

### Dashboard

In [16]:
# app = dash.Dash(__name__, suppress_callback_exceptions=True)

# app.layout = html.Div([
#     html.H1("Comprehensive Cryptocurrency Analysis Dashboard"),
#     dcc.Dropdown(
#         id='crypto-selector',
#         options=[{'label': i, 'value': i} for i in df['product_id'].unique()],
#         value='BTC-USD'
#     ),
#     dcc.DatePickerRange(
#         id='date-picker',
#         min_date_allowed=df.index.min(),
#         max_date_allowed=df.index.max(),
#         start_date=df.index.min(),
#         end_date=df.index.max()
#     ),
#     dcc.Graph(id='crypto-analysis-chart')
# ])

# @app.callback(
#     Output('crypto-analysis-chart', 'figure'),
#     [Input('crypto-selector', 'value'),
#      Input('date-picker', 'start_date'),
#      Input('date-picker', 'end_date')]
# )
# def update_crypto_analysis(selected_crypto, start_date, end_date):
#     filtered_data = df[(df['product_id'] == selected_crypto) &
#                        (df.index >= pd.to_datetime(start_date)) & (df.index <= pd.to_datetime(end_date))]

#     fig = make_subplots(rows=6, cols=1, shared_xaxes=True, vertical_spacing=0.02,
#                         subplot_titles=('Percentage Change', 'Volume', 'RSI', 'MACD', 'Momentum', 'Volatility'))

#     # Percentage Change plot
#     fig.add_trace(go.Scatter(x=filtered_data.index, y=filtered_data['pct_change'], name='Percentage Change'), row=1, col=1)

#     # Volume plot
#     fig.add_trace(go.Bar(x=filtered_data.index, y=filtered_data['volume'], name='Volume'), row=2, col=1)

#     # RSI plot with critical levels
#     fig.add_trace(go.Scatter(x=filtered_data.index, y=filtered_data['RSI'], name='RSI'), row=3, col=1)
#     fig.add_hline(y=70, line_dash="dash", line_color="red", row=3, col=1)
#     fig.add_hline(y=30, line_dash="dash", line_color="green", row=3, col=1)

#     # MACD plot
#     fig.add_trace(go.Scatter(x=filtered_data.index, y=filtered_data['MACD'], name='MACD'), row=4, col=1)
#     fig.add_trace(go.Scatter(x=filtered_data.index, y=filtered_data['Signal_Line'], name='Signal Line', line=dict(color='red', dash='dot')), row=4, col=1)

#     # Momentum plot
#     fig.add_trace(go.Scatter(x=filtered_data.index, y=filtered_data['momentum_7d'], name='7-Day Momentum'), row=5, col=1)

#     # Volatility plot
#     fig.add_trace(go.Scatter(x=filtered_data.index, y=filtered_data['volatility_30d'], name='30-Day Volatility'), row=6, col=1)

#     fig.update_layout(height=1200, title_text=f"Comprehensive Analysis for {selected_crypto}")

#     return fig

# if __name__ == '__main__':
#     app.run_server(debug=True)


## Final Checks Before Model Development

In [None]:
# Display final structure of the dataset and check for any anomalies
print("Final Dataset Structure:")
print(df.info())
print("\nSummary Statistics:")
print(df.describe())

# Confirm train-test split
print("\nTraining Set Preview:")
print(train.head())
print("\nTest Set Preview:")
print(test.head())


Final Dataset Structure:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10734 entries, 2021-07-19 to 2024-07-08
Data columns (total 32 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   low             10734 non-null  float64
 1   high            10734 non-null  float64
 2   open            10734 non-null  float64
 3   close           10734 non-null  float64
 4   volume          10734 non-null  float64
 5   price_change    10734 non-null  float64
 6   average_price   10734 non-null  float64
 7   volatility      10734 non-null  float64
 8   product_id      10734 non-null  object 
 9   pct_change      10734 non-null  float64
 10  day_of_week     10734 non-null  int32  
 11  day_name        10734 non-null  object 
 12  close_lag_1     10734 non-null  float64
 13  volume_lag_1    10734 non-null  float64
 14  close_lag_3     10734 non-null  float64
 15  volume_lag_3    10734 non-null  float64
 16  close_lag_7     10734 non-null  fl

## Model Development

In [None]:
# # Prepare the feature matrix and target vector
# features = df.drop(['pct_change', 'product_id'], axis=1)  # Dropping non-numeric and target columns
# target = df['pct_change']

# # Splitting the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# # Initialize the Lasso Regression model with an alpha value
# lasso = Lasso(alpha=0.01, random_state=42)

# # Fit the model to the training data
# lasso.fit(X_train, y_train)

# # Predict on the training and test sets
# train_preds = lasso.predict(X_train)
# test_preds = lasso.predict(X_test)

# # Calculate and print performance metrics
# train_rmse = np.sqrt(mean_squared_error(y_train, train_preds))
# test_rmse = np.sqrt(mean_squared_error(y_test, test_preds))
# train_r2 = r2_score(y_train, train_preds)
# test_r2 = r2_score(y_test, test_preds)

# print(f"Training RMSE: {train_rmse}")
# print(f"Test RMSE: {test_rmse}")
# print(f"Training R^2: {train_r2}")
# print(f"Test R^2: {test_r2}")

In [None]:
# Put in all the metrics we would wanna display on our streamlit app!
#df[['time', 'open', 'close']].to_csv('RAW_Data/test.csv')

- Working on the peercentage change using the best variable and also looking up if its better to keep it as an decimal
- wroking on some of the plots to make them clearer and using specific time line(7-day)
- 

-try to modify the dashboard in a way tha we can click and change like hakkoda web!