In [1]:
#Importing necessary libraries for model training

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt # for ploting graphs
import plotly.express as px # for ploting graphs
import plotly.graph_objects as go # for ploting graphs
from plotly.subplots import make_subplots # to create the subplots
from datetime import datetime # for extracting data and time 
from sklearn.model_selection import train_test_split # for separating the training and testing data
from sklearn.linear_model import LinearRegression # for model creation
import joblib # used to save model in .pkl format

In [2]:
USDC = pd.read_csv("USDC-USD.csv")
USDC

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2018-10-08,1.004100,1.007680,1.001900,1.00221,1.00221,382900
1,2018-10-09,1.004270,1.008330,1.000030,1.00686,1.00686,108803
2,2018-10-10,1.004940,1.019080,1.003970,1.00962,1.00962,711783
3,2018-10-11,1.009940,1.031230,1.001730,1.00978,1.00978,4177290
4,2018-10-12,1.008610,1.018710,1.001140,1.01272,1.01272,1322240
...,...,...,...,...,...,...,...
1790,2023-09-02,1.000197,1.000527,0.999917,1.00017,1.00017,1764318129
1791,2023-09-03,1.000139,1.000839,0.999864,1.00013,1.00013,1878209470
1792,2023-09-04,1.000053,1.000428,0.999790,1.00010,1.00010,2275770718
1793,2023-09-05,0.999920,1.000409,0.999842,1.00003,1.00003,2529858601


# Data Pre-Processing

In [3]:
#Dropping unnecessary columns
USDC.drop(['Adj Close'], axis=1, inplace=True)
USDC

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2018-10-08,1.004100,1.007680,1.001900,1.00221,382900
1,2018-10-09,1.004270,1.008330,1.000030,1.00686,108803
2,2018-10-10,1.004940,1.019080,1.003970,1.00962,711783
3,2018-10-11,1.009940,1.031230,1.001730,1.00978,4177290
4,2018-10-12,1.008610,1.018710,1.001140,1.01272,1322240
...,...,...,...,...,...,...
1790,2023-09-02,1.000197,1.000527,0.999917,1.00017,1764318129
1791,2023-09-03,1.000139,1.000839,0.999864,1.00013,1878209470
1792,2023-09-04,1.000053,1.000428,0.999790,1.00010,2275770718
1793,2023-09-05,0.999920,1.000409,0.999842,1.00003,2529858601


In [4]:
USDC.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1795 entries, 0 to 1794
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    1795 non-null   object 
 1   Open    1795 non-null   float64
 2   High    1795 non-null   float64
 3   Low     1795 non-null   float64
 4   Close   1795 non-null   float64
 5   Volume  1795 non-null   int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 84.3+ KB


In [5]:
USDC.isnull().sum()

Date      0
Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [6]:
USDC.duplicated().sum()

0

# The data is clean enough for further processing

## i) Data Visualization

In [7]:
import plotly.graph_objects as go

plt.figure(figsize=(14, 6))
fig = px.line(USDC.Open,x=USDC.Date, y='Open', labels={'x': 'Date', 'y': 'Price'}, title='Open Price Over Time')
fig.update_xaxes(type='category')  # Ensure the x-axis treats the index as categorical (date)
fig.update_xaxes(tickfont=dict(color='#B468FF'))  # Set x-axis label color to light purple
fig.update_yaxes(tickfont=dict(color='#B468FF'))
fig.update_layout(
    paper_bgcolor='#180736',  # Set the background color to white
    plot_bgcolor='#180736',  # Set the background color to white
    template="plotly_dark"  # Use the "plotly_dark" template
)
fig.update_traces(line=dict(color='#B468FF'))

fig.update_xaxes(showgrid=True)
fig.update_yaxes(showgrid=True)
fig.show()
fig.show()

<Figure size 1400x600 with 0 Axes>

In [8]:
#Creating Subplots for high and low columns

fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.1)

# Second subplot (High Price)
fig.add_trace(go.Scatter(x=USDC['Date'], y=USDC['High'], mode='lines', name='High Price'), row=1, col=1)
fig.update_yaxes(title_text="High Price", row=1, col=1)

# Third subplot (Low Price)
fig.add_trace(go.Scatter(x=USDC['Date'], y=USDC['Low'], mode='lines', name='High Price'), row=2, col=1)
fig.update_yaxes(title_text="Low Price", row=2, col=1)

# Fourth subplot(High and Low Prices)
fig.add_trace(go.Scatter(x=USDC['Date'], y=USDC['High'], mode='lines', name='High Price'), row=3, col=1)
fig.add_trace(go.Scatter(x=USDC['Date'], y=USDC['Low'], mode='lines', name='Low Price'), row=3, col=1)
fig.update_yaxes(title_text="High and Low Price", row=3, col=1)

# setting up the title and size of the graph
fig.update_layout(title_text="High, Low and High & Low Prices Over Time for BTC",
    height=500,  # Set the height
    width=1000) # width
fig.show()

In [9]:
#Subplots for open and close prices

fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.1)

# First subplot (Open Price)
fig.add_trace(go.Scatter(x=USDC['Date'], y=USDC['Open'], mode='lines', name='Open Price'), row=1, col=1)
fig.update_yaxes(title_text="Open Price", row=1, col=1)

# Second subplot (Close Price)
fig.add_trace(go.Scatter(x=USDC['Date'], y=USDC['Close'], mode='lines', name='Close Price'), row=2, col=1)
fig.update_yaxes(title_text="Close Price", row=2, col=1)

# Third subplot(Open and Close Prices)
fig.add_trace(go.Scatter(x=USDC['Date'], y=USDC['Open'], mode='lines', name='Open Price'), row=3, col=1)
fig.add_trace(go.Scatter(x=USDC['Date'], y=USDC['Close'], mode='lines', name='Close Price'), row=3, col=1)
fig.update_yaxes(title_text="Open and Close Price", row=3, col=1)

# setting up the title and size of the graph
fig.update_layout(title_text="Open, Close and Open & Close Prices Over Time for BTC",
    height=500,  # Set the height
    width=1000) # width
fig.show()

In [10]:
# making output column one day ahead of the closing price column
USDC['Target_Price'] = USDC['Close'].shift(-1)
USDC

Unnamed: 0,Date,Open,High,Low,Close,Volume,Target_Price
0,2018-10-08,1.004100,1.007680,1.001900,1.00221,382900,1.00686
1,2018-10-09,1.004270,1.008330,1.000030,1.00686,108803,1.00962
2,2018-10-10,1.004940,1.019080,1.003970,1.00962,711783,1.00978
3,2018-10-11,1.009940,1.031230,1.001730,1.00978,4177290,1.01272
4,2018-10-12,1.008610,1.018710,1.001140,1.01272,1322240,1.00743
...,...,...,...,...,...,...,...
1790,2023-09-02,1.000197,1.000527,0.999917,1.00017,1764318129,1.00013
1791,2023-09-03,1.000139,1.000839,0.999864,1.00013,1878209470,1.00010
1792,2023-09-04,1.000053,1.000428,0.999790,1.00010,2275770718,1.00003
1793,2023-09-05,0.999920,1.000409,0.999842,1.00003,2529858601,1.00014


In [11]:
USDC.isnull().sum()

Date            0
Open            0
High            0
Low             0
Close           0
Volume          0
Target_Price    1
dtype: int64

In [12]:
# droping the row with null value
USDC.dropna(inplace=True)

## Separating the input and output data

### For this, we will make use of the open and close columns

In [13]:
# input features
X_USDC=USDC[['Open', 'Close']]

# output features
y_USDC=USDC['Target_Price']

# Now, training and testing the model

## We will give 0.10 % data to our model for testing to make accurate and close predictions

In [14]:
X_USDC_train, X_USDC_test, y_USDC_train, y_USDC_test=train_test_split(X_USDC, y_USDC, test_size=.10, random_state=50)

In [15]:
model_USDC=LinearRegression()
model_USDC.fit(X_USDC_train, y_USDC_train)

## Checking out the accuracy score of the model

In [16]:
model_USDC.score(X_USDC_test, y_USDC_test)

0.6654946929761136

### Dumping the model as a .pkl file

In [17]:
USDC_model=joblib.dump(model_USDC,"USDC_predictions.pkl")

### Printing out the last and latest coin price from the data

In [18]:
USDC.tail(1)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Target_Price
1793,2023-09-05,0.99992,1.000409,0.999842,1.00003,2529858601,1.00014


### Now, loading the .pkl file and entering the last open and close values to make the coin predictions for the next day

In [19]:
USDC_model_predictions=joblib.load("USDC_predictions.pkl")

USDC_last_Open=float(input("Enter the last Open value: "))
print("You entered USDC_last_Open value: ", USDC_last_Open)

USDC_last_Close=float(input("Enter the last Close value: "))
print("You entered USDC_last_Close value: ", USDC_last_Close)
print(" ")

USDC_Input=pd.DataFrame({"Open": [USDC_last_Open], "Close": [USDC_last_Close]})
USDC_future_pred=USDC_model_predictions.predict(USDC_Input)
    
print("Your predicted value of USDC coin is==> ", USDC_future_pred)

ValueError: could not convert string to float: ''