In [1]:
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the dataset
df = pd.read_csv("cleaned_coin_Bitcoin.csv")

# Display the first few rows to verify the data
print(df.head())

# Display basic information about the dataset.
# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping it in print() would emit a stray "None" line after the summary.
df.info()

print(df.columns)


                  Date        High         Low        Open     Close  Volume  \
0  2013-11-14 23:59:59  425.899994  395.190002  406.410004  0.003823     0.0   
1  2013-11-15 23:59:59  437.890015  396.109985  419.410004  0.003788     0.0   
2  2013-11-16 23:59:59  450.260010  415.570007  417.279999  0.004139     0.0   
3  2013-11-17 23:59:59  500.579987  440.239990  440.959991  0.004959     0.0   
4  2013-11-18 23:59:59  703.780029  494.940002  496.579987  0.008298     0.0   

      Marketcap  Daily Return  50-Day MA  200-Day MA  Price Diff  Volatility  \
0  5.038818e+09      0.503122   0.000000    0.000000    0.002307    0.381402   
1  5.013561e+09      0.457140   0.000102    0.000031    0.003168    0.382007   
2  5.282849e+09      0.530312   0.000209    0.000065    0.002617    0.382844   
3  5.907842e+09      0.610910   0.000335    0.000108    0.004611    0.395968   
4  8.449070e+09      1.000000   0.000534    0.000176    0.016155    0.606395   

   Open-Close Diff  High-Low Range  
0

In [3]:
# Define the Market Trend based on moving averages and closing price
def define_market_trend(row):
    """Classify a single row's market trend from its moving averages and close.

    Args:
        row: Mapping-like object indexable by the keys '50-Day MA',
            '200-Day MA' and 'Close'.

    Returns:
        1 (bullish) when the 50-day MA is above the 200-day MA and the close
        is above the 50-day MA; 0 (bearish) when both comparisons are strictly
        reversed; 2 (neutral) in every other case.
    """
    short_ma, long_ma = row['50-Day MA'], row['200-Day MA']

    # 'Close' is only looked up once the moving-average ordering is decided,
    # mirroring the short-circuit evaluation of a combined `and` condition.
    if short_ma > long_ma:
        return 1 if row['Close'] > short_ma else 2  # Bullish, else neutral
    if short_ma < long_ma:
        return 0 if row['Close'] < short_ma else 2  # Bearish, else neutral
    return 2  # Neutral trend

# Apply the function to create the 'Market Trend' column
# Label every row by running define_market_trend across the frame row-wise
trend_labels = df.apply(define_market_trend, axis=1)
df['Market Trend'] = trend_labels

# Preview the label next to the columns it was derived from
preview_columns = ['Date', 'Close', '50-Day MA', '200-Day MA', 'Market Trend']
df[preview_columns].head()


Unnamed: 0,Date,Close,50-Day MA,200-Day MA,Market Trend
0,2013-11-14 23:59:59,0.003823,0.0,0.0,2
1,2013-11-15 23:59:59,0.003788,0.000102,3.1e-05,1
2,2013-11-16 23:59:59,0.004139,0.000209,6.5e-05,1
3,2013-11-17 23:59:59,0.004959,0.000335,0.000108,1
4,2013-11-18 23:59:59,0.008298,0.000534,0.000176,1


In [4]:
# List the column names to confirm that 'Market Trend' was added
df.columns

Index(['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Marketcap',
       'Daily Return', '50-Day MA', '200-Day MA', 'Price Diff', 'Volatility',
       'Open-Close Diff', 'High-Low Range', 'Market Trend'],
      dtype='object')

In [5]:
# Preview the first rows of the frame, now including the 'Market Trend' label
df.head()


Unnamed: 0,Date,High,Low,Open,Close,Volume,Marketcap,Daily Return,50-Day MA,200-Day MA,Price Diff,Volatility,Open-Close Diff,High-Low Range,Market Trend
0,2013-11-14 23:59:59,425.899994,395.190002,406.410004,0.003823,0.0,5038818000.0,0.503122,0.0,0.0,0.002307,0.381402,406.406181,30.709991,2
1,2013-11-15 23:59:59,437.890015,396.109985,419.410004,0.003788,0.0,5013561000.0,0.45714,0.000102,3.1e-05,0.003168,0.382007,419.406216,41.780029,1
2,2013-11-16 23:59:59,450.26001,415.570007,417.279999,0.004139,0.0,5282849000.0,0.530312,0.000209,6.5e-05,0.002617,0.382844,417.27586,34.690002,1
3,2013-11-17 23:59:59,500.579987,440.23999,440.959991,0.004959,0.0,5907842000.0,0.61091,0.000335,0.000108,0.004611,0.395968,440.955033,60.339996,1
4,2013-11-18 23:59:59,703.780029,494.940002,496.579987,0.008298,0.0,8449070000.0,1.0,0.000534,0.000176,0.016155,0.606395,496.571689,208.840027,1


In [6]:
# Drop the 'Date' column and rows with missing values.
# errors='ignore' keeps this cell re-runnable: `df` is rebound here, so on a
# second execution 'Date' is already gone and a plain drop() would raise KeyError.
df = df.drop(columns=['Date'], errors='ignore').dropna()

# Define features and target
# NOTE(review): 'Market Trend' is computed directly from 'Close', '50-Day MA'
# and '200-Day MA', all of which remain in X, so the model can recover the
# label from its own inputs. Confirm this is intended before relying on the
# reported accuracy.
X = df.drop(columns=['Market Trend'])
y = df['Market Trend']


In [7]:
# Settings shared by the split and the model so the run is reproducible
RANDOM_STATE = 42
TEST_FRACTION = 0.2

# Hold out a fifth of the rows for evaluation
# NOTE(review): train_test_split shuffles rows by default, while the data are
# dated daily observations with rolling-average features. Confirm a random
# split (rather than a chronological one) is what is wanted here.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=RANDOM_STATE,
)

# Build a 100-tree Random Forest and fit it on the training portion
rf = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
rf.fit(X_train, y_train)

In [8]:
# Predict trend labels for the held-out test set
# (the evaluation metrics are printed in the following cell)
y_pred = rf.predict(X_test)

In [9]:
# Report overall accuracy followed by per-class precision / recall / F1
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {test_accuracy}")

report_text = classification_report(y_test, y_pred)
print(report_text)

Accuracy: 0.9445438282647585
              precision    recall  f1-score   support

           0       0.96      0.97      0.96       251
           1       0.94      0.89      0.92        74
           2       0.93      0.94      0.93       234

    accuracy                           0.94       559
   macro avg       0.94      0.93      0.94       559
weighted avg       0.94      0.94      0.94       559



In [11]:
# Save the trained Random Forest model
# (joblib is imported with the other dependencies in the first cell, so a
# Restart & Run All surfaces a missing package immediately, not after training)
joblib.dump(rf, 'RF-MarketTrend.joblib')


['RF-MarketTrend.joblib']