In [1]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
file_path = "cleaned_coin_Bitcoin.csv" 
df = pd.read_csv(file_path)

In [3]:
df['Daily Return'] = df['Close'].pct_change() * 100

# Create a new column for Market Trend (Up, Down, Neutral)
df['Market Trend'] = df['Daily Return'].apply(lambda x: 'Up' if x > 0 else ('Down' if x < 0 else 'Neutral'))


In [4]:
# Encode the Market Trend labels as integers (Up = 0, Down = 1, Neutral = 2)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['Market Trend'] = le.fit_transform(df['Market Trend'])

# Drop rows with missing values after creating 'Daily Return'
df = df.dropna()

In [5]:
df.head()


Unnamed: 0,Date,High,Low,Open,Close,Volume,Marketcap,Daily Return,50-Day MA,200-Day MA,Price Diff,Volatility,Open-Close Diff,High-Low Range,Market Trend
1,2013-11-15 23:59:59,437.890015,396.109985,419.410004,0.003788,0.0,5013561000.0,-0.929379,0.000102,3.1e-05,0.003168,0.382007,419.406216,41.780029,0
2,2013-11-16 23:59:59,450.26001,415.570007,417.279999,0.004139,0.0,5282849000.0,9.285081,0.000209,6.5e-05,0.002617,0.382844,417.27586,34.690002,2
3,2013-11-17 23:59:59,500.579987,440.23999,440.959991,0.004959,0.0,5907842000.0,19.796497,0.000335,0.000108,0.004611,0.395968,440.955033,60.339996,2
4,2013-11-18 23:59:59,703.780029,494.940002,496.579987,0.008298,0.0,8449070000.0,67.339269,0.000534,0.000176,0.016155,0.606395,496.571689,208.840027,2
5,2013-11-19 23:59:59,806.109985,456.390015,712.76001,0.006419,0.0,7022949000.0,-22.63744,0.000692,0.000231,0.027107,0.646978,712.75359,349.719971,0


In [6]:
# Define features (X) and target (y) for Market Trend prediction
X_trend = df[['Open', 'High', 'Low', 'Close', 'Volume', 'Marketcap', '50-Day MA', '200-Day MA', 'Price Diff', 'Volatility', 'Daily Return', 'Open-Close Diff', 'High-Low Range']]
y_trend = df['Market Trend']  # Target variable is Market Trend (Up, Down, Neutral)


In [7]:
# Split the data into training and testing sets (80% train, 20% test)
X_train_trend, X_test_trend, y_train_trend, y_test_trend = train_test_split(X_trend, y_trend, test_size=0.2, shuffle=False)

# Check for NaN values
print("Check for NaN values in X_train_trend:", X_train_trend.isnull().sum())

# Check for infinite values
import numpy as np
print("Check for infinity values in X_train_trend:", np.isinf(X_train_trend).sum())


Check for NaN values in X_train_trend: Open               0
High               0
Low                0
Close              0
Volume             0
Marketcap          0
50-Day MA          0
200-Day MA         0
Price Diff         0
Volatility         0
Daily Return       0
Open-Close Diff    0
High-Low Range     0
dtype: int64
Check for infinity values in X_train_trend: Open               0
High               0
Low                0
Close              0
Volume             0
Marketcap          0
50-Day MA          0
200-Day MA         0
Price Diff         0
Volatility         0
Daily Return       1
Open-Close Diff    0
High-Low Range     0
dtype: int64


In [8]:
# Replace NaN values with the mean of the column
X_train_trend = X_train_trend.apply(lambda x: x.fillna(x.mean()), axis=0)
X_test_trend = X_test_trend.apply(lambda x: x.fillna(x.mean()), axis=0)


# Replace infinity values with a large finite number
X_train_trend = X_train_trend.replace([np.inf, -np.inf], 0)
X_test_trend = X_test_trend.replace([np.inf, -np.inf], 0)

In [9]:
# You can similarly check for NaN and infinite values in the test set
print("Check for NaN values in X_test_trend:", X_test_trend.isnull().sum())
print("Check for infinity values in X_test_trend:", np.isinf(X_test_trend).sum())

Check for NaN values in X_test_trend: Open               0
High               0
Low                0
Close              0
Volume             0
Marketcap          0
50-Day MA          0
200-Day MA         0
Price Diff         0
Volatility         0
Daily Return       0
Open-Close Diff    0
High-Low Range     0
dtype: int64
Check for infinity values in X_test_trend: Open               0
High               0
Low                0
Close              0
Volume             0
Marketcap          0
50-Day MA          0
200-Day MA         0
Price Diff         0
Volatility         0
Daily Return       0
Open-Close Diff    0
High-Low Range     0
dtype: int64


In [10]:
# Handle missing values by filling them with the mean or using forward/backward filling
X_train_trend.fillna(X_train_trend.mean(), inplace=True)
X_test_trend.fillna(X_test_trend.mean(), inplace=True)


In [11]:
# Replace infinite values with NaN, then fill them with the mean or drop them
X_train_trend.replace([np.inf, -np.inf], np.nan, inplace=True)
X_test_trend.replace([np.inf, -np.inf], np.nan, inplace=True)

# Fill NaN values (if any) with the mean
X_train_trend.fillna(X_train_trend.mean(), inplace=True)
X_test_trend.fillna(X_test_trend.mean(), inplace=True)


In [12]:
from sklearn.preprocessing import MinMaxScaler

# Normalize the feature data using MinMaxScaler
scaler_trend = MinMaxScaler(feature_range=(0, 1))
X_train_trend_scaled = scaler_trend.fit_transform(X_train_trend)
X_test_trend_scaled = scaler_trend.transform(X_test_trend)


In [13]:
from sklearn.preprocessing import LabelEncoder

# Encode the 'Market Trend' target variable
encoder_trend = LabelEncoder()
y_train_trend_encoded = encoder_trend.fit_transform(y_train_trend)
y_test_trend_encoded = encoder_trend.transform(y_test_trend)


In [14]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Encode the target variable (Market Trend) as it seems to be categorical
label_encoder = LabelEncoder()
y_train_trend_encoded = label_encoder.fit_transform(y_train_trend)
y_test_trend_encoded = label_encoder.transform(y_test_trend)

# Initialize the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')

# Train the model
rf_model.fit(X_train_trend_scaled, y_train_trend_encoded)

# Make predictions
y_pred_trend_rf = rf_model.predict(X_test_trend_scaled)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test_trend_encoded, y_pred_trend_rf))
print("Classification Report:\n", classification_report(y_test_trend_encoded, y_pred_trend_rf))


Accuracy: 0.998211091234347
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       255
           2       1.00      1.00      1.00       304

    accuracy                           1.00       559
   macro avg       1.00      1.00      1.00       559
weighted avg       1.00      1.00      1.00       559



In [15]:
# Check feature importance
import matplotlib.pyplot as plt

importances = rf_model.feature_importances_
indices = np.argsort(importances)[::-1]

# Plot the feature importances
plt.figure(figsize=(10, 6))
plt.title("Feature Importance")
plt.bar(range(X_trend.shape[1]), importances[indices], align="center")
plt.xticks(range(X_trend.shape[1]), X_trend.columns[indices], rotation=90)
plt.tight_layout()
plt.show()


ModuleNotFoundError: No module named 'matplotlib'