In [1]:
!pip install pandas numpy scikit-learn matplotlib seaborn



In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [4]:

eth_file_path = "ETH_4H_Updated_Data_with_ATR___Daily_Open_Close.csv"  # Ensure file is in the working directory
eth_df = pd.read_csv(eth_file_path)


eth_df['Timestamp'] = pd.to_datetime(eth_df['Timestamp'])

eth_df.head()


Unnamed: 0.1,Unnamed: 0,Timestamp,Open,High,Low,Close,20EMA,50EMA,200EMA,EMA_20_50_Crossover,EMA_50_200_Crossover,RSI_14,High_Low_Diff,Open_Close_Diff,Future_4H_Change,ATR,Date,Daily_Open,Daily_Close
0,539,2022-01-01 00:00:00,3676.22,3748.45,3676.22,3723.96,3759.135,3853.269,4018.731,0,0,49.875,72.23,-47.74,-8.65,,2022-01-01,3676.22,3765.54
1,540,2022-01-01 04:00:00,3723.96,3765.27,3701.0,3715.31,3754.961,3847.859,4015.711,0,0,57.065,64.27,8.65,-21.94,64.27,2022-01-01,3676.22,3765.54
2,541,2022-01-01 08:00:00,3715.32,3733.84,3673.46,3693.37,3749.095,3841.8,4012.504,0,0,53.632,60.38,21.95,31.93,60.38,2022-01-01,3676.22,3765.54
3,542,2022-01-01 12:00:00,3693.38,3745.36,3691.88,3725.3,3746.829,3837.232,4009.646,0,0,51.41,53.48,-31.92,22.31,53.48,2022-01-01,3676.22,3765.54
4,543,2022-01-01 16:00:00,3725.29,3766.0,3718.71,3747.61,3746.904,3833.717,4007.039,0,0,50.869,47.29,-22.32,17.93,47.29,2022-01-01,3676.22,3765.54


In [5]:
# Create binary target: 1 = Price goes up, 0 = Price goes down
eth_df['Target'] = (eth_df['Future_4H_Change'] > 0).astype(int)


In [6]:
features = [
    '20EMA', '50EMA', '200EMA', 'EMA_20_50_Crossover', 'EMA_50_200_Crossover',
    'RSI_14', 'High_Low_Diff', 'Open_Close_Diff', 'ATR', 'Daily_Open', 'Daily_Close'
]

X = eth_df[features]
y = eth_df['Target']


In [7]:
features = [
    '20EMA', '50EMA', '200EMA', 'EMA_20_50_Crossover', 'EMA_50_200_Crossover',
    'RSI_14', 'High_Low_Diff', 'Open_Close_Diff', 'ATR', 'Daily_Open', 'Daily_Close'
]

X = eth_df[features]
y = eth_df['Target']


In [8]:
eth_df.ffill(inplace=True)  # Forward fill NaNs (best for time-series data)


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [10]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [12]:
print("NaN values in X_train_scaled:", np.isnan(X_train_scaled).sum())
print("NaN values in y_train:", np.isnan(y_train).sum())

print("Infinite values in X_train_scaled:", np.isinf(X_train_scaled).sum())
print("Infinite values in y_train:", np.isinf(y_train).sum())

NaN values in X_train_scaled: 1
NaN values in y_train: 0
Infinite values in X_train_scaled: 0
Infinite values in y_train: 0


In [13]:
X_train_scaled = np.nan_to_num(X_train_scaled)
y_train = np.nan_to_num(y_train)

In [14]:
y_train = y_train.astype(int)


In [15]:
log_model = LogisticRegression()
log_model.fit(X_train_scaled, y_train)

In [16]:
y_pred = log_model.predict(X_test_scaled)

In [17]:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"✅ Logistic Regression Model Accuracy: {accuracy:.3f}")
print("\n✅ Confusion Matrix:\n", conf_matrix)
print("\n✅ Classification Report:\n", report)

✅ Logistic Regression Model Accuracy: 0.637

✅ Confusion Matrix:
 [[382 268]
 [209 455]]

✅ Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.59      0.62       650
           1       0.63      0.69      0.66       664

    accuracy                           0.64      1314
   macro avg       0.64      0.64      0.64      1314
weighted avg       0.64      0.64      0.64      1314

