In [12]:
!pip install pandas numpy scikit-learn matplotlib seaborn




In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [14]:
# Load the dataset
btc_file_path = "BTC_4H_Updated_Data_with_ATR___Daily_Open_Close.csv"  # Ensure file is in the working directory
btc_df = pd.read_csv(btc_file_path)

# Convert Timestamp to datetime format
btc_df['Timestamp'] = pd.to_datetime(btc_df['Timestamp'])

# Display first few rows
btc_df.head()


Unnamed: 0.1,Unnamed: 0,Timestamp,Open,High,Low,Close,20EMA,50EMA,200EMA,EMA_20_50_Crossover,EMA_50_200_Crossover,RSI_14,High_Low_Diff,Open_Close_Diff,Future_4H_Change,ATR,Date,Daily_Open,Daily_Close
0,539,2022-01-01 00:00:00,46216.93,46949.99,46208.37,46813.2,47456.721,48266.618,50386.474,0,0,47.669,741.62,-596.27,381.53,,2022-01-01,46216.93,47722.65
1,540,2022-01-01 04:00:00,46813.21,47555.55,46591.23,47194.73,47431.77,48224.583,50354.715,0,0,55.296,964.32,-381.52,-435.86,964.32,2022-01-01,46216.93,47722.65
2,541,2022-01-01 08:00:00,47194.73,47344.69,46715.39,46758.87,47367.684,48167.105,50318.936,0,0,51.748,629.3,435.86,460.17,629.3,2022-01-01,46216.93,47722.65
3,542,2022-01-01 12:00:00,46758.87,47491.14,46756.3,47219.04,47353.527,48129.926,50288.091,0,0,51.639,734.84,-460.17,110.74,734.84,2022-01-01,46216.93,47722.65
4,543,2022-01-01 16:00:00,47219.04,47954.63,47186.07,47329.78,47351.266,48098.547,50258.655,0,0,48.645,768.56,-110.74,392.87,768.56,2022-01-01,46216.93,47722.65


In [15]:
# Create binary target: 1 = Price goes up, 0 = Price goes down
btc_df['Target'] = (btc_df['Future_4H_Change'] > 0).astype(int)


In [16]:
features = [
    '20EMA', '50EMA', '200EMA', 'EMA_20_50_Crossover', 'EMA_50_200_Crossover',
    'RSI_14', 'High_Low_Diff', 'Open_Close_Diff', 'ATR', 'Daily_Open', 'Daily_Close'
]

X = btc_df[features]
y = btc_df['Target']


In [18]:
btc_df.ffill(inplace=True)  # Corrected forward fill for missing values



In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [20]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [22]:
print("NaN values in X_train_scaled:", np.isnan(X_train_scaled).sum())
print("NaN values in y_train:", np.isnan(y_train).sum())

print("Infinite values in X_train_scaled:", np.isinf(X_train_scaled).sum())
print("Infinite values in y_train:", np.isinf(y_train).sum())


NaN values in X_train_scaled: 1
NaN values in y_train: 0
Infinite values in X_train_scaled: 0
Infinite values in y_train: 0


In [23]:
X_train_scaled = np.nan_to_num(X_train_scaled)
y_train = np.nan_to_num(y_train)


In [24]:
y_train = y_train.astype(int)


In [25]:
log_model = LogisticRegression()
log_model.fit(X_train_scaled, y_train)


In [26]:
y_pred = log_model.predict(X_test_scaled)


In [27]:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print results
print(f"✅ Logistic Regression Model Accuracy: {accuracy:.3f}")
print("\n✅ Confusion Matrix:\n", conf_matrix)
print("\n✅ Classification Report:\n", report)


✅ Logistic Regression Model Accuracy: 0.625

✅ Confusion Matrix:
 [[380 246]
 [247 441]]

✅ Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.61      0.61       626
           1       0.64      0.64      0.64       688

    accuracy                           0.62      1314
   macro avg       0.62      0.62      0.62      1314
weighted avg       0.62      0.62      0.62      1314

