<a href="https://colab.research.google.com/github/Adarsh-Kumar-2003/Crypto-Metrics-Analyzer/blob/main/Linear_Regression_365.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import pandas as pd
import numpy as np

# Define API endpoint and parameters
url = "https://min-api.cryptocompare.com/data/v2/histoday"
params = {
    "fsym": "BTC",       # Cryptocurrency symbol (e.g., BTC for Bitcoin)
    "tsym": "USD",       # Target currency (e.g., USD)
    "limit": 365,        # Number of days (e.g., last 365 days)
    "api_key": "31e4c1d445934de5a1aa6618a4c12c60fd49329298309ea0e197b8eca292527d"  # Replace with your actual API key
}

# Make the API request
response = requests.get(url, params=params)

# Parse the JSON data
data = response.json()["Data"]["Data"]

# Convert to a pandas DataFrame
df = pd.DataFrame(data)

# Convert epoch time to a readable date format
df["time"] = pd.to_datetime(df["time"], unit="s")

# Display the DataFrame with relevant columns
print(df[["time", "open", "high", "low", "close"]])


          time      open      high       low     close
0   2023-12-27  42517.32  43694.83  42121.77  43469.34
1   2023-12-28  43469.34  43817.61  42283.91  42588.94
2   2023-12-29  42588.94  43134.59  41296.86  42073.59
3   2023-12-30  42073.59  42595.81  41529.95  42146.03
4   2023-12-31  42146.03  42863.11  41970.37  42280.14
..         ...       ...       ...       ...       ...
361 2024-12-22  97223.39  97387.01  94186.04  95098.66
362 2024-12-23  95098.66  96428.13  92378.53  94771.64
363 2024-12-24  94771.64  99439.54  93437.90  98606.93
364 2024-12-25  98606.93  99484.75  97568.85  99356.06
365 2024-12-26  99356.06  99888.75  98410.47  98715.84

[366 rows x 5 columns]


In [2]:
def get_crypto_data(df, target_date):
    """
    Function to fetch cryptocurrency data for a specific date from a DataFrame.

    Parameters:
    - df (DataFrame): DataFrame containing Date, Open, High, Low, and Close columns.
    - target_date (str): Date in 'YYYY-MM-DD' format to search for.

    Returns:
    - dict: A dictionary containing Open, High, Low, Close values for the matched date.
    """
    # Convert target_date to datetime format for matching
    target_date = pd.to_datetime(target_date)

    # Filter DataFrame to match the date
    result = df[df['time'] == target_date]

    # If data exists for the date, return as a dictionary
    if not result.empty:
        return {
            'Date': result['time'].iloc[0],
            'Open': result['open'].iloc[0],
            'High': result['high'].iloc[0],
            'Low': result['low'].iloc[0],
            'Close': result['close'].iloc[0],
        }
    else:
        return f"No data found for the date: {target_date.strftime('%Y-%m-%d')}"


In [3]:
target_date = '2024-08-17'
result = get_crypto_data(df, target_date)
print(result)

{'Date': Timestamp('2024-08-17 00:00:00'), 'Open': 58893.67, 'High': 59708.69, 'Low': 58815.46, 'Close': 59497.94}


In [4]:
import pandas as pd
import numpy as np

def calculate_metrics(data, variable1, variable2):
    """
    Calculates historical and future metrics for cryptocurrency data using 'time' instead of 'Date'.

    Parameters:
    - data (pd.DataFrame): DataFrame with at least 'time', 'high', 'low', and 'close' columns.
    - variable1 (int): Look-back period for historical metrics.
    - variable2 (int): Look-forward period for future metrics.

    Returns:
    - pd.DataFrame: Updated DataFrame with calculated metrics.
    """
    # Ensure data is sorted by 'time'
    data = data.sort_values(by="time").reset_index(drop=True)

    # Historical Metrics
    data[f"High_Last_{variable1}_Days"] = data['high'].rolling(window=variable1, min_periods=1).max()
    data[f"Low_Last_{variable1}_Days"] = data['low'].rolling(window=variable1, min_periods=1).min()

    # Calculate the day since the last high and low
    data[f"Days_Since_High_Last_{variable1}_Days"] = (
        data['high'].rolling(window=variable1, min_periods=1).apply(lambda x: variable1 - np.argmax(x[::-1]) - 1)
    )
    data[f"Days_Since_Low_Last_{variable1}_Days"] = (
        data['low'].rolling(window=variable1, min_periods=1).apply(lambda x: variable1 - np.argmin(x[::-1]) - 1)
    )

    # Percentage difference from historical high and low
    data[f"%_Diff_From_High_Last_{variable1}_Days"] = (
        (data['close'] - data[f"High_Last_{variable1}_Days"]) / data[f"High_Last_{variable1}_Days"]
    ) * 100

    data[f"%_Diff_From_Low_Last_{variable1}_Days"] = (
        (data['close'] - data[f"Low_Last_{variable1}_Days"]) / data[f"Low_Last_{variable1}_Days"]
    ) * 100

    # Future Metrics
    data[f"High_Next_{variable2}_Days"] = data['high'].shift(-variable2).rolling(window=variable2, min_periods=1).max()
    data[f"Low_Next_{variable2}_Days"] = data['low'].shift(-variable2).rolling(window=variable2, min_periods=1).min()

    # Percentage difference from future high and low
    data[f"%_Diff_From_High_Next_{variable2}_Days"] = (
        (data['close'] - data[f"High_Next_{variable2}_Days"]) / data[f"High_Next_{variable2}_Days"]
    ) * 100

    data[f"%_Diff_From_Low_Next_{variable2}_Days"] = (
        (data['close'] - data[f"Low_Next_{variable2}_Days"]) / data[f"Low_Next_{variable2}_Days"]
    ) * 100

    return data


In [5]:
df_with_metrics = calculate_metrics(df, variable1=7, variable2=5)
print(df_with_metrics)

          time      high       low      open  volumefrom      volumeto  \
0   2023-12-27  43694.83  42121.77  42517.32    27420.29  1.178861e+09   
1   2023-12-28  43817.61  42283.91  43469.34    29112.04  1.249335e+09   
2   2023-12-29  43134.59  41296.86  42588.94    34268.41  1.450477e+09   
3   2023-12-30  42595.81  41529.95  42073.59    21511.57  9.058526e+08   
4   2023-12-31  42863.11  41970.37  42146.03    21509.24  9.128671e+08   
..         ...       ...       ...       ...         ...           ...   
361 2024-12-22  97387.01  94186.04  97223.39    29661.75  2.842280e+09   
362 2024-12-23  96428.13  92378.53  95098.66    57639.30  5.422281e+09   
363 2024-12-24  99439.54  93437.90  94771.64    40945.61  3.967575e+09   
364 2024-12-25  99484.75  97568.85  98606.93    21801.56  2.147050e+09   
365 2024-12-26  99888.75  98410.47  99356.06     5651.93  5.596161e+08   

        close conversionType conversionSymbol  High_Last_7_Days  \
0    43469.34         direct                

In [6]:
import tensorflow as tf
import keras
linear_layer = tf.keras.layers.Dense(units = 1, activation = 'linear')


In [7]:
import numpy as np

X = np.array([df_with_metrics['Days_Since_High_Last_7_Days'].to_numpy(),
              df_with_metrics['Days_Since_Low_Last_7_Days'].to_numpy(),
              df_with_metrics['%_Diff_From_High_Last_7_Days'].to_numpy(),
              df_with_metrics['%_Diff_From_Low_Last_7_Days'].to_numpy()])
print("Extracted Column as Array:", X)


Extracted Column as Array: [[ 6.          6.          5.         ...  0.          0.
   6.        ]
 [ 6.          5.          6.         ...  2.          1.
   0.        ]
 [-0.51605648 -2.80405526 -3.98018057 ... -7.41320246 -3.31989074
  -1.17421632]
 [ 3.19922453  1.10909394  1.88084518 ...  7.01391072  7.82690968
   7.13210612]]


In [8]:
import pandas as pd
import numpy as np

# Create a DataFrame
data = {
    'Feature1': df_with_metrics['Days_Since_High_Last_7_Days'].to_numpy(),
    'Feature2': df_with_metrics['Days_Since_Low_Last_7_Days'].to_numpy(),
    'Feature3': df_with_metrics['%_Diff_From_High_Last_7_Days'].to_numpy(),
    'Feature4': df_with_metrics['%_Diff_From_Low_Last_7_Days'].to_numpy()
}

df_new_w = pd.DataFrame(data)

# Extract all columns as a NumPy array for features
X = df_new_w.to_numpy()  # Converts entire DataFrame to NumPy array
# OR
X = df_new_w.values  # Alternative

print("Features Array:")
print(X)


Features Array:
[[ 6.          6.         -0.51605648  3.19922453]
 [ 6.          5.         -2.80405526  1.10909394]
 [ 5.          6.         -3.98018057  1.88084518]
 ...
 [ 0.          2.         -7.41320246  7.01391072]
 [ 0.          1.         -3.31989074  7.82690968]
 [ 6.          0.         -1.17421632  7.13210612]]


In [9]:
import pandas as pd
import numpy as np

# Create a DataFrame
data = {
    'Feature5': df_with_metrics['%_Diff_From_High_Next_5_Days'].to_numpy(),
    'Feature6': df_with_metrics['%_Diff_From_Low_Next_5_Days'].to_numpy()
}

df_new_y = pd.DataFrame(data)

# Extract all columns as a NumPy array for features
Y = df_new_y.to_numpy()  # Converts entire DataFrame to NumPy array
# OR
Y = df_new_y.values  # Alternative

print("Features Array:")
print(Y)


Features Array:
[[-1.67730168e+00  3.03887519e+00]
 [-7.25199035e+00  9.51992210e-01]
 [-8.37429316e+00  4.62249224e+00]
 [-8.21653704e+00  4.80262551e+00]
 [-7.92447916e+00  5.13611078e+00]
 [-3.74871132e+00  9.90419665e+00]
 [-1.19390347e+00  1.18257094e+01]
 [-9.39163066e+00  8.13962898e-01]
 [-7.78076659e+00  3.96701051e+00]
 [-7.81308916e+00  2.24275512e+00]
 [-1.03268130e+01  1.80174405e+00]
 [-1.04032389e+01  6.02170038e+00]
 [-4.21149732e+00  1.33485162e+01]
 [-5.97514374e+00  1.12615569e+01]
 [-4.86518442e+00  1.25749947e+01]
 [-3.41889015e-01  1.18167430e+01]
 [-1.80378902e+00  2.61163427e+00]
 [-1.65969617e+00  5.49742088e+00]
 [-4.26470754e+00  3.62219597e+00]
 [-2.45839339e+00  5.57731862e+00]
 [-1.21135099e-01  7.16657167e+00]
 [-3.19710050e-01  8.48408792e+00]
 [-2.05185714e+00  7.20376897e+00]
 [-5.66994635e-01  8.08346104e+00]
 [-4.43711044e-01  8.21747004e+00]
 [-1.55712919e+00  7.93987070e+00]
 [-6.40356770e+00  2.62588563e+00]
 [-6.88035346e+00  1.13400412e+00]
 [-7

In [10]:
print(X.shape)
print("\n")
print(Y.shape)

(366, 4)


(366, 2)


In [11]:
from sklearn.model_selection import train_test_split
import numpy as np
from tensorflow.keras import regularizers
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import Adam

# Step 1: Split into training+validation set and test set (80%-20%)
X_train_val, X_test, Y_train_val, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Step 2: Split training+validation set into training set and validation set (75%-25% of train_val)
X_train, X_val, Y_train, Y_val = train_test_split(X_train_val, Y_train_val, test_size=0.15, random_state=42)

# Shapes of the datasets
print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)
print("Test set shape:", X_test.shape)

scaler_X = MinMaxScaler(feature_range=(0, 1))
X_train_normalized = scaler_X.fit_transform(X_train)
X_val_normalized = scaler_X.fit_transform(X_val)
X_test_normalized = scaler_X.fit_transform(X_test)
X_val_normalized = np.delete(X_val_normalized, 32, axis=0)
print("Validation set shape:", X_val_normalized.shape)


Training set shape: (248, 4)
Validation set shape: (44, 4)
Test set shape: (74, 4)
Validation set shape: (43, 4)


In [12]:
n_outputs = 2  # Number of outputs
num_features = 4  # Number of input features

model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=n_outputs, activation='linear',kernel_regularizer=regularizers.l2(0.01)),
    #tf.keras.layers.Dense(units=n_outputs, activation='linear',kernel_regularizer=regularizers.l2(0.01))


])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
model.fit(X_train_normalized, Y_train, epochs=100, batch_size=10, verbose=1)


Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 44.1502 - mae: 4.8762
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 43.7413 - mae: 4.7274  
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 51.8484 - mae: 4.9225
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 43.6405 - mae: 4.7618
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 43.5878 - mae: 4.6299
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 48.3103 - mae: 4.8479 
Epoch 7/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 42.1542 - mae: 4.5540 
Epoch 8/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 38.3129 - mae: 4.4741
Epoch 9/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

<keras.src.callbacks.history.History at 0x7db7647719f0>

In [13]:
predictions = model.predict(X_val_normalized)

print(predictions)
print(predictions.shape)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[[-4.0972795  3.5874805]
 [-2.9285388  2.6171253]
 [-4.133398   4.284601 ]
 [-3.7534146  2.876812 ]
 [-3.6277652  3.857545 ]
 [-5.5183935  4.644391 ]
 [-5.326611   5.5270267]
 [-5.6522064  5.580373 ]
 [-3.0500708  2.706874 ]
 [-3.9253383  3.0215702]
 [-4.396665   3.442431 ]
 [-4.951323   4.851119 ]
 [-2.7791185  2.6071177]
 [-3.4096751  2.9678538]
 [-4.816491   4.8943486]
 [-4.6856976  3.3583617]
 [-3.7028413  2.788023 ]
 [-4.1608667  4.195365 ]
 [-3.9343724  3.4359922]
 [-5.045195   4.926197 ]
 [-3.6052785  2.8704014]
 [-5.2005315  5.503805 ]
 [-3.0534067  2.636571 ]
 [-4.6299014  3.5071077]
 [-4.065776   4.858853 ]
 [-3.713766   3.4517298]
 [-3.6326566  3.025652 ]
 [-2.6737323  2.472686 ]
 [-5.342652   5.1794295]
 [-4.6740203  4.438502 ]
 [-4.306261   3.466776 ]
 [-3.6621447  2.8641605]
 [-3.9275904  2.8842545]
 [-5.389096   5.676035 ]
 [-4.8159738  4.8251095]
 [-5.218472   5.3480086]
 [-4.8588867  5.459893 ]
 [-

In [14]:
#print(X_val)
print(Y_val.shape)
#Y_val_new = Y_val
Y_val_new = np.delete(Y_val, 32, axis=0)
nan_positions = np.argwhere(np.isnan(Y_val))
print(nan_positions)


(44, 2)
[[32  0]
 [32  1]]


In [15]:

#import matplotlib.pyplot as plt
#import numpy as np# Create subplots for each output
#num_outputs = Y_val.shape[1]  # Number of outputs
#fig, axs = plt.subplots(num_outputs, 1, figsize=(8, 6), sharex=True)

# Loop over each output and create a subplot
#for i in range(num_outputs):
    # Actual outputs (line plot)
 #   axs[i].scatter(X_val[:,0], Y_val[:, i], label=f'Actual Output Y{i+1}', linestyle='-', marker='o', color='blue')

    # Predicted outputs (scatter plot)
  #  axs[i].scatter(X_val[:,0], predictions[:, i], label=f'Predicted Output Y{i+1}', marker='x', color='red')

    # Add titles and labels
   # axs[i].set_title(f'Output Y{i+1}')
    #axs[i].set_ylabel('Output Value')
    #axs[i].legend()
    #axs[i].grid(True)

# Add shared X-axis label
#plt.xlabel('Input Feature (e.g., X1)')
#plt.tight_layout()
#plt.show()


In [16]:
import pandas as pd

data = {}
for i in range(Y_test.shape[1]):
    data[f'Actual Output {i+1}'] = Y_val_new[:, i]
    data[f'Predicted Output {i+1}'] = predictions[:, i]

comparison_df = pd.DataFrame(data)
print(comparison_df)

    Actual Output 1  Predicted Output 1  Actual Output 2  Predicted Output 2
0         -2.748277           -4.097280         2.981332            3.587481
1         -1.283248           -2.928539         1.063946            2.617125
2         -3.041229           -4.133398        13.179568            4.284601
3         -8.893502           -3.753415         0.366810            2.876812
4         -3.606982           -3.627765         1.659271            3.857545
5         -8.374293           -5.518394         4.622492            4.644391
6         -4.230969           -5.326611         3.733227            5.527027
7         -0.013118           -5.652206         9.126310            5.580373
8         -5.570338           -3.050071         6.549026            2.706874
9         -8.928645           -3.925338         0.665708            3.021570
10        -2.813755           -4.396665         1.628416            3.442431
11        -0.472348           -4.951323         8.291688            4.851119

In [17]:
print(Y_test.shape)
print(predictions.shape)

(74, 2)
(43, 2)


In [18]:
comparison_df.to_csv('comparison.csv', index=False)
print("Full comparison saved to 'comparison.csv'")
from google.colab import files
files.download('comparison.csv')

Full comparison saved to 'comparison.csv'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [19]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
scaler_X = MinMaxScaler(feature_range=(0, 1))
X_train_normalized = scaler_X.fit_transform(X_train)

In [21]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Explicitly calculate MSE for two outputs
mse_output_1 = mean_absolute_error(Y_val_new[:, 0], predictions[:, 0])
mse_output_2 = mean_absolute_error(Y_val_new[:, 1], predictions[:, 1])

print(f"Output 1 MSE: {mse_output_1}")
print(f"Output 2 MSE: {mse_output_2}")

Output 1 MSE: 3.2176121457195093
Output 2 MSE: 2.943804350454337
