## Load the Data

In [6]:
import csv

# Full file path
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of the CSV before running the notebook.
file_path = "/home/lenovo/Documents/FODS/Bitcoin_Price_Dataset_2014_2023 (1).csv"

# newline='' is what the csv module asks for so that line breaks inside quoted
# fields are parsed correctly; the explicit encoding keeps the read independent
# of the platform's default locale.
with open(file_path, 'r', newline='', encoding='utf-8') as file:
    # Each row becomes a dict keyed by the header line; all values are strings.
    rows = list(csv.DictReader(file))

print(f"Total rows loaded: {len(rows)}")

# Print a sample row (index 10 only exists when there are at least 11 rows)
sample_index = 10
if len(rows) > sample_index:
    print("Sample row:", rows[sample_index])
else:
    print(f"Dataset has fewer than {sample_index + 1} rows.")

Total rows loaded: 499
Sample row: {'Date': '2014-09-27', 'Open': '403.56', 'High': '406.62', 'Low': '397.37', 'Close': '399.52', 'Volume': '15029300', 'Daily_Return': '-1.21', 'Price_Range': '9.25', 'Price_Change': '-4.04', 'MA_7': '410.78', 'MA_30': '', 'MA_90': '', 'Volatility_30d': '', 'Day_of_Week': 'Saturday', 'Month': '9', 'Year': '2014', 'Quarter': '3'}


## Clean the Data

In [7]:
# We'll use these columns as features (inputs) and Close as the target (output)
features_to_use = ['Open', 'High', 'Low', 'Volume', 'MA_7', 'MA_30']
target = 'Close'

# Every column a row must provide as a valid number to be kept.
required_columns = features_to_use + [target]

clean_data = []

for row in rows:
    try:
        # Try to convert each needed column to a float
        entry = {}
        for col in required_columns:
            raw = row[col]
            # csv.DictReader fills fields missing from a short row with None,
            # which has no .strip(); treat that the same as an empty cell.
            val = raw.strip() if raw is not None else ''
            if val == '' or val == 'None':
                raise ValueError("Missing value")  # Skip rows with empty values
            entry[col] = float(val)  # Raises ValueError for non-numeric text
        clean_data.append(entry)
    except ValueError:
        # Deliberate: a row with any missing or non-numeric value is dropped.
        pass

print(f"Rows after cleaning: {len(clean_data)}")

Rows after cleaning: 470


In [8]:
import random

# Shuffle so we don't train only on old data.
# A fixed seed makes the split (and every metric computed from it) reproducible,
# and shuffling a copy leaves clean_data untouched so re-running this cell
# always produces the same train/test partition.
# NOTE(review): the rows look like date-ordered daily prices; a random split
# mixes past and future days across train and test, which may make the test
# score optimistic. Consider a chronological split; confirm the intent.
SEED = 42
shuffled_data = list(clean_data)
random.Random(SEED).shuffle(shuffled_data)

split_index = int(len(shuffled_data) * 0.8)  # 80% for training

train_data = shuffled_data[:split_index]
test_data = shuffled_data[split_index:]

print(f"Training rows: {len(train_data)}")
print(f"Testing rows: {len(test_data)}")

Training rows: 376
Testing rows: 94


In [9]:
def get_X_y(data, feature_cols, target_col):
    """Split row dicts into a feature matrix and a target vector.

    Args:
        data: iterable of mappings, one per sample.
        feature_cols: keys to read from each row, in output column order.
        target_col: key holding the value to predict.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a list of per-row feature lists and
        ``y`` is the list of target values, both in the order of ``data``.
    """
    # Walk the data once, pairing each row's features with its target.
    pairs = [
        ([record[name] for name in feature_cols], record[target_col])
        for record in data
    ]
    X = [features for features, _ in pairs]
    y = [label for _, label in pairs]
    return X, y

# Build the (features, target) pair for the training split first, then for the
# test split, using the same feature columns and target for both.
(X_train, y_train), (X_test, y_test) = (
    get_X_y(split, features_to_use, target) for split in (train_data, test_data)
)

print(f"X_train shape: {len(X_train)} rows, {len(X_train[0])} features")

X_train shape: 376 rows, 6 features


In [10]:
def normalize(X, mins=None, maxs=None):
    """Min-max scale each feature column of ``X``.

    When ``mins``/``maxs`` are omitted they are computed from ``X`` itself, so
    every scaled value lies between 0 and 1. Passing the ``mins``/``maxs``
    returned by an earlier call (e.g. on the training set) scales new data with
    those same statistics; such values may fall outside 0..1.

    Args:
        X: list of equal-length rows of numbers.
        mins: optional per-feature minimums to scale with.
        maxs: optional per-feature maximums to scale with.

    Returns:
        A tuple ``(X_scaled, mins, maxs)``. For empty ``X`` the scaled list is
        empty and ``mins``/``maxs`` are returned as given (or as empty lists).
    """
    if not X:
        # Nothing to scale and no row to infer the feature count from.
        return [], ([] if mins is None else mins), ([] if maxs is None else maxs)

    num_features = len(X[0])
    if mins is None:
        mins = [min(row[i] for row in X) for i in range(num_features)]
    if maxs is None:
        maxs = [max(row[i] for row in X) for i in range(num_features)]

    # Per-feature range, computed once instead of once per cell.
    spans = [maxs[i] - mins[i] for i in range(num_features)]

    X_scaled = [
        [
            # A constant feature has zero range; map it to 0.0 to avoid
            # dividing by zero.
            (row[i] - mins[i]) / spans[i] if spans[i] != 0 else 0.0
            for i in range(num_features)
        ]
        for row in X
    ]

    return X_scaled, mins, maxs

# Scale the training features and keep the per-feature mins/maxs found there.
X_train_scaled, mins, maxs = normalize(X_train)

# Scale the test features with the training mins/maxs (not the test set's own),
# mapping any zero-range feature to 0 to avoid dividing by zero.
X_test_scaled = [
    [
        0 if maxs[i] - mins[i] == 0 else (value - mins[i]) / (maxs[i] - mins[i])
        for i, value in enumerate(row)
    ]
    for row in X_test
]

In [11]:
def predict(X, weights, bias):
    """Evaluate the linear model ``bias + sum(w_i * x_i)`` for every row of ``X``.

    Args:
        X: list of feature rows.
        weights: one coefficient per feature, indexed like the row entries.
        bias: constant term added to every prediction.

    Returns:
        A list with one prediction per row, in the order of ``X``.
    """
    def _score(sample):
        # Start from the bias and add each weighted feature in order.
        total = bias
        for idx, value in enumerate(sample):
            total += weights[idx] * value
        return total

    return [_score(sample) for sample in X]

def train_linear_regression(X, y, learning_rate=0.01, epochs=1000):
    """Fit linear-model weights and a bias with batch gradient descent.

    Each epoch predicts on the whole training set, averages the
    error-times-feature sums over all samples, and moves the parameters one
    step of size ``learning_rate`` against them. The training MSE is printed
    on epoch 0 and on every 100th epoch after it.

    Args:
        X: list of feature rows (all the same length).
        y: target values, indexed like ``X``.
        learning_rate: step size applied to the averaged gradients.
        epochs: number of full passes over the training set.

    Returns:
        A tuple ``(weights, bias)`` holding the learned parameters.
    """
    sample_count = len(X)
    feature_count = len(X[0])

    # All parameters start at zero.
    weights = [0.0] * feature_count
    bias = 0.0

    for epoch in range(epochs):
        # Residuals of the current model: prediction minus target.
        predictions = predict(X, weights, bias)
        errors = [predictions[k] - y[k] for k in range(sample_count)]

        # Accumulate error * feature for every feature, sample by sample.
        grad_w = [0.0] * feature_count
        for k in range(sample_count):
            sample, err = X[k], errors[k]
            for j in range(feature_count):
                grad_w[j] += err * sample[j]

        grad_b = sum(errors)

        # Step each parameter against its averaged gradient.
        for j, grad in enumerate(grad_w):
            weights[j] -= learning_rate * (grad / sample_count)
        bias -= learning_rate * (grad_b / sample_count)

        # Progress report: MSE of the predictions made before this update.
        if epoch % 100 == 0:
            mse = sum(residual**2 for residual in errors) / sample_count
            print(f"Epoch {epoch}: MSE = {mse:.2f}")

    return weights, bias

# Fit the model on the scaled training features, using the function's default
# learning rate (0.01) and epoch count (1000).
weights, bias = train_linear_regression(X_train_scaled, y_train)

Epoch 0: MSE = 89949.94
Epoch 100: MSE = 2923.36
Epoch 200: MSE = 386.43
Epoch 300: MSE = 251.56
Epoch 400: MSE = 203.63
Epoch 500: MSE = 171.54
Epoch 600: MSE = 149.15
Epoch 700: MSE = 133.41
Epoch 800: MSE = 122.26
Epoch 900: MSE = 114.28


In [12]:
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between targets and predictions.

    Args:
        y_true: sequence of actual values; its length sets the sample count.
        y_pred: sequence of predicted values, indexed like ``y_true``.

    Returns:
        The sum of squared differences divided by ``len(y_true)``.
    """
    squared_errors = [
        (actual - y_pred[idx]) ** 2 for idx, actual in enumerate(y_true)
    ]
    return sum(squared_errors) / len(y_true)

def r_squared(y_true, y_pred):
    """Return R², the share of the variance in ``y_true`` explained by ``y_pred``.

    Computed as ``1 - ss_residual / ss_total``. When every value in ``y_true``
    is identical, ``ss_total`` is zero and the ratio is undefined; in that case
    1.0 is returned for a perfect fit and 0.0 otherwise, instead of raising
    ``ZeroDivisionError``.

    Args:
        y_true: sequence of actual values (must be non-empty).
        y_pred: sequence of predicted values, indexed like ``y_true``.

    Returns:
        The R² score; closer to 1.0 is better.

    Raises:
        ZeroDivisionError: if ``y_true`` is empty.
    """
    mean_y = sum(y_true) / len(y_true)
    ss_total = sum((y - mean_y)**2 for y in y_true)      # Total variance
    ss_residual = sum((y_true[i] - y_pred[i])**2 for i in range(len(y_true)))  # Unexplained variance
    if ss_total == 0:
        # Constant target: there is no variance to explain.
        return 1.0 if ss_residual == 0 else 0.0
    return 1 - (ss_residual / ss_total)

# Make predictions on test data
test_predictions = predict(X_test_scaled, weights, bias)

mse = mean_squared_error(y_test, test_predictions)
r2 = r_squared(y_test, test_predictions)

print("\n--- Results ---")
print(f"Mean Squared Error: {mse:.2f}")
print(f"Root MSE: {mse**0.5:.2f}")  # In the same units as price (dollars)
print(f"R² Score: {r2:.4f}")        # Closer to 1.0 is better

# Show a few example predictions vs actual.
# Slicing caps the sample at 5 but also works when the test split is smaller,
# where indexing 0..4 directly would raise IndexError.
print("\nSample predictions vs actual:")
for predicted, actual in zip(test_predictions[:5], y_test[:5]):
    print(f"  Predicted: ${predicted:.2f}  |  Actual: ${actual:.2f}")


--- Results ---
Mean Squared Error: 98.73
Root MSE: 9.94
R² Score: 0.9790

Sample predictions vs actual:
  Predicted: $338.00  |  Actual: $311.08
  Predicted: $363.14  |  Actual: $336.82
  Predicted: $226.63  |  Actual: $224.77
  Predicted: $348.47  |  Actual: $345.30
  Predicted: $228.66  |  Actual: $234.34
