Step 1: Load the data, convert the date column to datetime format, and select the relevant features.

In [None]:
# Import necessary libraries
import os

import numpy as np
import pandas as pd
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# Load the dataset
from google.colab import files

# Name of the CSV file this notebook expects in the working directory.
DATA_PATH = 'Amazon.csv'

# Only prompt for an upload when the file is not already present. Re-running the
# cell would otherwise ask for the file again, and Colab stores a re-uploaded file
# with an existing name under a new name (e.g. "Amazon (1).csv"), so the read
# below would keep using the older copy without any warning.
# `uploaded` stays defined (as an empty dict) when no upload was needed.
uploaded = files.upload() if not os.path.exists(DATA_PATH) else {}

# Load the dataset into a pandas DataFrame
amazon_data_new = pd.read_csv(DATA_PATH)

# Convert the 'Date' column to datetime
amazon_data_new['Date'] = pd.to_datetime(amazon_data_new['Date'])

# Select relevant features for the regression model:
# 'Open', 'High', 'Low' and 'Volume' are used to predict the 'Close' price.
features = ['Open', 'High', 'Low', 'Volume']
target = 'Close'
X_new = amazon_data_new[features]
y_new = amazon_data_new[target]

Saving Amazon.csv to Amazon.csv


Step 2: Split the Data into Training and Test Sets


In [None]:
# Hold out 30% of the rows as the test set. train_test_split shuffles the rows
# by default; the fixed random_state makes that shuffle reproducible.
# NOTE(review): because of the shuffle this is not a chronological hold-out —
# confirm that is intended for this dated data.
X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(
    X_new,
    y_new,
    test_size=0.3,
    random_state=42,
)

Step 3: Scale the data for the SVR model. Support Vector Regression is sensitive to feature scale, so we apply StandardScaler to the features (fitted on the training set only and then applied to the test set).

In [None]:
# Learn the per-feature mean and standard deviation from the training rows only,
# then apply that same transformation to both the training and the test features.
scaler_new = StandardScaler()
scaler_new.fit(X_train_new)
X_train_scaled_new = scaler_new.transform(X_train_new)
X_test_scaled_new = scaler_new.transform(X_test_new)

Step 4: Train the Decision Tree Regressor model using the unscaled data.


In [None]:
# Fit a decision tree on the raw (unscaled) training features; the fixed seed
# keeps the fitted tree reproducible between runs.
decision_tree_model_new = DecisionTreeRegressor(random_state=42).fit(
    X_train_new, y_train_new
)

# Predicted closing prices for the held-out test rows
y_pred_dt_new = decision_tree_model_new.predict(X_test_new)

Step 5: Train the Support Vector Regression model using the scaled data.

In [None]:
# SVR's default C=1.0 and epsilon=0.1 assume a target of roughly unit scale.
# Fitting directly on raw prices leaves the model heavily under-fitted (the
# previously recorded run scored R-squared of about 0.52). Wrapping the SVR in
# TransformedTargetRegressor standardises the target for training and converts
# predictions back to price units, so fit/predict are used exactly as before.
svr_model_new = TransformedTargetRegressor(
    regressor=SVR(kernel='rbf'),
    transformer=StandardScaler(),
)
svr_model_new.fit(X_train_scaled_new, y_train_new)

# Predictions for SVR (returned on the original price scale)
y_pred_svr_new = svr_model_new.predict(X_test_scaled_new)

Step 6: Evaluate both models using Mean Squared Error (MSE) and R-squared metrics to understand their performance.

In [None]:
# Mean squared error of each model on the held-out test set
mse_dt_new = mean_squared_error(y_true=y_test_new, y_pred=y_pred_dt_new)
mse_svr_new = mean_squared_error(y_true=y_test_new, y_pred=y_pred_svr_new)

# Coefficient of determination (R-squared) of each model on the same test set
r2_dt_new = r2_score(y_true=y_test_new, y_pred=y_pred_dt_new)
r2_svr_new = r2_score(y_true=y_test_new, y_pred=y_pred_svr_new)

Step 7: Define a function that calculates "accuracy" as the percentage of predictions that fall within a given relative tolerance (5% by default) of the true value.

In [None]:
def calculate_accuracy(y_true, y_pred, tolerance=0.05):
    """Return the percentage of predictions within a relative tolerance of the truth.

    A prediction counts as accurate when
    ``|y_true - y_pred| <= tolerance * |y_true|``.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, in the same order as ``y_true``.
    tolerance : float, default 0.05
        Maximum allowed relative error (0.05 means 5%).

    Returns
    -------
    float
        Share of accurate predictions, expressed as a percentage (0-100).
        ``nan`` is returned for empty input.
    """
    # Compare by position: subtracting two pandas Series would align them on
    # their index labels instead, which yields NaNs (and a bogus 0% score)
    # whenever the labels differ, e.g. after a shuffled train/test split.
    true_values = np.asarray(y_true, dtype=float)
    predicted_values = np.asarray(y_pred, dtype=float)

    if true_values.size == 0:
        # Nothing to score; avoid NumPy's "mean of empty slice" warning.
        return float('nan')

    # Multiplying the tolerance out avoids dividing by y_true, so a true value
    # of exactly 0 is handled (accurate only when the prediction is also 0).
    absolute_error = np.abs(true_values - predicted_values)
    within_tolerance = absolute_error <= tolerance * np.abs(true_values)
    return float(np.mean(within_tolerance) * 100)  # Convert to percentage

# Share of Decision Tree predictions within the function's default tolerance
accuracy_dt_new = calculate_accuracy(y_true=y_test_new, y_pred=y_pred_dt_new)

# Share of SVR predictions within the same default tolerance
accuracy_svr_new = calculate_accuracy(y_true=y_test_new, y_pred=y_pred_svr_new)

Step 8: Display the evaluation results

In [None]:
# Collect each model's metrics as one row, then pivot the rows into the
# column-oriented mapping used to build the summary table.
metric_columns = ["Model", "MSE", "R-Squared", "Accuracy (±5%)"]
decision_tree_row = ["Decision Tree", mse_dt_new, r2_dt_new, accuracy_dt_new]
svr_row = ["SVR", mse_svr_new, r2_svr_new, accuracy_svr_new]

evaluation_results_new = {
    column: [dt_value, svr_value]
    for column, dt_value, svr_value in zip(metric_columns, decision_tree_row, svr_row)
}

# One table row per model, one column per metric
evaluation_df_new = pd.DataFrame(evaluation_results_new)
print(evaluation_df_new)

           Model            MSE  R-Squared  Accuracy (±5%)
0  Decision Tree     155.840206   0.999778       95.289659
1            SVR  337457.989275   0.519191       21.981592
