In [1]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import RandomizedSearchCV
import numpy as np
from scipy.stats import uniform

In [2]:
# Read the CSV export into a DataFrame; later cells narrow it down to one stock
data = pd.read_csv(filepath_or_buffer='ADJ data.csv')

In [3]:
# Add a parsed datetime column 'Date' alongside the raw 'date' column
data['Date'] = pd.to_datetime(data['date'])

# Work on an independent copy of the rows for a single ticker
company_name = '1.RELIANCE'
company_data = data.loc[data['stock'] == company_name].copy()

# Target is the 'close' column; every column that is not an identifier,
# a date, or the target itself is used as a feature
y = company_data['close']
X = company_data.drop(columns=['date', 'stock', 'Date', 'close'])

In [4]:
# Chronological train/test split: rows dated on or before the cutoff are used
# for training, rows dated after it are held out for testing.
# The masks are built from the parsed datetime column 'Date' instead of the raw
# 'date' strings, so this is a real date comparison and does not depend on the
# strings happening to sort lexicographically in calendar order.
split_date = pd.Timestamp('2022-03-31')
is_train = company_data['Date'] <= split_date
is_test = company_data['Date'] > split_date

X_train = X[is_train]
y_train = y[is_train]
X_test = X[is_test]
y_test = y[is_test]

In [5]:
# Fit the standardiser on the training features only, so no statistics from
# the test period are used when scaling
scaler = StandardScaler().fit(X_train)

# Apply that single fitted transform to both splits
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Base estimator plus the discrete candidate values the randomized search draws from
svr = SVR()
param_grid = dict(
    kernel=['rbf', 'linear', 'poly', 'sigmoid'],
    C=[1, 5, 10, 20, 50, 100, 500, 1000, 2000, 5000],
    gamma=['scale', 'auto'],
    epsilon=[0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5],
)



In [7]:
# Try 20 randomly drawn configurations from param_grid, scoring each with
# 5-fold cross-validation on negated mean squared error.
# NOTE(review): with an integer cv the folds are not time-aware; if the rows
# are in date order, some folds validate on earlier dates than they train on.
# Confirm this is acceptable for this data.
random_search = RandomizedSearchCV(
    svr,
    param_grid,
    n_iter=20,
    cv=5,
    scoring='neg_mean_squared_error',
    random_state=42,
    n_jobs=-1,
    verbose=2,
)

# Run the search on the standardised training features
random_search.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [8]:
# Evaluate the winning configuration of the search on the held-out test rows
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)

mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

print("Best Parameters:", random_search.best_params_)
print("Test MSE:", mse)
print("Test R² Score:", r2)

Best Parameters: {'kernel': 'linear', 'gamma': 'auto', 'epsilon': 0.005, 'C': 500}
Test MSE: 17.09804550238262
Test R² Score: 0.9999734337687858


In [9]:
# Two-step pipeline: standardise the features, then fit an SVR on the result
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svr', SVR())
])


# Hyperparameter space for the 'svr' step of the pipeline.
# scipy's uniform(loc, scale) samples from [loc, loc + scale].
param_distributions = {
    'svr__C': uniform(loc=0.1, scale=500),         # Range: 0.1 to 500.1
    'svr__epsilon': uniform(loc=0.01, scale=1),    # Range: 0.01 to 1.01
    'svr__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svr__degree': [2, 3, 4],                      # Only relevant for 'poly' kernel
    'svr__gamma': ['scale', 'auto']
}

In [10]:
# Second search: 50 random draws from param_distributions over the
# scaler + SVR pipeline, again scored by 5-fold CV on negated MSE.
random_search = RandomizedSearchCV(
    pipeline,
    param_distributions,
    n_iter=50,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
    random_state=42,
    verbose=2,
)

# NOTE(review): X_train_scaled has already been standardised in an earlier
# cell, and the pipeline's own StandardScaler standardises it again.
random_search.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 50 candidates, totalling 250 fits


In [11]:
# Score the best pipeline found by the search on the held-out test rows.
# best_model is the whole pipeline (scaler + SVR) and is given X_test_scaled.
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

for label, value in (
    ("Best Parameters:", random_search.best_params_),
    ("Test MSE:", mse),
    ("Test R² Score:", r2),
):
    print(label, value)

Best Parameters: {'svr__C': 114.06758127097083, 'svr__degree': 3, 'svr__epsilon': 0.010520376995315819, 'svr__gamma': 'scale', 'svr__kernel': 'linear'}
Test MSE: 18.362628451044735
Test R² Score: 0.9999714689124518


In [12]:
# Refit a standalone SVR with the values printed as best parameters by the
# first randomized search (linear kernel, C=500, gamma='auto', epsilon=0.005)
svrmodel = SVR(C=500, epsilon=0.005, kernel='linear', gamma='auto')
svrmodel.fit(X_train_scaled, y_train)

# Predict the held-out period and report the test error
predictions = svrmodel.predict(X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
print('Mean Squared Error(MSE) : ', mse)

Mean Squared Error(MSE) :  17.09804550238262


In [15]:
# Standalone SVR using the C, epsilon, gamma and kernel values printed as
# best parameters by the pipeline search; fit() returns the fitted estimator
svrmodel = SVR(
    kernel='linear',
    gamma='scale',
    C=114.06758127097083,
    epsilon=0.010520376995315819,
).fit(X_train_scaled, y_train)

# Test-set predictions and their mean squared error
predictions = svrmodel.predict(X_test_scaled)
mse = mean_squared_error(y_test, predictions)
print('Mean Squared Error(MSE) : ', mse)

Mean Squared Error(MSE) :  18.36262771791675


In [16]:
# Same C and epsilon as the linear model from the pipeline search, but with a
# degree-3 polynomial kernel for comparison
svrmodel = SVR(
    kernel='poly',
    degree=3,
    gamma='scale',
    C=114.06758127097083,
    epsilon=0.010520376995315819,
)
svrmodel.fit(X_train_scaled, y_train)

# Evaluate on the held-out rows
predictions = svrmodel.predict(X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
print('Mean Squared Error(MSE) : ', mse)

Mean Squared Error(MSE) :  328056.9484244808


In [17]:
# Linear-kernel SVR with the pipeline search's C and epsilon, this time with
# gamma left at its default instead of being passed explicitly
svrmodel = SVR(
    C=114.06758127097083,
    epsilon=0.010520376995315819,
    kernel='linear',
).fit(X_train_scaled, y_train)

# Held-out predictions and test MSE
predictions = svrmodel.predict(X_test_scaled)
mse = mean_squared_error(y_test, predictions)
print('Mean Squared Error(MSE) : ', mse)

Mean Squared Error(MSE) :  18.36262771791675
