In [4]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score
import pandas as pd

# Load the training data and build the feature matrix.
# The target column, the item identifier and Item_Weight are excluded from the
# features; remaining categorical columns are one-hot encoded by get_dummies.
sales_predict_df = pd.read_csv("/home/mo/Documents/UNC/Sales_Automated_Tool/Alex's Model/Resources/Train_Output_reg_final.csv")
features_df = sales_predict_df.drop(columns=['Item_Outlet_Sales','Item_Identifier','Item_Weight'])
features_df = pd.get_dummies(features_df)

X = features_df.values
y = sales_predict_df['Item_Outlet_Sales'].values

# Hold out 20% of the rows for testing (fixed seed for a reproducible split).
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics are used when standardizing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyper-parameter search with 5-fold cross-validation scored by R-squared.
# The search space currently holds a single candidate, so n_iter=1 evaluates
# exactly that combination.
# NOTE: 'degree' only affects the 'poly' kernel; it is ignored for
# kernel='linear' and is kept here only so best_params_ stays unchanged.
param_dist = {'C': [400], 'epsilon': [0.01], 'kernel': ['linear'], 'degree': [2]}
random_search = RandomizedSearchCV(SVR(), param_dist, n_iter=1, scoring='r2', cv=5)
random_search.fit(X_train_scaled, y_train)

# RandomizedSearchCV (refit=True by default) has already retrained an SVR with
# the best parameters on the full training split, so reuse that estimator
# instead of fitting a second, identical model from scratch.
best_params = random_search.best_params_
svm_regressor = random_search.best_estimator_
y_pred = svm_regressor.predict(X_test_scaled)

# Report R-squared on the held-out test split.
print(f'R-squared: {r2_score(y_test, y_pred)}')


R-squared: 0.43032958842333036
