In [4]:
# Import necessary libraries
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score
import pandas as pd

# Read the training data from its CSV export
sales_predict_df = pd.read_csv("../Alex's Model/Resources/Train_Output_CSV.csv")

# Columns kept out of the feature matrix: the regression target itself plus
# three other columns that are not used as model inputs.
excluded_columns = ['Item_Outlet_Sales', 'Item_Identifier', 'Outlet_Identifier', 'Item_Visibility']

# Columns that get expanded into one indicator column per distinct value.
# Note that 'Outlet_Establishment_Year' is encoded as a category here rather
# than being used as a number.
one_hot_columns = [
    'Item_Type',
    "Item_Fat_Content",
    "Outlet_Type",
    'Outlet_Location_Type',
    'Outlet_Size',
    'Outlet_Establishment_Year',
]

# Build the encoded feature frame in a single expression
features_df = pd.get_dummies(
    sales_predict_df.drop(columns=excluded_columns),
    columns=one_hot_columns,
)

# Regression target
target_df = sales_predict_df['Item_Outlet_Sales']

# Print the column / dtype summary of the encoded feature frame
features_df.info()

# Underlying array representations handed to scikit-learn
X = features_df.values
y = target_df.values

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps
# the partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=3,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate hyperparameter values for the randomized search.
# NOTE: SVR only uses `degree` when kernel == 'poly'; sampled candidates with
# a 'linear' or 'rbf' kernel that differ only in `degree` are effectively
# duplicates, so the 40 iterations cover fewer distinct models than 40.
param_dist = {
    'C': [0.01, 0.1, 1, 10, 100],  # Regularization parameter
    'epsilon': [0.01, 0.1, 0.2, 0.3],  # Epsilon for epsilon-insensitive loss
    'kernel': ['linear', 'rbf', 'poly'],  # Kernel type
    'degree': [2, 3, 4]  # Degree for polynomial kernel
}

# Base estimator whose hyperparameters are being tuned
svm_regressor = SVR()

# Randomized search: 40 sampled candidates, each scored by 5-fold CV R^2.
#   random_state=3 -> the same candidates are sampled on every run, so the
#                     reported best parameters / score are reproducible
#                     (same seed as the train/test split).
#   n_jobs=-1      -> candidate fits run in parallel on all available cores;
#                     this does not change the results.
# NOTE(review): the folds are cut from X_train_scaled, which was standardized
# using all training rows, so each validation fold has influenced the scaling
# statistics. Wrapping the scaler and SVR in a Pipeline would remove that
# (small) optimistic bias - left unchanged here to keep the variables intact.
random_search = RandomizedSearchCV(
    estimator=svm_regressor,
    param_distributions=param_dist,
    n_iter=40,
    scoring='r2',
    cv=5,
    random_state=3,
    n_jobs=-1,
)

# Fit randomized search to the (scaled) training data
random_search.fit(X_train_scaled, y_train)

# Best hyperparameter combination and its mean cross-validated R^2
best_params = random_search.best_params_
best_r2 = random_search.best_score_
print("Best Parameters: ", best_params)
print("Best R^2 Score: ", best_r2)

# Final model: RandomizedSearchCV refits the best parameter combination on the
# full data passed to fit() (refit=True is the default), so the tuned SVR is
# already available as best_estimator_. Reusing it avoids training the same
# model a second time with SVR(**best_params).
svm_regressor = random_search.best_estimator_

# Predict on the held-out test data
y_pred = svm_regressor.predict(X_test_scaled)

# R-squared on the test set
r2 = r2_score(y_test, y_pred)
print(f'R-squared: {r2}')

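# NOTE: everything below is a disabled RandomForestRegressor experiment kept for reference.
# It uses `RandomForestRegressor` and `metrics`, neither of which is imported in this cell;
# add those imports before re-enabling it.
# # Define the hyperparameter grid for Randomized Search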
# param_grid = {
#     'n_estimators': [100, 200, 300, 900],
#     'max_depth': [10, 20, 30, 200],
#     'min_samples_split': [2, 5, 10],
#     'min_samples_leaf': [1, 3, 9],
#     'bootstrap': [True, False]
# }

# # Create a RandomizedSearchCV object
# random_search = RandomizedSearchCV(
#     estimator=RandomForestRegressor(random_state=3),
#     param_distributions=param_grid,
#     n_iter=30,
#     scoring='r2',
#     cv=5,
#     random_state=3,
#     n_jobs=-1
# )

# # Fit the RandomizedSearchCV on the training data
# random_search.fit(X_train_scaled, y_train)

# # Get the best model from RandomizedSearchCV
# best_regressor = random_search.best_estimator_

# # Make predictions on the test set
# sales_data_predictions = best_regressor.predict(X_test_scaled)

# # Calculate R squared value
# r2_sales = metrics.r2_score(y_test, sales_data_predictions)
# print('R Squared value =', r2_sales)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8523 entries, 0 to 8522
Data columns (total 39 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Item_Weight                      8523 non-null   float64
 1   Item_MRP                         8523 non-null   float64
 2   Item_Type_Baking Goods           8523 non-null   uint8  
 3   Item_Type_Breads                 8523 non-null   uint8  
 4   Item_Type_Breakfast              8523 non-null   uint8  
 5   Item_Type_Canned                 8523 non-null   uint8  
 6   Item_Type_Dairy                  8523 non-null   uint8  
 7   Item_Type_Frozen Foods           8523 non-null   uint8  
 8   Item_Type_Fruits and Vegetables  8523 non-null   uint8  
 9   Item_Type_Hard Drinks            8523 non-null   uint8  
 10  Item_Type_Health and Hygiene     8523 non-null   uint8  
 11  Item_Type_Household              8523 non-null   uint8  
 12  Item_Type_Meat      