# SVM

In [1]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the dataset
file_path = r'..\Smart_City_index_headers.csv'
data = pd.read_csv(file_path)

In [2]:
# Prepare the data (drop non-numeric and target columns)
X = data.drop(columns=['Id', 'City', 'Country', 'SmartCity_Index', 'SmartCity_Index_relative_Edmonton'])
y = data['SmartCity_Index']

# Apply Standard Scaling to the features (scaling helps with kernel-based methods like RBF)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [3]:
# Split the data into training and testing sets (80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Fit SVR model with RBF kernel and increased regularization
svr = SVR(kernel='linear', C=8)  # Increased C to capture more complexity
svr.fit(X_train, y_train)

# Make predictions on the test data
y_pred = svr.predict(X_test)

In [4]:
# Calculate the absolute value of coefficients (since negative values indicate importance as well)
coefficients = np.abs(svr.coef_[0])
# Normalize the coefficients to sum to 1, similar to Random Forest feature_importances_
normalized_importance = coefficients / np.sum(coefficients)

# Create a pandas series for feature importance with the feature names
feature_importance = pd.Series(normalized_importance, index=X.columns).sort_values(ascending=False)

# Output the feature importance
print(feature_importance)


Smart_Living         0.259014
Smart_Mobility       0.170147
Smart_Environment    0.163159
Smart_Economy        0.149919
Smart_People         0.137956
Smart_Government     0.119805
dtype: float64


In [5]:
# Evaluate the model on the test data
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Output the results
print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'R-squared (R²): {r2}')

# Cross-validation (5-fold)
cv_scores = cross_val_score(svr, X_scaled, y, cv=5, scoring='r2')
print(f"Average R² across 5 folds: {np.mean(cv_scores)}")


Mean Squared Error (MSE): 8990.17523063486
Root Mean Squared Error (RMSE): 94.81653458461167
R-squared (R²): 0.9850289487144502
Average R² across 5 folds: 0.7472900119408649
