In [None]:
## import packages
import pandas as pd
import numpy as np

from xgboost import XGBClassifier, XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn import svm

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer, KNNImputer
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import MinMaxScaler 
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

### Preprocessing and EDA

In [None]:
## import csv
df = pd.read_csv("ESS10.csv")

In [None]:
df.shape

In [None]:
df.head()

In [None]:
## Replacing 7, 8 and 9 with NaN to correct reflect those values as missing
df[["scidecpb", "secgrdec", "gvconc19", "netusoft", "psppsgva", "health"]] = df[["scidecpb", "secgrdec", "gvconc19", "netusoft", "psppsgva", "health"]].replace([7, 8, 9], np.nan)

In [None]:
## Replacing 77, 88 and 99 with NaN to correct reflect those values as missing
df[["trstprl", "trstlgl", "trstplc", "trstplt", "trstprt", "trstep", "trstun", "trstsci", "fairelc", "medcrgv", "happy", "sclmeet", "stflife", "stfeco", "stfgov", "stfdem", "stfedu", "stfhlth"]] = df[["trstprl", "trstlgl", "trstplc", "trstplt", "trstprt", "trstep", "trstun", "trstsci", "fairelc", "medcrgv", "happy", "sclmeet", "stflife", "stfeco", "stfgov", "stfdem", "stfedu", "stfhlth"]].replace([77, 88, 99], np.nan)

In [None]:
## Flip health variable around to reflect that high numbers are a positive response
df["health"] = df["health"].map({1:5, 2:4, 3:3, 4:2, 5:1})

### Missing value imputation

In [None]:
## Using MICE imputer to impute missing data
imputer = SimpleImputer()

In [None]:
## Trim csv to only include necessary values
df_trimmed = df[["scidecpb", "secgrdec", "gvconc19", "netusoft", "netustm", "psppsgva", "trstprl", "trstlgl", "trstplc", 
                 "trstplt", "trstprt", "trstep", "trstun", "trstsci", "fairelc", "medcrgv", "happy", "sclmeet",
                "health", "stflife", "stfeco", "stfgov", "stfdem", "stfedu", "stfhlth"]]

In [None]:
df_trimmed = df_trimmed.dropna(subset=["secgrdec"])

In [None]:
df_imputed = pd.DataFrame(imputer.fit_transform(df_trimmed), columns=df_trimmed.columns)

### Defining predictor groups

In [None]:
## Define predictors
trust_features = df_imputed[["psppsgva", "trstprl", "trstlgl", "trstplc", "trstplt", "trstprt", "trstep", "trstun", "trstsci", "fairelc", "medcrgv"]]

In [None]:
struggle_features = df_imputed[["happy", "sclmeet", "health"]]

In [None]:
discontent_features = df_imputed[["stflife", "stfeco", "stfgov", "stfdem", "stfedu", "stfhlth"]]

In [None]:
## Define decision class
y = df_imputed[["secgrdec"]]

In [None]:
## Define predictors
trust_features_internet = df_imputed[["netusoft", "psppsgva", "trstprl", "trstlgl", "trstplc", "trstplt", "trstprt", "trstep", "trstun", "trstsci", "fairelc", "medcrgv"]]

In [None]:
struggle_features_internet = df_imputed[["netusoft", "happy", "sclmeet", "health"]]

In [None]:
discontent_features_internet = df_imputed[["netusoft", "stflife", "stfeco", "stfgov", "stfdem", "stfedu", "stfhlth"]]

In [None]:
internet_features = df_imputed[["netusoft"]]

## EDA

In [None]:
## Find NA's in decision class
df["secgrdec"].isnull().sum()

In [None]:
## Missing values per feature

df_trimmed.isnull().sum()

In [None]:
# Get the value counts for decision class
value_counts = y.value_counts().sort_index()

# Plot the bar graph
plt.figure(figsize=(10, 6))
value_counts.plot(kind='bar')
plt.title('Distribution of decision class')
plt.xlabel('Answers to "A small secret group of people is responsible for making all major decisions in world politics." from 1 (maximum agree) to 5 (')
plt.ylabel('Counts')
plt.show()

### Error Analysis with Trust in Government

In [None]:
## Validate trust
trust_train, trust_val, y_train, y_val = train_test_split(trust_features, y, test_size=0.2, random_state=42)

In [None]:
xgb_model = XGBRegressor()
kfold = KFold(n_splits=5, random_state=42, shuffle=True)
xgb_cv_results = cross_val_score(xgb_model, trust_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"XGBoost CV Root Mean Squared Error: {-xgb_cv_results.mean()}")

xgb_model.fit(trust_train, y_train)
xgb_predictions = xgb_model.predict(trust_val)

In [None]:
svm_model = svm.SVR()
svm_cv_results = cross_val_score(svm_model, trust_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"SVM CV Root Mean Squared Error: {-svm_cv_results.mean()}")

svm_model.fit(trust_train, y_train)
svm_predictions = svm_model.predict(trust_val)

In [None]:
rf_model = RandomForestRegressor()
rf_cv_results = cross_val_score(rf_model, trust_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"Random Forest CV Root Mean Squared Error: {-rf_cv_results.mean()}")

rf_model.fit(trust_train, y_train)
rf_predictions = rf_model.predict(trust_val)

In [None]:
lr_model = LinearRegression()
lr_cv_results = cross_val_score(lr_model, trust_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"Linear Regression CV Root Mean Squared Error: {-lr_cv_results.mean()}")

lr_model.fit(trust_train, y_train)
lr_predictions = lr_model.predict(trust_val)

In [None]:
# Error analysis for XGBoost
xgb_mse = mean_squared_error(y_val, xgb_predictions, squared = False)
print(f"XGBoost RMSE: {xgb_mse}")

# Error analysis for SVM
svm_mse = mean_squared_error(y_val, svm_predictions, squared = False)
print(f"SVM RMSE: {svm_mse}")

# Error analysis for Random Forest
rf_mse = mean_squared_error(y_val, rf_predictions, squared = False)
print(f"Random Forest RMSE: {rf_mse}")

# Error analysis for Linear Regression
lr_mse = mean_squared_error(y_val, lr_predictions, squared = False)
print(f"Linear Regression RMSE: {lr_mse}")

In [None]:
## Validate struggle
struggle_train, struggle_val, y_train, y_val = train_test_split(struggle_features, y, test_size=0.2, random_state=42)

In [None]:
xgb_model = XGBRegressor()
kfold = KFold(n_splits=5, random_state=42, shuffle=True)
xgb_cv_results = cross_val_score(xgb_model, struggle_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"XGBoost CV Root Mean Squared Error: {-xgb_cv_results.mean()}")

xgb_model.fit(struggle_train, y_train)
xgb_predictions = xgb_model.predict(struggle_val)

In [None]:
svm_model = svm.SVR()
svm_cv_results = cross_val_score(svm_model, struggle_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"SVM CV Root Mean Squared Error: {-svm_cv_results.mean()}")

svm_model.fit(struggle_train, y_train)
svm_predictions = svm_model.predict(struggle_val)

In [None]:
rf_model = RandomForestRegressor()
rf_cv_results = cross_val_score(rf_model, struggle_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"Random Forest CV Root Mean Squared Error: {-rf_cv_results.mean()}")

rf_model.fit(struggle_train, y_train)
rf_predictions = rf_model.predict(struggle_val)

In [None]:
lr_model = LinearRegression()
lr_cv_results = cross_val_score(lr_model, struggle_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"Linear Regression CV Root Mean Squared Error: {-lr_cv_results.mean()}")

lr_model.fit(struggle_train, y_train)
lr_predictions = lr_model.predict(struggle_val)

In [None]:
# Error analysis for XGBoost
xgb_mse = mean_squared_error(y_val, xgb_predictions, squared = False)
print(f"XGBoost RMSE: {xgb_mse}")

# Error analysis for SVM
svm_mse = mean_squared_error(y_val, svm_predictions, squared = False)
print(f"SVM RMSE: {svm_mse}")

# Error analysis for Random Forest
rf_mse = mean_squared_error(y_val, rf_predictions, squared = False)
print(f"Random Forest RMSE: {rf_mse}")

# Error analysis for Linear Regression
lr_mse = mean_squared_error(y_val, lr_predictions, squared = False)
print(f"Linear Regression RMSE: {lr_mse}")

In [None]:
## Validate discontent
discontent_train, discontent_val, y_train, y_val = train_test_split(discontent_features, y, test_size=0.2, random_state=42)

In [None]:
xgb_model = XGBRegressor()
kfold = KFold(n_splits=5, random_state=42, shuffle=True)
xgb_cv_results = cross_val_score(xgb_model, discontent_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"XGBoost CV Mean Squared Error: {-xgb_cv_results.mean()}")

xgb_model.fit(discontent_train, y_train)
xgb_predictions = xgb_model.predict(discontent_val)

In [None]:
svm_model = svm.SVR()
svm_cv_results = cross_val_score(svm_model, discontent_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"SVM CV Mean Squared Error: {-svm_cv_results.mean()}")

svm_model.fit(discontent_train, y_train)
svm_predictions = svm_model.predict(discontent_val)

In [None]:
rf_model = RandomForestRegressor()
rf_cv_results = cross_val_score(rf_model, discontent_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"Random Forest CV Mean Squared Error: {-rf_cv_results.mean()}")

rf_model.fit(discontent_train, y_train)
rf_predictions = rf_model.predict(discontent_val)

In [None]:
lr_model = LinearRegression()
lr_cv_results = cross_val_score(lr_model, discontent_train, y_train, cv=kfold, scoring='neg_root_mean_squared_error')
print(f"Linear Regression CV Mean Squared Error: {-lr_cv_results.mean()}")

lr_model.fit(discontent_train, y_train)
lr_predictions = lr_model.predict(discontent_val)

In [None]:
# Error analysis for XGBoost
xgb_mse = mean_squared_error(y_val, xgb_predictions, squared = False)
print(f"XGBoost RMSE: {xgb_mse}")

# Error analysis for SVM
svm_mse = mean_squared_error(y_val, svm_predictions, squared = False)
print(f"SVM RMSE: {svm_mse}")

# Error analysis for Random Forest
rf_mse = mean_squared_error(y_val, rf_predictions, squared = False)
print(f"Random Forest RMSE: {rf_mse}")

# Error analysis for Linear Regression
lr_mse = mean_squared_error(y_val, lr_predictions, squared = False)
print(f"Linear Regression RMSE: {lr_mse}")

### Predictions using XGBoost

In [None]:
xgbmodel = XGBRegressor(objective="reg:squarederror")

In [None]:
# Define the parameter grid to search
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [200, 500, 1000],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

In [None]:
# Set up the GridSearchCV
grid_search = GridSearchCV(estimator=xgbmodel, 
                           param_grid=param_grid,
                           cv=5,  # 5-fold cross-validation
                           scoring='neg_mean_squared_error',  # scoring metric
                           verbose=1,
                           n_jobs=-1)

In [None]:
## Predict trust
trust_train, trust_test, y_train, y_test = train_test_split(trust_features, y, test_size=0.2, random_state=42)

In [None]:
# Fit the GridSearchCV to the data
grid_search.fit(trust_train, y_train)

# Print the best parameters and the best score
print("Best parameters found: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

In [None]:
best_params = grid_search.best_params_
xgbmodel = XGBRegressor(objective="reg:squarederror", **best_params)

In [None]:
xgbmodel.fit(trust_train, y_train)
y_pred = xgbmodel.predict(trust_test)

In [None]:
mse_trust = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_trust}')

In [None]:
## Predict struggle
struggle_train, struggle_test, y_train, y_test = train_test_split(struggle_features, y, test_size=0.2, random_state=42)

In [None]:
xgbmodel.fit(struggle_train, y_train)
y_pred = xgbmodel.predict(struggle_test)

In [None]:
mse_struggle = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_struggle}')

In [None]:
## Predict discontent
discontent_train, discontent_test, y_train, y_test = train_test_split(discontent_features, y, test_size=0.2, random_state=42)

In [None]:
xgbmodel.fit(discontent_train, y_train)
y_pred = xgbmodel.predict(discontent_test)

In [None]:
mse_discontent = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_discontent}')

### Predictions using XGBoost with frequency of internet use (netusoft)

In [None]:
## Predict trust
trust_internet_train, trust_internet_test, y_train, y_test = train_test_split(trust_features_internet, y, test_size=0.2, random_state=42)

In [None]:
xgbmodel.fit(trust_internet_train, y_train)
y_pred = xgbmodel.predict(trust_internet_test)

In [None]:
mse_trust_internet = mean_squared_error(y_test, y_pred,squared = False)
print(f'Root Mean Squared Error: {mse_trust_internet}')

In [None]:
## Predict struggle
struggle_internet_train, struggle_internet_test, y_train, y_test = train_test_split(struggle_features_internet, y, test_size=0.2, random_state=42)

In [None]:
xgbmodel.fit(struggle_internet_train, y_train)
y_pred = xgbmodel.predict(struggle_internet_test)

In [None]:
mse_struggle_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_struggle_internet}')

In [None]:
## Predict discontent
discontent_internet_train, discontent_internet_test, y_train, y_test = train_test_split(discontent_features_internet, y, test_size=0.2, random_state=42)

In [None]:
xgbmodel.fit(discontent_internet_train, y_train)
y_pred = xgbmodel.predict(discontent_internet_test)

In [None]:
mse_discontent_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_discontent_internet}')

In [None]:
## Predict discontent
internet_train, internet_test, y_train, y_test = train_test_split(internet_features, y, test_size=0.2, random_state=42)

In [None]:
xgbmodel.fit(internet_train, y_train)
y_pred = xgbmodel.predict(internet_test)

In [None]:
mse_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_internet}')

### Predictions using Random Forest Regressor

In [None]:
rfrmodel = RandomForestRegressor(random_state=42)

In [None]:
param_grid_rfr = {
    'n_estimators': [100, 200, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [None]:
## Predict trust
trust_train, trust_test, y_train, y_test = train_test_split(trust_features, y, test_size=0.2, random_state=42)

In [None]:
grid_search_rfr = GridSearchCV(estimator=rfrmodel, param_grid=param_grid_rfr, cv=5, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
grid_search_rfr.fit(trust_train, y_train)
print("Best parameters for RFR: ", grid_search_rfr.best_params_)
print("Best score for RFR: ", grid_search_rfr.best_score_)

In [None]:
best_params = grid_search_rfr.best_params_
rfrmodel = RandomForestRegressor(random_state = 42, **best_params)

In [None]:
rfrmodel.fit(trust_train, y_train)
y_pred = rfrmodel.predict(trust_test)

In [None]:
mse_trust = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_trust}')

In [None]:
## Predict struggle
struggle_train, struggle_test, y_train, y_test = train_test_split(struggle_features, y, test_size=0.2, random_state=42)

In [None]:
rfrmodel.fit(struggle_train, y_train)
y_pred = rfrmodel.predict(struggle_test)

In [None]:
mse_struggle = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_struggle}')

In [None]:
## Predict discontent
discontent_train, discontent_test, y_train, y_test = train_test_split(discontent_features, y, test_size=0.2, random_state=42)

In [None]:
rfrmodel.fit(discontent_train, y_train)
y_pred = rfrmodel.predict(discontent_test)

In [None]:
mse_discontent = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_discontent}')

### Predictions using RandomForestRegressor with frequency of internet use (netusoft)

In [None]:
## Predict trust
trust_internet_train, trust_internet_test, y_train, y_test = train_test_split(trust_features_internet, y, test_size=0.2, random_state=42)

In [None]:
rfrmodel.fit(trust_internet_train, y_train)
y_pred = rfrmodel.predict(trust_internet_test)

In [None]:
mse_trust_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_trust_internet}')

In [None]:
## Predict struggle
struggle_internet_train, struggle_internet_test, y_train, y_test = train_test_split(struggle_features_internet, y, test_size=0.2, random_state=42)

In [None]:
rfrmodel.fit(struggle_internet_train, y_train)
y_pred = rfrmodel.predict(struggle_internet_test)

In [None]:
mse_struggle_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_struggle_internet}')

In [None]:
## Predict discontent
discontent_internet_train, discontent_internet_test, y_train, y_test = train_test_split(discontent_features_internet, y, test_size=0.2, random_state=42)

In [None]:
rfrmodel.fit(discontent_internet_train, y_train)
y_pred = rfrmodel.predict(discontent_internet_test)

In [None]:
mse_discontent_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_discontent_internet}')

In [None]:
## Predict discontent
internet_train, internet_test, y_train, y_test = train_test_split(internet_features, y, test_size=0.2, random_state=42)

In [None]:
rfrmodel.fit(internet_train, y_train)
y_pred = rfrmodel.predict(internet_test)

In [None]:
mse_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_internet}')

### Predictions using Linear Regression

In [None]:
lrmodel = LinearRegression()

In [None]:
## Predict trust
trust_train, trust_test, y_train, y_test = train_test_split(trust_features, y, test_size=0.2, random_state=42)

In [None]:
lrmodel.fit(trust_train, y_train)
y_pred = lrmodel.predict(trust_test)

In [None]:
mse_trust = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_trust}')

In [None]:
## Predict struggle
struggle_train, struggle_test, y_train, y_test = train_test_split(struggle_features, y, test_size=0.2, random_state=42)

In [None]:
lrmodel.fit(struggle_train, y_train)
y_pred = lrmodel.predict(struggle_test)

In [None]:
mse_struggle = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_struggle}')

In [None]:
## Predict discontent
discontent_train, discontent_test, y_train, y_test = train_test_split(discontent_features, y, test_size=0.2, random_state=42)

In [None]:
lrmodel.fit(discontent_train, y_train)
y_pred = lrmodel.predict(discontent_test)

In [None]:
mse_discontent = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_discontent}')

### Predictions using Linear Regression with frequency of internet use (netusoft)

In [None]:
## Predict trust
trust_internet_train, trust_internet_test, y_train, y_test = train_test_split(trust_features_internet, y, test_size=0.2, random_state=42)

In [None]:
lrmodel.fit(trust_internet_train, y_train)
y_pred = lrmodel.predict(trust_internet_test)

In [None]:
mse_trust_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_trust_internet}')

In [None]:
## Predict struggle
struggle_train, struggle_test, y_train, y_test = train_test_split(struggle_features, y, test_size=0.2, random_state=42)

In [None]:
lrmodel.fit(struggle_internet_train, y_train)
y_pred = lrmodel.predict(struggle_internet_test)

In [None]:
mse_struggle_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_struggle_internet}')

In [None]:
## Predict discontent
discontent_internet_train, discontent_internet_test, y_train, y_test = train_test_split(discontent_features_internet, y, test_size=0.2, random_state=42)

In [None]:
lrmodel.fit(discontent_internet_train, y_train)
y_pred = lrmodel.predict(discontent_internet_test)

In [None]:
mse_discontent_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_discontent_internet}')

In [None]:
## Predict discontent
internet_train, internet_test, y_train, y_test = train_test_split(internet_features, y, test_size=0.2, random_state=42)

In [None]:
lrmodel.fit(internet_train, y_train)
y_pred = lrmodel.predict(internet_test)

In [None]:
mse_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_internet}')

### Predictions using Support Vector Machines

In [None]:
svmmodel = svm.SVR()

In [None]:
param_grid_svm = {
    'C': [1, 10, 100],
    'gamma': [1, 0.1, 0.01],
    'kernel': ['linear', 'rbf', 'sigmoid']
}

In [None]:
## Predict trust
trust_train, trust_test, y_train, y_test = train_test_split(trust_features, y, test_size=0.2, random_state=42)

In [None]:
grid_search_svm = GridSearchCV(estimator=svmmodel, param_grid=param_grid_svm, cv=5, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
grid_search_svm.fit(trust_train, y_train)
print("Best parameters for SVM: ", grid_search_svm.best_params_)
print("Best score for SVM: ", grid_search_svm.best_score_)

In [None]:
best_params = grid_search_svm.best_params_
svmmodel = svm.SVR(**best_params)

In [None]:
svmmodel.fit(trust_train, y_train)
y_pred = svmmodel.predict(trust_test)

In [None]:
mse_trust = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_trust}')

In [None]:
## Predict struggle
struggle_train, struggle_test, y_train, y_test = train_test_split(struggle_features, y, test_size=0.2, random_state=42)

In [None]:
svmmodel.fit(struggle_train, y_train)
y_pred = svmmodel.predict(struggle_test)

In [None]:
mse_struggle = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_struggle}')

In [None]:
## Predict discontent
discontent_train, discontent_test, y_train, y_test = train_test_split(discontent_features, y, test_size=0.2, random_state=42)

In [None]:
svmmodel.fit(discontent_train, y_train)
y_pred = svmmodel.predict(discontent_test)

In [None]:
mse_discontent = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_discontent}')

### Predictions using Support Vector Machines with frequency of internet use (netusoft)

In [None]:
## Predict trust
trust_internet_train, trust_internet_test, y_train, y_test = train_test_split(trust_features_internet, y, test_size=0.2, random_state=42)

In [None]:
svmmodel.fit(trust_internet_train, y_train)
y_pred = svmmodel.predict(trust_internet_test)

In [None]:
mse_trust_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_trust_internet}')

In [None]:
## Predict struggle
struggle_internet_train, struggle_internet_test, y_train, y_test = train_test_split(struggle_features_internet, y, test_size=0.2, random_state=42)

In [None]:
svmmodel.fit(struggle_internet_train, y_train)
y_pred = svmmodel.predict(struggle_internet_test)

In [None]:
mse_struggle_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_struggle_internet}')

In [None]:
## Predict discontent
discontent_internet_train, discontent_internet_test, y_train, y_test = train_test_split(discontent_features_internet, y, test_size=0.2, random_state=42)

In [None]:
svmmodel.fit(discontent_internet_train, y_train)
y_pred = svmmodel.predict(discontent_internet_test)

In [None]:
mse_discontent_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_discontent_internet}')

In [None]:
## Predict discontent
internet_train, internet_test, y_train, y_test = train_test_split(internet_features, y, test_size=0.2, random_state=42)

In [None]:
svmmodel.fit(internet_train, y_train)
y_pred = svmmodel.predict(internet_test)

In [None]:
mse_internet = mean_squared_error(y_test, y_pred, squared = False)
print(f'Root Mean Squared Error: {mse_internet}')