In [44]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge

# Load the dataset
file_path = '/Users/jamescheng/Desktop/WASHU/CSE 514/student+performance/student/student-mat.csv'
data = pd.read_csv(file_path, delimiter=';')

# Drop 'G1' and 'G2' columns
data = data.drop(['G1', 'G2'], axis=1)

# Separate the features and the target variable
X = data.drop('G3', axis=1)
y = data['G3']

# Identifying categorical columns for one-hot encoding
categorical_columns = X.select_dtypes(include=['object', 'category']).columns

# Creating a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ],
    remainder='passthrough'
)

# Creating the Ridge regression model pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('ridge_regression', Ridge())
])

# Setting up parameter grid for hyperparameter tuning
param_grid = {'ridge_regression__alpha': [0.01, 0.1, 1, 10, 100]}  # alpha is equivalent to λ

# Setting up GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')

# Fitting the model
grid_search.fit(X, y)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

Best parameters: {'ridge_regression__alpha': 100}
Best cross-validation score: -18.694163477538883


In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR  # Using SVR for regression

# Load the dataset with the correct delimiter
data = pd.read_csv('/Users/jamescheng/Desktop/WASHU/CSE 514/student+performance/student/student-mat.csv', delimiter=';')

# Drop 'G1' and 'G2' columns
data = data.drop(['G1', 'G2'], axis=1)

# Separate the features and the target variable
X = data.drop('G3', axis=1)
y = data['G3']

# Identifying categorical columns for one-hot encoding
categorical_columns = X.select_dtypes(include=['object', 'category']).columns

# Creating a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ],
    remainder='passthrough'
)

# Creating the SVM model pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('svm', SVR(C=1.0))
])

# Setting up parameter grid for hyperparameter tuning (C values to test)
param_grid = {'svm__C': [0.1, 1, 10, 100, 1000]}

# Setting up GridSearchCV for 5-fold cross-validation and hyperparameter tuning
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')

# Fitting the model
grid_search.fit(X, y)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

Best parameters: {'svm__C': 10}
Best cross-validation score: -18.551234630666308


In [3]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
# Load the dataset
data = pd.read_csv('/Users/jamescheng/Desktop/WASHU/CSE 514/student+performance/student/student-mat.csv', delimiter=';')

# Drop 'G1' and 'G2' columns
data = data.drop(['G1', 'G2'], axis=1)

# Separate the features and the target variable
X = data.drop('G3', axis=1)
y = data['G3']

# Identifying categorical columns for one-hot encoding
categorical_columns = X.select_dtypes(include=['object', 'category']).columns

# Creating a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ],
    remainder='passthrough'
)

# Creating the model pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('random_forest', RandomForestRegressor())
])

# Define the parameter grid to test different hyperparameter values
param_grid = {
    'random_forest__n_estimators': [10, 50, 100, 200, 500]
}

# Setting Up GridSearchCV to find the best model configuration using 5-fold cross-validation
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X, y)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

Best parameters: {'random_forest__n_estimators': 200}
Best cross-validation score: -17.40701063291139


In [4]:
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor 
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset
data = pd.read_csv('/Users/jamescheng/Desktop/WASHU/CSE 514/student+performance/student/student-mat.csv', delimiter=';')

# Drop 'G1' and 'G2' columns
data = data.drop(['G1', 'G2'], axis=1)

# Separate the features and the target variable
X = data.drop('G3', axis=1)
y = data['G3']

# Identifying categorical columns for one-hot encoding
categorical_columns = X.select_dtypes(include=['object', 'category']).columns

# Creating a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ],
    remainder='passthrough'
)

# Creating a modeling pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('knn', KNeighborsRegressor())
])

# Define the parameter grid to test different values for n_neighbors
param_grid = {'knn__n_neighbors': [1, 3, 5, 7, 9]}

# Setting up GridSearchCV for 5-fold cross-validation and hyperparameter tuning
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')

# Fitting the model
grid_search.fit(X, y)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

Best parameters: {'knn__n_neighbors': 9}
Best cross-validation score: -18.913767776215032


In [24]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset
data = pd.read_csv('/Users/jamescheng/Desktop/WASHU/CSE 514/student+performance/student/student-mat.csv', delimiter=';')

# Drop 'G1' and 'G2' columns
data = data.drop(['G1', 'G2'], axis=1)

# Separate the features and the target variable
X = data.drop('G3', axis=1)
y = data['G3']

# Identifying categorical columns and numerical columns for preprocessing
categorical_columns = X.select_dtypes(include=['object', 'category']).columns
numerical_columns = X.select_dtypes(include=['int', 'float']).columns

# Creating a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_columns),
        ('scaler', StandardScaler(), numerical_columns)
    ],
    remainder='passthrough'
)

model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('ann', MLPRegressor(random_state=42, max_iter=1000, verbose=True, early_stopping=True))
])

# Define the parameter grid to test different learning rates
param_grid = {
    'ann__learning_rate_init': [0.1, 0.01, 0.001]
}

# Setting up GridSearchCV for 5-fold cross-validation and hyperparameter tuning
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')

# Fitting the model
grid_search.fit(X, y)

# Printing the best cross-validation score
print("Best cross-validation score:", grid_search.best_score_)

Iteration 1, loss = 69.61174673
Validation score: 0.000743
Iteration 2, loss = 18.51896598
Validation score: -1.569524
Iteration 3, loss = 36.18521855
Validation score: -1.166574
Iteration 4, loss = 27.84950226
Validation score: -0.183533
Iteration 5, loss = 13.71939936
Validation score: -0.361598
Iteration 6, loss = 18.62598903
Validation score: -0.172085
Iteration 7, loss = 14.83052138
Validation score: 0.279943
Iteration 8, loss = 9.82148112
Validation score: -0.087710
Iteration 9, loss = 13.69719743
Validation score: -0.070536
Iteration 10, loss = 12.66377458
Validation score: 0.229962
Iteration 11, loss = 9.57018511
Validation score: 0.222199
Iteration 12, loss = 10.80301198
Validation score: 0.196730
Iteration 13, loss = 9.79634619
Validation score: 0.280399
Iteration 14, loss = 8.33340239
Validation score: 0.171311
Iteration 15, loss = 9.34532658
Validation score: 0.138010
Iteration 16, loss = 9.13309523
Validation score: 0.198925
Iteration 17, loss = 8.08142283
Validation score

Iteration 17, loss = 7.00088497
Validation score: 0.304366
Iteration 18, loss = 6.72740949
Validation score: 0.312458
Iteration 19, loss = 6.47397283
Validation score: 0.305456
Iteration 20, loss = 6.37380256
Validation score: 0.290068
Iteration 21, loss = 6.39739185
Validation score: 0.282247
Iteration 22, loss = 6.36701447
Validation score: 0.289216
Iteration 23, loss = 6.27696973
Validation score: 0.301637
Iteration 24, loss = 6.13704403
Validation score: 0.311065
Iteration 25, loss = 6.13548377
Validation score: 0.313909
Iteration 26, loss = 6.07444647
Validation score: 0.310319
Iteration 27, loss = 6.01719936
Validation score: 0.307558
Iteration 28, loss = 5.95481070
Validation score: 0.302116
Iteration 29, loss = 5.88030252
Validation score: 0.294773
Iteration 30, loss = 5.87422187
Validation score: 0.288118
Iteration 31, loss = 5.83153164
Validation score: 0.289312
Iteration 32, loss = 5.77173569
Validation score: 0.295381
Iteration 33, loss = 5.72511549
Validation score: 0.3022

Iteration 74, loss = 8.42017390
Validation score: 0.213103
Iteration 75, loss = 8.40143735
Validation score: 0.213837
Iteration 76, loss = 8.38077571
Validation score: 0.214632
Iteration 77, loss = 8.36015536
Validation score: 0.215108
Iteration 78, loss = 8.34063162
Validation score: 0.215595
Iteration 79, loss = 8.32208702
Validation score: 0.216047
Iteration 80, loss = 8.30204540
Validation score: 0.216706
Iteration 81, loss = 8.28185198
Validation score: 0.217450
Iteration 82, loss = 8.26135434
Validation score: 0.218453
Iteration 83, loss = 8.24182305
Validation score: 0.219411
Iteration 84, loss = 8.22255074
Validation score: 0.220162
Iteration 85, loss = 8.20177497
Validation score: 0.220549
Iteration 86, loss = 8.18205609
Validation score: 0.220873
Iteration 87, loss = 8.16395124
Validation score: 0.221471
Iteration 88, loss = 8.14335313
Validation score: 0.222174
Iteration 89, loss = 8.12324997
Validation score: 0.223259
Iteration 90, loss = 8.10801030
Validation score: 0.2242

Iteration 122, loss = 6.00091716
Validation score: 0.281293
Iteration 123, loss = 5.98662977
Validation score: 0.280806
Iteration 124, loss = 5.97378968
Validation score: 0.280193
Iteration 125, loss = 5.96208408
Validation score: 0.280145
Iteration 126, loss = 5.94537400
Validation score: 0.280821
Iteration 127, loss = 5.93150332
Validation score: 0.281645
Iteration 128, loss = 5.91883971
Validation score: 0.282271
Iteration 129, loss = 5.90376491
Validation score: 0.282396
Iteration 130, loss = 5.88923100
Validation score: 0.282410
Iteration 131, loss = 5.87535234
Validation score: 0.282403
Iteration 132, loss = 5.86108132
Validation score: 0.282650
Iteration 133, loss = 5.84761795
Validation score: 0.283031
Iteration 134, loss = 5.83461299
Validation score: 0.283642
Iteration 135, loss = 5.82109174
Validation score: 0.284100
Iteration 136, loss = 5.80534481
Validation score: 0.284270
Iteration 137, loss = 5.79107372
Validation score: 0.284265
Iteration 138, loss = 5.77750251
Validat

In [27]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset
data = pd.read_csv('/Users/jamescheng/Desktop/WASHU/CSE 514/student+performance/student/student-mat.csv', delimiter=';')

# Drop 'G1' and 'G2' columns
data = data.drop(['G1', 'G2'], axis=1)

# Separate the features and the target variable
X = data.drop('G3', axis=1)
y = data['G3']

# Identifying categorical and numerical columns for preprocessing
categorical_columns = X.select_dtypes(include=['object', 'category']).columns
numerical_columns = X.select_dtypes(include=['int', 'float']).columns

# Creating a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_columns),
        ('scaler', StandardScaler(), numerical_columns)
    ],
    remainder='passthrough'
)

# Creating a modeling pipeline with XGBoost
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('xgb', XGBRegressor(objective='reg:squarederror', random_state=42))
])

# If you want to fine-tune hyperparameters, define a parameter grid
param_grid = {
    'xgb__learning_rate': [0.01, 0.1, 0.2, 0.3, 0.4],
}

# Setting up GridSearchCV for hyperparameter tuning (optional)
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')

# Fitting the model
grid_search.fit(X, y)

# Printing the best parameters and cross-validation score
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

Best parameters: {'xgb__learning_rate': 0.01}
Best cross-validation score: -17.960239066574957
