# Hyperparameters

In [1]:
import warnings

# To ignore all warnings
warnings.filterwarnings("ignore")

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

## Linear Regression

In [3]:
from sklearn.datasets import make_regression

# Generating a synthetic dataset with 100 data points
X, y = make_regression(n_samples=100, n_features=20, noise=25)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a list of hyperparameters to tune
hyperparameters = {
    'fit_intercept': [True, False],
    'positive': [True, False]
}

# Set a large number value as default
best_score = np.inf
best_params = {}

# Perform grid search over the hyperparameters
for fit_intercept in hyperparameters['fit_intercept']:
    for positive in hyperparameters['positive']:
        # Create the model with the current hyperparameters
        model = LinearRegression(fit_intercept=fit_intercept, positive=positive)

        # Train the model
        model.fit(X_train, y_train)

        # Make predictions
        predictions = model.predict(X_test)

        # Calculate the mean squared error (Focus metric)
        mse = mean_squared_error(y_test, predictions)

        print(f"Configuration: 'fit_intercept': {fit_intercept}, 'positive': {positive} | 'MSE': {mse:,.2f}, 'Score': {model.score(X_test, y_test):.2f}")

        # Check if this is the best parameter combination
        if mse < best_score:
            best_score = round(mse, 2)
            best_params = {'fit_intercept': fit_intercept, 'positive': positive}

print(f"\n{'-'*25}\nResult:")
print("Best hyperparameters:", best_params)
print("Best mean squared error:", best_score)

Configuration: 'fit_intercept': True, 'positive': True | 'MSE': 819.50, 'Score': 0.98
Configuration: 'fit_intercept': True, 'positive': False | 'MSE': 879.27, 'Score': 0.97
Configuration: 'fit_intercept': False, 'positive': True | 'MSE': 820.70, 'Score': 0.98
Configuration: 'fit_intercept': False, 'positive': False | 'MSE': 882.58, 'Score': 0.97

-------------------------
Result:
Best hyperparameters: {'fit_intercept': True, 'positive': True}
Best mean squared error: 819.5


## Logistic Regression

In [4]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Generating a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a list of hyperparameters to tune
hyperparameters = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1, 10]
}

best_accuracy = 0
best_params = {}

# Perform grid search over the hyperparameters
for penalty in hyperparameters['penalty']:
    for C in hyperparameters['C']:
        # Create the model with the current hyperparameters
        model = LogisticRegression(penalty=penalty, C=C, solver='liblinear')

        # Train the model
        model.fit(X_train, y_train)

        # Make predictions
        predictions = model.predict(X_test)

        # Calculate accuracy
        accuracy = accuracy_score(y_test, predictions)

        print(f"Configuration: 'penalty': {penalty}, 'C': {C} | 'Accuracy': {accuracy:.2f}")

        # Check if this is the best parameter combination
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = {'penalty': penalty, 'C': C}

print(f"\n{'-'*25}\nResult:")
print("Best hyperparameters:", best_params)
print("Best accuracy:", best_accuracy)

Configuration: 'penalty': l1, 'C': 0.001 | 'Accuracy': 0.56
Configuration: 'penalty': l1, 'C': 0.01 | 'Accuracy': 0.77
Configuration: 'penalty': l1, 'C': 0.1 | 'Accuracy': 0.83
Configuration: 'penalty': l1, 'C': 1 | 'Accuracy': 0.83
Configuration: 'penalty': l1, 'C': 10 | 'Accuracy': 0.83
Configuration: 'penalty': l2, 'C': 0.001 | 'Accuracy': 0.80
Configuration: 'penalty': l2, 'C': 0.01 | 'Accuracy': 0.82
Configuration: 'penalty': l2, 'C': 0.1 | 'Accuracy': 0.83
Configuration: 'penalty': l2, 'C': 1 | 'Accuracy': 0.83
Configuration: 'penalty': l2, 'C': 10 | 'Accuracy': 0.83

-------------------------
Result:
Best hyperparameters: {'penalty': 'l1', 'C': 1}
Best accuracy: 0.835


## Decision Tree Classifier

In [5]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Generating a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a list of hyperparameters to tune
hyperparameters = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

best_accuracy = 0
best_params = {}

# Perform grid search over the hyperparameters
for criterion in hyperparameters['criterion']:
    for max_depth in hyperparameters['max_depth']:
        for min_samples_split in hyperparameters['min_samples_split']:
            for min_samples_leaf in hyperparameters['min_samples_leaf']:
                # Create the model with the current hyperparameters
                model = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth,
                                               min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)

                # Train the model
                model.fit(X_train, y_train)

                # Make predictions
                predictions = model.predict(X_test)

                # Calculate accuracy
                accuracy = accuracy_score(y_test, predictions)

                print(f"Configuration: 'criterion': {criterion}, 'max_depth': {max_depth}, 'min_samples_split': {min_samples_split}, 'min_samples_leaf': {min_samples_leaf} | 'Accuracy': {accuracy:.2f}")

                # Check if this is the best parameter combination
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params = {'criterion': criterion, 'max_depth': max_depth, 'min_samples_split': min_samples_split, 'min_samples_leaf': min_samples_leaf}

print(f"\n{'-'*25}\nResult:")
print("Best hyperparameters:", best_params)
print("Best accuracy:", best_accuracy)

Configuration: 'criterion': gini, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1 | 'Accuracy': 0.90
Configuration: 'criterion': gini, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 2 | 'Accuracy': 0.90
Configuration: 'criterion': gini, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 4 | 'Accuracy': 0.91
Configuration: 'criterion': gini, 'max_depth': None, 'min_samples_split': 5, 'min_samples_leaf': 1 | 'Accuracy': 0.90
Configuration: 'criterion': gini, 'max_depth': None, 'min_samples_split': 5, 'min_samples_leaf': 2 | 'Accuracy': 0.91
Configuration: 'criterion': gini, 'max_depth': None, 'min_samples_split': 5, 'min_samples_leaf': 4 | 'Accuracy': 0.91
Configuration: 'criterion': gini, 'max_depth': None, 'min_samples_split': 10, 'min_samples_leaf': 1 | 'Accuracy': 0.91
Configuration: 'criterion': gini, 'max_depth': None, 'min_samples_split': 10, 'min_samples_leaf': 2 | 'Accuracy': 0.91
Configuration: 'criterion': gini, 'max_depth': None, '

## Support Vector Machine

In [6]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Generating a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a list of hyperparameters to tune
hyperparameters = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

best_accuracy = 0
best_params = {}

# Perform grid search over the hyperparameters
for C in hyperparameters['C']:
    for kernel in hyperparameters['kernel']:
        for gamma in hyperparameters['gamma']:
            # Create the model with the current hyperparameters
            model = SVC(C=C, kernel=kernel, gamma=gamma)

            # Train the model
            model.fit(X_train, y_train)

            # Make predictions
            predictions = model.predict(X_test)

            # Calculate accuracy
            accuracy = accuracy_score(y_test, predictions)

            print(f"Configuration: 'C': {C}, 'kernel': {kernel}, 'gamma': {gamma} | 'Accuracy': {accuracy:.2f}")

            # Check if this is the best parameter combination
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = {'C': C, 'kernel': kernel, 'gamma': gamma}

print(f"\n{'-'*25}\nResult:")
print("Best hyperparameters:", best_params)
print("Best accuracy:", best_accuracy)

Configuration: 'C': 0.1, 'kernel': linear, 'gamma': scale | 'Accuracy': 0.85
Configuration: 'C': 0.1, 'kernel': linear, 'gamma': auto | 'Accuracy': 0.85
Configuration: 'C': 0.1, 'kernel': rbf, 'gamma': scale | 'Accuracy': 0.92
Configuration: 'C': 0.1, 'kernel': rbf, 'gamma': auto | 'Accuracy': 0.92
Configuration: 'C': 1, 'kernel': linear, 'gamma': scale | 'Accuracy': 0.84
Configuration: 'C': 1, 'kernel': linear, 'gamma': auto | 'Accuracy': 0.84
Configuration: 'C': 1, 'kernel': rbf, 'gamma': scale | 'Accuracy': 0.92
Configuration: 'C': 1, 'kernel': rbf, 'gamma': auto | 'Accuracy': 0.94
Configuration: 'C': 10, 'kernel': linear, 'gamma': scale | 'Accuracy': 0.85
Configuration: 'C': 10, 'kernel': linear, 'gamma': auto | 'Accuracy': 0.85
Configuration: 'C': 10, 'kernel': rbf, 'gamma': scale | 'Accuracy': 0.94
Configuration: 'C': 10, 'kernel': rbf, 'gamma': auto | 'Accuracy': 0.96

-------------------------
Result:
Best hyperparameters: {'C': 10, 'kernel': 'rbf', 'gamma': 'auto'}
Best accura

## k-Nearest Neighbors

In [7]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Generating a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a list of hyperparameters to tune
hyperparameters = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

best_accuracy = 0
best_params = {}

# Perform grid search over the hyperparameters
for n_neighbors in hyperparameters['n_neighbors']:
    for weights in hyperparameters['weights']:
        for algorithm in hyperparameters['algorithm']:
            # Create the model with the current hyperparameters
            model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm)

            # Train the model
            model.fit(X_train, y_train)

            # Make predictions
            predictions = model.predict(X_test)

            # Calculate accuracy
            accuracy = accuracy_score(y_test, predictions)

            print(f"Configuration: 'n_neighbors': {n_neighbors}, 'weights': {weights}, 'algorithm': {algorithm} | 'Accuracy': {accuracy:.2f}")

            # Check if this is the best parameter combination
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = {'n_neighbors': n_neighbors, 'weights': weights, 'algorithm': algorithm}

print(f"\n{'-'*25}\nResult:")
print("Best hyperparameters:", best_params)
print("Best accuracy:", best_accuracy)

Configuration: 'n_neighbors': 3, 'weights': uniform, 'algorithm': auto | 'Accuracy': 0.92
Configuration: 'n_neighbors': 3, 'weights': uniform, 'algorithm': ball_tree | 'Accuracy': 0.92
Configuration: 'n_neighbors': 3, 'weights': uniform, 'algorithm': kd_tree | 'Accuracy': 0.92
Configuration: 'n_neighbors': 3, 'weights': uniform, 'algorithm': brute | 'Accuracy': 0.92
Configuration: 'n_neighbors': 3, 'weights': distance, 'algorithm': auto | 'Accuracy': 0.92
Configuration: 'n_neighbors': 3, 'weights': distance, 'algorithm': ball_tree | 'Accuracy': 0.92
Configuration: 'n_neighbors': 3, 'weights': distance, 'algorithm': kd_tree | 'Accuracy': 0.92
Configuration: 'n_neighbors': 3, 'weights': distance, 'algorithm': brute | 'Accuracy': 0.92
Configuration: 'n_neighbors': 5, 'weights': uniform, 'algorithm': auto | 'Accuracy': 0.92
Configuration: 'n_neighbors': 5, 'weights': uniform, 'algorithm': ball_tree | 'Accuracy': 0.92
Configuration: 'n_neighbors': 5, 'weights': uniform, 'algorithm': kd_tree

## KMeans

In [8]:
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Generating a synthetic dataset
X, _ = make_blobs(n_samples=1000, centers=5, n_features=2, random_state=42)

# Define a list of hyperparameters to tune
hyperparameters = {
    'n_clusters': [3, 4, 5, 6, 7],
    'init': ['k-means++', 'random']
}

best_silhouette_score = -1
best_params = {}

# Perform grid search over the hyperparameters
for n_clusters in hyperparameters['n_clusters']:
    for init in hyperparameters['init']:
        # Create the model with the current hyperparameters
        model = KMeans(n_clusters=n_clusters, init=init, random_state=42)

        # Fit the model
        model.fit(X)

        # Get cluster labels
        cluster_labels = model.labels_

        # Calculate silhouette score (measure of how well-defined the clusters are)
        silhouette = silhouette_score(X, cluster_labels)

        print(f"Configuration: 'n_clusters': {n_clusters}, 'init': {init} | 'Silhouette Score': {silhouette:.4f}")

        # Check if this is the best parameter combination
        if silhouette > best_silhouette_score:
            best_silhouette_score = silhouette
            best_params = {'n_clusters': n_clusters, 'init': init}

print(f"\n{'-'*25}\nResult:")
print("Best hyperparameters:", best_params)
print("Best silhouette score:", best_silhouette_score)

Configuration: 'n_clusters': 3, 'init': k-means++ | 'Silhouette Score': 0.7011
Configuration: 'n_clusters': 3, 'init': random | 'Silhouette Score': 0.7011
Configuration: 'n_clusters': 4, 'init': k-means++ | 'Silhouette Score': 0.7270
Configuration: 'n_clusters': 4, 'init': random | 'Silhouette Score': 0.7270
Configuration: 'n_clusters': 5, 'init': k-means++ | 'Silhouette Score': 0.6779
Configuration: 'n_clusters': 5, 'init': random | 'Silhouette Score': 0.6779
Configuration: 'n_clusters': 6, 'init': k-means++ | 'Silhouette Score': 0.5963
Configuration: 'n_clusters': 6, 'init': random | 'Silhouette Score': 0.5976
Configuration: 'n_clusters': 7, 'init': k-means++ | 'Silhouette Score': 0.4919
Configuration: 'n_clusters': 7, 'init': random | 'Silhouette Score': 0.4930

-------------------------
Result:
Best hyperparameters: {'n_clusters': 4, 'init': 'k-means++'}
Best silhouette score: 0.7270405117710104


# Hyperparameter Search
## GridSearch

In [9]:
from sklearn.model_selection import GridSearchCV

## Linear Regresion

In [10]:
# Generating a synthetic dataset with 100 data points
X, y = make_regression(n_samples=100, n_features=20, noise=25, random_state=42)

# Define the parameter grid for GridSearchCV
param_grid = {
    'fit_intercept': [True, False],
    'positive': [True, False]
}

# Create the model
model = LinearRegression()

# Perform grid search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X, y)

# Get the best parameters and the best score
best_params = grid_search.best_params_
best_score = -grid_search.best_score_

print("Best hyperparameters:", best_params)
print("Best mean squared error:", best_score)

Best hyperparameters: {'fit_intercept': False, 'positive': True}
Best mean squared error: 722.8188373406498


## Logistic Regression

In [11]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression

# Generating a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_classes=2, random_state=42)

# Define a parameter grid for GridSearchCV
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1, 10]
}

# Create the model
model = LogisticRegression(solver='liblinear')

# Perform grid search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)

# Get the best parameters and the best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best hyperparameters:", best_params)
print("Best accuracy:", best_score)

Best hyperparameters: {'C': 0.1, 'penalty': 'l1'}
Best accuracy: 0.8210000000000001


## Decision Tree Classifier

In [12]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# Generating a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_classes=2, random_state=42)

# Define a parameter grid for GridSearchCV
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the model
model = DecisionTreeClassifier()

# Perform grid search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)

# Get the best parameters and the best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best hyperparameters:", best_params)
print("Best accuracy:", best_score)

Best hyperparameters: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}
Best accuracy: 0.916


## Suport Vector Machine

In [13]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

# Generating a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a parameter grid for GridSearchCV
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

# Create the model
model = SVC()

# Perform grid search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)

# Get the best parameters and the best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best hyperparameters:", best_params)
print("Best accuracy:", best_score)

Best hyperparameters: {'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}
Best accuracy: 0.9349999999999999


## k-Nearest Neighbors

In [14]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Generating a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a parameter grid for GridSearchCV
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

# Create the model
model = KNeighborsClassifier()

# Perform grid search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)

# Get the best parameters and the best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best hyperparameters:", best_params)
print("Best accuracy:", best_score)

Best hyperparameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'uniform'}
Best accuracy: 0.9179999999999999


## KMeans

In [15]:
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import silhouette_score

# Generating a synthetic dataset
X, _ = make_blobs(n_samples=1000, centers=5, n_features=2, random_state=42)

# Define a parameter grid for GridSearchCV
param_grid = {
    'n_clusters': [3, 4, 5, 6, 7],
    'init': ['k-means++', 'random']
}

# Create the model
model = KMeans()

# Perform grid search
grid_search = GridSearchCV(model, param_grid, scoring='neg_mean_squared_error')  # Use an appropriate metric for evaluation
grid_search.fit(X)

# Get the best parameters and the best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best hyperparameters:", best_params)
print("Best score (neg_mean_squared_error):", best_score)

Best hyperparameters: {'init': 'k-means++', 'n_clusters': 3}
Best score (neg_mean_squared_error): nan


# Optimization Algorithm

In [61]:
import numpy as np

# Generating some random data
np.random.seed(42)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)


def gradient_descent(X, y, learning_rate, n_iterations):
    m = X.shape[0]  # number of instances
    theta = np.random.randn(2, 1)  # random initialization of theta (including bias term)
    X_b = np.c_[np.ones((m, 1)), X]  # adding bias term to features

    for iteration in range(n_iterations):
        gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)
        theta -= learning_rate * gradients

    return theta


learning_rate = 0.1
n_iterations = 1000

theta = gradient_descent(X, y, learning_rate, n_iterations)
print("Optimal theta (bias, coefficient):", theta.ravel())

Optimal theta (bias, coefficient): [4.21509616 2.77011339]


## Using Sklearn

In [57]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Generate a synthetic dataset
X, y = make_regression(n_samples=1000, n_features=1000, noise=50, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the linear regression model without gradient descent
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)

# Calculate mean squared error
mse = mean_squared_error(y_test, predictions)
r2 = model.score(X_test, y_test)
print("Mean Squared Error (without Gradient Descent):", mse)
print("R2 (without Gradient Descent):", r2)

Mean Squared Error (without Gradient Descent): 20552.995779983765
R2 (without Gradient Descent): 0.4159166755545346


In [58]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the linear regression model with gradient descent
model = SGDRegressor(loss='huber', max_iter=1000, learning_rate='optimal')
model.fit(X_train_scaled, y_train)

# Make predictions
predictions = model.predict(X_test_scaled)

# Calculate mean squared error
mse = mean_squared_error(y_test, predictions)
r2 = model.score(X_test_scaled, y_test)
print("Mean Squared Error (with Gradient Descent):", mse)
print("R2 (with Gradient Descent):", r2)

Mean Squared Error (with Gradient Descent): 17577.659440818334
R2 (with Gradient Descent): 0.5004709837890357
