<a href="https://colab.research.google.com/github/SAKETH-ADILLA/Machine-Learning/blob/main/Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Build a Multiple Linear Regression model on a dataset (eg: 50_startups)

In [9]:
import pandas as pd
import numpy as np

file_path = 'archive.csv'
data = pd.read_csv(file_path)

unique_states = data['State'].unique()
state_onehot = np.zeros((len(data), len(unique_states) - 1))

for idx, state in enumerate(unique_states[1:]):
      state_onehot[:, idx] = (data['State'] == state).astype(int)
numerical_columns = data.columns.drop('State')
data_encoded = np.hstack((state_onehot, data[numerical_columns].values))

encoded_column_names = [f'State_{state}' for state in unique_states[1:]]
all_column_names = encoded_column_names + list(numerical_columns)

data_encoded_df = pd.DataFrame(data_encoded, columns=all_column_names)

X = data_encoded_df.drop('Profit', axis=1).values
y = data_encoded_df['Profit'].values

split_index = int(0.8 * len(X))
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

X_train_augmented = np.c_[np.ones(X_train.shape[0]), X_train]
X_test_augmented = np.c_[np.ones(X_test.shape[0]), X_test]
theta = np.linalg.inv(X_train_augmented.T @ X_train_augmented) @ X_train_augmented.T @ y_train

y_pred = X_test_augmented @ theta

mse = np.mean((y_test - y_pred) ** 2)
r2 = 1 - (np.sum((y_test - y_pred) ** 2) / np.sum((y_test - np.mean(y_test)) ** 2))

print("Multiple Linear Regression Results:")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R²): {r2}")

coefficients = pd.DataFrame({
    "Feature": ["Intercept"] + [f"Feature_{i}" for i in range(1, X_train_augmented.shape[1])],
    "Coefficient": theta
})
print("\nModel Coefficients:")
print(coefficients)


Multiple Linear Regression Results:
Mean Squared Error (MSE): 239539562.2908166
R-squared (R²): 0.38538399087029185

Model Coefficients:
     Feature   Coefficient
0  Intercept  54861.320804
1  Feature_1   3374.837589
2  Feature_2   1414.315140
3  Feature_3      0.763218
4  Feature_4     -0.045782
5  Feature_5      0.029118


##Apply L2 regularization on the created simple and multiple linear regression. What is yourobservation?

In [5]:
import numpy as np
X_train_np = (X_train - X_train.mean()) / X_train.std()
X_test_np = (X_test - X_train.mean()) / X_train.std()
X_train_np = np.c_[np.ones(X_train_np.shape[0]), X_train_np]
X_test_np = np.c_[np.ones(X_test_np.shape[0]), X_test_np]

y_train_np = y_train.values
y_test_np = y_test.values
lambda_ = 1.0
n_features = X_train_np.shape[1]
I = np.eye(n_features)
I[0, 0] = 0
theta_ridge = np.linalg.inv(X_train_np.T.dot(X_train_np) + lambda_ * I).dot(X_train_np.T).dot(y_train_np)
y_pred_ridge_np = X_test_np.dot(theta_ridge)
mse_ridge = mean_squared_error(y_test_np, y_pred_ridge_np)
r2_ridge = r2_score(y_test_np, y_pred_ridge_np)

print("Ridge Regression Results (Closed-Form Solution):")
print(f"Mean Squared Error (MSE): {mse_ridge}")
print(f"R-squared (R²): {r2_ridge}")
print("\nFinal Model Parameters (Theta):")
print(theta_ridge)


Ridge Regression Results (Closed-Form Solution):
Mean Squared Error (MSE): 84826956.88045646
R-squared (R²): 0.8952484722488928

Final Model Parameters (Theta):
[ 1.15651720e+05  3.33457929e+02  7.18979529e+01  3.63865634e+04
 -1.35248792e+03  5.01726242e+03]


##Apply L1 regularization on the created simple and multiple linear regression. What is your observation?

In [11]:
import numpy as np
import pandas as pd
def lasso_regression(X, y, lambda_=1.0, max_iter=1000, tol=1e-4):
    m, n = X.shape
    theta = np.zeros(n)
    X_transpose = X.T
    prev_theta = np.copy(theta)

    for _ in range(max_iter):

        for j in range(n):
            X_j = X[:, j]
            residual = y - X.dot(theta) + theta[j] * X_j

            if j == 0:
                theta[j] = np.dot(X_j, residual) / np.dot(X_j, X_j)
            else:
                theta[j] = np.sign(np.dot(X_j, residual)) * max(0, (np.dot(X_j, residual) - lambda_) / np.dot(X_j, X_j))

        if np.linalg.norm(theta - prev_theta, ord=2) < tol:
            break
        prev_theta = np.copy(theta)

    return theta


def simple_lasso(X_train, y_train, X_test, y_test, lambda_=1.0):
    X_train = np.c_[np.ones(X_train.shape[0]), X_train]
    X_test = np.c_[np.ones(X_test.shape[0]), X_test]
    theta = lasso_regression(X_train, y_train, lambda_)

    y_pred = X_test.dot(theta)
    mse = np.mean((y_test - y_pred) ** 2)
    ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
    ss_res = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)

    return theta, mse, r2
def multiple_lasso(X_train, y_train, X_test, y_test, lambda_=1.0):
    X_train = np.c_[np.ones(X_train.shape[0]), X_train]
    X_test = np.c_[np.ones(X_test.shape[0]), X_test]

    theta = lasso_regression(X_train, y_train, lambda_)

    y_pred = X_test.dot(theta)

    mse = np.mean((y_test - y_pred) ** 2)
    ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
    ss_res = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)

    return theta, mse, r2

X_train_simple = np.array([1, 2, 3, 4, 5])
y_train_simple = np.array([1, 2, 1.5, 3.5, 5])
X_test_simple = np.array([6, 7, 8])
y_test_simple = np.array([5, 6, 7])

theta_simple, mse_simple, r2_simple = simple_lasso(X_train_simple, y_train_simple, X_test_simple, y_test_simple, lambda_=1.0)
print("Simple Lasso Regression Results:")
print(f"Model Coefficients (Theta): {theta_simple}")
print(f"Mean Squared Error (MSE): {mse_simple}")
print(f"R-squared (R²): {r2_simple}")

X_train_multiple = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y_train_multiple = np.array([5, 7, 8, 10, 12])
X_test_multiple = np.array([[6, 7], [7, 8], [8, 9]])
y_test_multiple = np.array([14, 16, 18])

theta_multiple, mse_multiple, r2_multiple = multiple_lasso(X_train_multiple, y_train_multiple, X_test_multiple, y_test_multiple, lambda_=1.0)
print("Multiple Lasso Regression Results:")
print(f"Model Coefficients (Theta): {theta_multiple}")
print(f"Mean Squared Error (MSE): {mse_multiple}")
print(f"R-squared (R²): {r2_multiple}")


Simple Lasso Regression Results:
Model Coefficients (Theta): [0.05037319 0.84989822]
Mean Squared Error (MSE): 0.01502047794024052
R-squared (R²): 0.9774692830896392
Multiple Lasso Regression Results:
Model Coefficients (Theta): [3.60038475 1.59989507 0.        ]
Mean Squared Error (MSE): 1.5475622235014512
R-squared (R²): 0.4196641661869558
