In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt


# Read the whitespace-delimited housing table. The file carries no header row,
# so the column labels are supplied explicitly.
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
housing_data = pd.read_csv(
    'housing.csv',
    names=column_names,
    header=None,
    delimiter=r"\s+",
)

# MEDV is the regression target; every other column is a predictor.
y = housing_data['MEDV']
X = housing_data.drop(columns='MEDV')

# Hold out 20% of the rows for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [None]:
# Preview the first rows of the raw table.
head_rows = housing_data.head()
print('initial data :', head_rows)

In [None]:
# Show the standardized training matrix under its heading.
scaled_heading = '\nAfter Scaling:'
print(scaled_heading, X_train_scaled)

In [None]:
# Fit one Lasso model on the standardized training data.
alpha = 0.1  # Regularization strength passed to Lasso
lasso = Lasso(alpha=alpha).fit(X_train_scaled, y_train)

# Score the model on the held-out split.
y_pred = lasso.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Coefficient profile: one marker per feature, with a red reference line at zero.
feature_positions = range(len(lasso.coef_))
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(feature_positions, lasso.coef_, marker='o', linestyle='--', color='b', label='Lasso coefficients')
ax.axhline(y=0, color='r', linestyle='-')
ax.set_title('Feature Selection with Lasso')
ax.set_xlabel('Feature Index')
ax.set_ylabel('Coefficient')
ax.set_xticks(range(len(X.columns)))
ax.set_xticklabels(X.columns, rotation=45)
ax.legend()
fig.tight_layout()
plt.show()

# Features with a non-zero coefficient are the ones counted as selected.
num_selected_features = (lasso.coef_ != 0).sum()
print("Number of columns selected:", num_selected_features)



In [None]:
# Columns whose fitted Lasso coefficient is exactly zero.
dropped_mask = lasso.coef_ == 0
rejected_columns = X.columns[dropped_mask]

# List each dropped column together with the reason.
print("Rejected columns:")
rejection_reason = "- This column was rejected by Lasso regression because its coefficient became zero."
for col in rejected_columns:
    print(col, rejection_reason)

In [None]:
# Candidate regularization strengths (called lambda in the text, alpha in sklearn).
alpha_values = [0.001, 0.01, 0.1, 0.5, 1.0, 5.0]

# Per-alpha results, appended in the same order as alpha_values.
rmse_values = []
selected_features = []

for alpha in alpha_values:
    # Fit on the training split and score on the held-out split.
    lasso = Lasso(alpha=alpha).fit(X_train_scaled, y_train)
    y_pred = lasso.predict(X_test_scaled)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    num_selected_features = (lasso.coef_ != 0).sum()

    rmse_values.append(rmse)
    selected_features.append(num_selected_features)

    print(f"Lambda: {alpha}, RMSE: {rmse}, Number of selected features: {num_selected_features}")

# Test RMSE as a function of the penalty (log-scaled x-axis).
fig_rmse, ax_rmse = plt.subplots(figsize=(10, 6))
ax_rmse.plot(alpha_values, rmse_values, marker='o', linestyle='-', color='b')
ax_rmse.set_title('RMSE vs. Lambda (Alpha)')
ax_rmse.set_xlabel('Lambda (Alpha)')
ax_rmse.set_ylabel('RMSE')
ax_rmse.set_xscale('log')
ax_rmse.grid(True)
fig_rmse.tight_layout()
plt.show()

# Number of non-zero coefficients as a function of the penalty.
fig_count, ax_count = plt.subplots(figsize=(10, 6))
ax_count.plot(alpha_values, selected_features, marker='o', linestyle='-', color='r')
ax_count.set_title('Number of Selected Features vs. Lambda (Alpha)')
ax_count.set_xlabel('Lambda (Alpha)')
ax_count.set_ylabel('Number of Selected Features')
ax_count.set_xscale('log')
ax_count.grid(True)
fig_count.tight_layout()
plt.show()

In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Rebuild the 80/20 split and the standardized feature matrices.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regularization strengths (called lambda in the text, alpha in sklearn).
alpha_values = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]

# Per-alpha results, appended in the same order as alpha_values.
rmse_values = []
selected_features = []

for alpha in alpha_values:
    # Fit on the training split and score on the held-out split.
    ridge = Ridge(alpha=alpha).fit(X_train_scaled, y_train)
    y_pred = ridge.predict(X_test_scaled)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    rmse_values.append(rmse)

    # No coefficient check is made here: every feature column is counted.
    num_selected_features = X_train.shape[1]
    selected_features.append(num_selected_features)

    print(f"Lambda: {alpha}, RMSE: {rmse}")

# Test RMSE as a function of the penalty (log-scaled x-axis).
fig_ridge, ax_ridge = plt.subplots(figsize=(10, 6))
ax_ridge.plot(alpha_values, rmse_values, marker='o', linestyle='-', color='b')
ax_ridge.set_title('RMSE vs. Lambda (Alpha) - Ridge Regression')
ax_ridge.set_xlabel('Lambda (Alpha)')
ax_ridge.set_ylabel('RMSE')
ax_ridge.set_xscale('log')
ax_ridge.grid(True)
fig_ridge.tight_layout()
plt.show()


In [None]:
from sklearn.linear_model import ElasticNet

# Rebuild the 80/20 split and the standardized feature matrices.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Elastic Net hyperparameters: overall penalty strength and the L1 share of it.
alpha = 0.1
l1_ratio = 0.5

# Fit on the training split.
elastic_net = ElasticNet(alpha=alpha, l1_ratio=l1_ratio).fit(X_train_scaled, y_train)

# Score on the held-out split.
y_pred = elastic_net.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Tabulate one fitted coefficient per feature column.
coefficient_table = {'feature': X.columns, 'coefficient': elastic_net.coef_}
coefficients = pd.DataFrame(coefficient_table)
print(coefficients)

In [None]:
import matplotlib.pyplot as plt

# Extract feature names and coefficients
feature_names = X.columns
coefficients = elastic_net.coef_

# Vertical bar plot: plt.bar puts the categories (features) on the x-axis and
# the bar heights (coefficient values) on the y-axis, so the axis labels and
# the grid direction must follow that orientation.
plt.figure(figsize=(10, 6))
plt.bar(feature_names, coefficients, color='skyblue')
plt.xlabel('Feature')
plt.ylabel('Coefficient Value')
plt.title('Feature Coefficients - Elastic Net')
plt.grid(axis='y')  # horizontal grid lines make bar heights readable
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Function to generate synthetic datasets
def generate_data(n_samples, noise, seed=0):
    """Generate a noisy sample of the line y = 3x on the interval [0, 1].

    Args:
        n_samples: Number of evenly spaced x values between 0 and 1 inclusive.
        noise: Standard deviation of the Gaussian noise added to the targets.
        seed: Seed of the private random generator. The default of 0 yields
            the same numbers as seeding the global NumPy generator with 0.

    Returns:
        A tuple (X, y_noisy) where X has shape (n_samples, 1) and y_noisy has
        shape (n_samples,).
    """
    # A private RandomState keeps the draw reproducible without resetting the
    # global NumPy generator as a side effect of every call.
    rng = np.random.RandomState(seed)
    X = np.linspace(0, 1, n_samples).reshape(-1, 1)
    y_true = 3 * X.ravel()  # True function: y = 3x
    y_noisy = y_true + rng.normal(scale=noise, size=X.shape[0])  # Add noise
    return X, y_noisy

# Build the three noisy samples; the arguments are the sample count and the noise scale.
X_high_bias_low_variance, y_high_bias_low_variance = generate_data(n_samples=100, noise=2)
X_low_bias_high_variance, y_low_bias_high_variance = generate_data(n_samples=100, noise=10)
X_perfect_fit, y_perfect_fit = generate_data(n_samples=100, noise=2)

# Append two extreme points to the "low bias and high variance" sample.
outlier_x = [[0.1], [0.2]]
outlier_y = [40, 50]
X_low_bias_high_variance = np.concatenate((X_low_bias_high_variance, outlier_x), axis=0)
y_low_bias_high_variance = np.concatenate((y_low_bias_high_variance, outlier_y))

# Define a function to fit linear regression models and plot results
def fit_linear_model(X, y_true, title, ax=None):
    """Fit a LinearRegression model to (X, y_true) and draw data plus fitted line.

    The mean squared error shown in the legend is computed on the same data
    the model was fitted to.

    Args:
        X: Feature matrix passed to the model and used as the x-coordinates.
        y_true: Target values the model is fitted to (drawn as scatter points).
        title: Title of the panel.
        ax: Matplotlib axes to draw on. Defaults to the current axes, so
            existing `plt.subplot(...)` callers keep working.
    """
    if ax is None:
        ax = plt.gca()

    # The points are the targets that were passed in, not an underlying
    # noise-free function, so they are labelled as data.
    ax.scatter(X, y_true, color='blue', label='Observed data')

    # Create and fit the linear regression model
    model = LinearRegression()
    model.fit(X, y_true)

    # Predict on the fitting data and measure the error
    y_pred = model.predict(X)
    mse = mean_squared_error(y_true, y_pred)

    # Overlay the fitted line
    ax.plot(X, y_pred, color='red', label=f'Predicted function (MSE={mse:.2f})')

    ax.set_title(title)
    ax.set_xlabel('X')
    ax.set_ylabel('y')
    ax.legend()

# Draw the three scenarios side by side in one figure.
scenarios = [
    (X_high_bias_low_variance, y_high_bias_low_variance, 'High Bias and Low Variance'),
    (X_low_bias_high_variance, y_low_bias_high_variance, 'Low Bias and High Variance'),
    (X_perfect_fit, y_perfect_fit, 'Perfect Fit'),
]

plt.figure(figsize=(18, 5))
for panel_index, (X_panel, y_panel, panel_title) in enumerate(scenarios, start=1):
    # Select the next panel of the 1x3 grid, then fit and draw into it.
    plt.subplot(1, 3, panel_index)
    fit_linear_model(X_panel, y_panel, panel_title)

plt.tight_layout()
plt.show()




In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Function to generate synthetic datasets
def generate_data(n_samples, noise, seed=0):
    """Generate a noisy sample of the line y = 3x on the interval [0, 1].

    Args:
        n_samples: Number of evenly spaced x values between 0 and 1 inclusive.
        noise: Standard deviation of the Gaussian noise added to the targets.
        seed: Seed of the private random generator. The default of 0 yields
            the same numbers as seeding the global NumPy generator with 0.

    Returns:
        A tuple (X, y_noisy) where X has shape (n_samples, 1) and y_noisy has
        shape (n_samples,).
    """
    # A private RandomState keeps the draw reproducible without resetting the
    # global NumPy generator as a side effect of every call.
    rng = np.random.RandomState(seed)
    X = np.linspace(0, 1, n_samples).reshape(-1, 1)
    y_true = 3 * X.ravel()  # True function: y = 3x
    y_noisy = y_true + rng.normal(scale=noise, size=X.shape[0])  # Add noise
    return X, y_noisy

# Build the three noisy samples; the arguments are the sample count and the noise scale.
X_high_bias_low_variance, y_high_bias_low_variance = generate_data(n_samples=100, noise=2)
X_low_bias_high_variance, y_low_bias_high_variance = generate_data(n_samples=100, noise=10)
X_perfect_fit, y_perfect_fit = generate_data(n_samples=100, noise=2)

# Append two extreme points to the "low bias and high variance" sample.
extra_x = [[0.1], [0.2]]
extra_y = [40, 50]
X_low_bias_high_variance = np.concatenate((X_low_bias_high_variance, extra_x), axis=0)
y_low_bias_high_variance = np.concatenate((y_low_bias_high_variance, extra_y))

# Define a function to fit linear regression models and plot results
def fit_linear_model(X, y_true, title, ax=None):
    """Fit a LinearRegression model to (X, y_true) and draw data plus fitted line.

    The mean squared error is computed on the same data the model was fitted
    to; it is shown both in the legend and on a second line of the title.

    Args:
        X: Feature matrix passed to the model and used as the x-coordinates.
        y_true: Target values the model is fitted to (drawn as scatter points).
        title: First line of the panel title.
        ax: Matplotlib axes to draw on. Defaults to the current axes, so
            existing `plt.subplot(...)` callers keep working.
    """
    if ax is None:
        ax = plt.gca()

    # The points are the targets that were passed in, not an underlying
    # noise-free function, so they are labelled as data.
    ax.scatter(X, y_true, color='blue', label='Observed data')

    # Create and fit the linear regression model
    model = LinearRegression()
    model.fit(X, y_true)

    # Predict on the fitting data and measure the error
    y_pred = model.predict(X)
    mse = mean_squared_error(y_true, y_pred)

    # Overlay the fitted line
    ax.plot(X, y_pred, color='red', label=f'Predicted function (MSE={mse:.2f})')

    ax.set_title(title + f'\nTrain MSE: {mse:.2f}')
    ax.set_xlabel('X')
    ax.set_ylabel('y')
    ax.legend()

# Draw the three scenarios side by side in one figure.
panels = (
    (X_high_bias_low_variance, y_high_bias_low_variance, 'High Bias and Low Variance'),
    (X_low_bias_high_variance, y_low_bias_high_variance, 'Low Bias and High Variance'),
    (X_perfect_fit, y_perfect_fit, 'Perfect Fit'),
)

plt.figure(figsize=(18, 5))
for position, (X_panel, y_panel, panel_title) in enumerate(panels, start=1):
    # Select the next panel of the 1x3 grid, then fit and draw into it.
    plt.subplot(1, 3, position)
    fit_linear_model(X_panel, y_panel, panel_title)

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Function to generate synthetic datasets
def generate_data(n_samples, noise, seed=0):
    """Generate a noisy sample of the line y = 3x on the interval [0, 1].

    Args:
        n_samples: Number of evenly spaced x values between 0 and 1 inclusive.
        noise: Standard deviation of the Gaussian noise added to the targets.
        seed: Seed of the private random generator. The default of 0 yields
            the same numbers as seeding the global NumPy generator with 0.

    Returns:
        A tuple (X, y_noisy) where X has shape (n_samples, 1) and y_noisy has
        shape (n_samples,).
    """
    # A private RandomState keeps the draw reproducible without resetting the
    # global NumPy generator as a side effect of every call.
    rng = np.random.RandomState(seed)
    X = np.linspace(0, 1, n_samples).reshape(-1, 1)
    y_true = 3 * X.ravel()  # True function: y = 3x
    y_noisy = y_true + rng.normal(scale=noise, size=X.shape[0])  # Add noise
    return X, y_noisy

# Build the three noisy samples; the arguments are the sample count and the noise scale.
X_high_bias_low_variance, y_high_bias_low_variance = generate_data(n_samples=100, noise=2)
X_low_bias_high_variance, y_low_bias_high_variance = generate_data(n_samples=100, noise=10)
X_perfect_fit, y_perfect_fit = generate_data(n_samples=100, noise=2)

# Append two extreme points to the "low bias and high variance" sample.
added_x = [[0.1], [0.2]]
added_y = [40, 50]
X_low_bias_high_variance = np.concatenate((X_low_bias_high_variance, added_x), axis=0)
y_low_bias_high_variance = np.concatenate((y_low_bias_high_variance, added_y))

# Define a function to fit linear regression models, calculate MSE, and plot results
def fit_linear_model(X, y_true, title, ax=None):
    """Fit a LinearRegression model on a train split and plot it with train/test MSE.

    The data is split 80/20 with a fixed seed; the model is fitted on the
    training part only and scored on both parts. All points (train and test)
    are drawn, together with the model's predictions over the full X.

    Args:
        X: Feature matrix; also used as the x-coordinates of the plot.
        y_true: Target values paired with X.
        title: Title of the panel.
        ax: Matplotlib axes to draw on. Defaults to the current axes, so
            existing `plt.subplot(...)` callers keep working.
    """
    if ax is None:
        ax = plt.gca()

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y_true, test_size=0.2, random_state=0)

    # Create and fit the linear regression model on the training part only
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict on both parts
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    # Calculate MSE for training and testing
    mse_train = mean_squared_error(y_train, y_pred_train)
    mse_test = mean_squared_error(y_test, y_pred_test)

    # The points are the targets that were passed in, not an underlying
    # noise-free function, so they are labelled as data.
    ax.scatter(X, y_true, color='blue', label='Observed data')
    ax.plot(X, model.predict(X), color='red', label=f'Predicted function (Train MSE={mse_train:.2f}, Test MSE={mse_test:.2f})')

    ax.set_title(title)
    ax.set_xlabel('X')
    ax.set_ylabel('y')
    ax.legend()

# Plot all three scenarios
plt.figure(figsize=(25, 10))

plt.subplot(1, 3, 1)
fit_linear_model(X_high_bias_low_variance, y_high_bias_low_variance, 'High Bias and Low Variance')

plt.subplot(1, 3, 2)
fit_linear_model(X_low_bias_high_variance, y_low_bias_high_variance, 'Low Bias and High Variance')

# The third panel of the 1x3 grid was left empty although the "perfect fit"
# sample is generated above; draw it so the figure shows every scenario.
plt.subplot(1, 3, 3)
fit_linear_model(X_perfect_fit, y_perfect_fit, 'Perfect Fit')

plt.tight_layout()
plt.show()


In [None]:
%%writefile linear.py

import numpy as np

class LinearRegression:
    """Ordinary least-squares linear regression with an intercept term.

    `coefficients` is None until `train` is called. After training on 1-D
    targets it is a 1-D array whose first entry is the intercept and whose
    remaining entries are the per-feature weights.
    """

    def __init__(self):
        self.coefficients = None

    @staticmethod
    def _design_matrix(X):
        """Return X as a 2-D array with a leading column of ones (bias term)."""
        X = np.asarray(X)
        if X.ndim == 1:
            # Treat a flat sequence as n samples of a single feature.
            X = X.reshape(-1, 1)
        return np.hstack((np.ones((X.shape[0], 1)), X))

    def train(self, X, y):
        """Fit the intercept and weights to the samples X and targets y.

        Args:
            X: Array-like of shape (n_samples, n_features) or (n_samples,).
            y: Array-like of targets with n_samples entries.
        """
        design = self._design_matrix(X)
        targets = np.asarray(y)
        # Solve the least-squares problem directly instead of inverting
        # X^T X: lstsq also copes with collinear (rank-deficient) features,
        # returning the minimum-norm solution, and is numerically more stable.
        self.coefficients = np.linalg.lstsq(design, targets, rcond=None)[0]

    def predict(self, X):
        """Return predictions for the samples X; `train` must be called first."""
        return self._design_matrix(X) @ self.coefficients

    def mean_squared_error(self, y_true, y_pred):
        """Return the mean of the squared differences between y_true and y_pred."""
        return np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2)


# Now let's write tests using pytest

import pytest

@pytest.fixture
def linear_regression():
    """Return a new LinearRegression instance that has not been trained yet."""
    model = LinearRegression()
    return model

def test_linear_regression_train_predict(linear_regression):
    X_train = np.array([[1], [2], [3], [4], [5]])
    y_train = np.array([2, 4, 5, 4, 5])
    linear_regression.train(X_train, y_train)

    X_test = np.array([[6], [7], [8]])
    y_pred = linear_regression.predict(X_test)

    assert len(linear_regression.coefficients) == 2  # Check coefficients shape
    assert y_pred.shape == (3,)  # Check predictions shape

def test_linear_regression_mean_squared_error(linear_regression):
    y_true = np.array([2, 4, 5, 4, 5])
    y_pred = np.array([2.2, 3.8, 5.1, 4.3, 4.9])

    mse = linear_regression.mean_squared_error(y_true, y_pred)
    assert np.isclose(mse, 0.037, atol=1e-3)  # Check mean squared error


In [None]:
!pip install pytest pytest-sugar

In [None]:
# Run the tests in linear.py (the file written by the %%writefile cell) via a
# shell command; -v asks pytest for verbose output.
!pytest linear.py -v