In [None]:
1. Write a Python script to visualize the distribution of errors (residuals) for a multiple linear regression model
using Seaborn's "diamonds" dataset.

# Import necessary libraries
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Fetch the diamonds data set bundled with seaborn
diamonds = sns.load_dataset('diamonds')

# Regress price on three numeric predictors through the formula interface
predictors = ['carat', 'depth', 'table']
model = ols('price ~ carat + depth + table', data=diamonds).fit()

# Residuals and fitted values come straight off the fitted results object
residuals = model.resid
fitted_values = model.fittedvalues

# Keep each predictor side by side with its residual in a single frame
residuals_df = diamonds[predictors].copy()
residuals_df['residuals'] = residuals

# Histogram of the residuals with a KDE overlay
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(residuals, kde=True, ax=ax)
ax.set_title('Distribution of Residuals')
ax.set_xlabel('Residual Value')
ax.set_ylabel('Frequency')
plt.show()

# One residual scatter per x-axis quantity: the fitted values first, then
# each predictor in turn. Every panel gets a dashed zero reference line.
panels = [(fitted_values, 'Residuals vs. Fitted Values', 'Fitted Value')]
panels += [
    (diamonds[column], f'Residuals vs. {column}', column)
    for column in predictors
]
for x_values, panel_title, x_label in panels:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(x=x_values, y=residuals, ax=ax)
    ax.axhline(y=0, color='r', linestyle='--')
    ax.set_title(panel_title)
    ax.set_xlabel(x_label)
    ax.set_ylabel('Residual')
    plt.show()

2. Write a Python script to calculate and print Mean Squared Error (MSE), Mean Absolute Error (MAE), and Root
Mean Squared Error (RMSE) for a linear regression model.

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

# Reproducible toy data: y = 4 + 3x plus standard-normal noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
noise = np.random.randn(100, 1)
y = 4 + 3 * X + noise

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit on the training rows, then predict the held-out rows
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Error metrics on the held-out rows; RMSE is the square root of MSE
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = math.sqrt(mse)

# Report each metric rounded to two decimals
for label, value in (
    ("Mean Squared Error (MSE)", mse),
    ("Mean Absolute Error (MAE)", mae),
    ("Root Mean Squared Error (RMSE)", rmse),
):
    print(f"{label}: {value:.2f}")

3.  Write a Python script to check if the assumptions of linear regression are met. Use a scatter plot to check
linearity, residuals plot for homoscedasticity, and correlation matrix for multicollinearity.

# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

# Two independent uniform features; only X1 drives the target
np.random.seed(0)
X1 = np.random.rand(100)
X2 = np.random.rand(100)
y = 3 + 2 * X1 + np.random.randn(100)

# Collect everything in one frame so seaborn can address columns by name
df = pd.DataFrame({'X1': X1, 'X2': X2, 'y': y})

# Linearity check: raw scatter of the target against X1
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x='X1', y='y', data=df, ax=ax)
ax.set_title('Scatter Plot of X1 vs. y')
plt.show()

# Fit y on both features and derive the in-sample residuals
X = df[['X1', 'X2']]
y = df['y']
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)
residuals = y - y_pred

# Homoscedasticity check: residuals against fitted values with a zero line
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=y_pred, y=residuals, ax=ax)
ax.axhline(y=0, color='r', linestyle='--')
ax.set_title('Residuals vs. Fitted Values')
ax.set_xlabel('Fitted Values')
ax.set_ylabel('Residuals')
plt.show()

# Multicollinearity check: pairwise correlation between the two features
corr_matrix = X.corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', square=True, ax=ax)
ax.set_title('Correlation Matrix')
plt.show()

4.  Write a Python script that creates a machine learning pipeline with feature scaling and evaluates the
performance of different regression models

# Import necessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Three uniform features; the target depends on the first one plus noise
np.random.seed(0)
X = np.random.rand(100, 3)
y = 3 + 2 * X[:, 0] + np.random.randn(100)

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate regressors, all with library defaults
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest Regressor': RandomForestRegressor(),
    'Support Vector Regressor': SVR(),
}

# Wrap every candidate in the same scale-then-fit pipeline and score it
for name, model in models.items():
    pipeline = Pipeline(steps=[
        ('scaler', StandardScaler()),
        ('model', model),
    ])

    # The scaler statistics are learned on the training rows only
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    # Held-out error metrics
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # One two-line report per model, followed by a blank separator line
    print(f"Model: {name}")
    print(f"MSE: {mse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")
    print()


5. Implement a simple linear regression model on a dataset and print the model's coefficients, intercept, and
R-squared score.

# Import necessary libraries
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

# Toy data: y = 4 + 3x plus standard-normal noise, both as column vectors
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
noise = np.random.randn(100, 1)
y = 4 + 3 * X + noise

# Ordinary least squares fit on the full sample
model = LinearRegression().fit(X, y)

# y is 2-D, so coef_ has shape (1, 1) and intercept_ has shape (1,)
coefficient = model.coef_[0, 0]
intercept = model.intercept_[0]

# In-sample predictions and goodness of fit
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)

# Report the fitted parameters and R-squared
print(f"Coefficient: {coefficient:.4f}")
print(f"Intercept: {intercept:.4f}")
print(f"R-squared Score: {r2:.4f}")

# Overlay the fitted line on the raw observations
fig, ax = plt.subplots()
ax.scatter(X, y, label='Data')
ax.plot(X, y_pred, color='red', label='Regression Line')
ax.legend()
plt.show()

6. Write a Python script that analyzes the relationship between total bill and tip in the 'tips' dataset using
simple linear regression and visualizes the results.

# Import necessary libraries
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np

# Restaurant tipping data bundled with seaborn
tips = sns.load_dataset('tips')

# Predictor kept 2-D (one-column frame) for scikit-learn; target is a Series
X = tips[['total_bill']]
y = tips['tip']

# Fit tip as a linear function of the total bill
model = LinearRegression().fit(X, y)
coefficient, intercept = model.coef_[0], model.intercept_

# In-sample predictions used to draw the fitted line
y_pred = model.predict(X)

# Scatter of the observations with the fitted line on top
bill = X['total_bill']
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(bill, y, label='Data')
ax.plot(bill, y_pred, color='red', label='Regression Line')
ax.set_xlabel('Total Bill ($)')
ax.set_ylabel('Tip ($)')
ax.set_title('Relationship between Total Bill and Tip')
ax.legend()
plt.show()

# Report slope and intercept
print(f"Coefficient: {coefficient:.4f}")
print(f"Intercept: {intercept:.4f}")



7. Write a Python script that fits a linear regression model to a synthetic dataset with one feature. Use the
model to predict new values and plot the data points along with the regression line.

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# One-feature synthetic sample: y = 4 + 3x plus standard-normal noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
noise = np.random.randn(100, 1)
y = 4 + 3 * X + noise

# Least-squares fit
model = LinearRegression()
model.fit(X, y)

# Unwrap the single slope and intercept from their array containers
coefficient = model.coef_[0, 0]
intercept = model.intercept_[0]
print(f"Coefficient: {coefficient:.4f}")
print(f"Intercept: {intercept:.4f}")

# Fitted values at the observed x positions
y_pred = model.predict(X)

# Observations plus fitted line
fig, ax = plt.subplots()
ax.scatter(X, y, label='Data Points')
ax.plot(X, y_pred, color='red', label='Regression Line')
ax.set_xlabel('Feature')
ax.set_ylabel('Target Variable')
ax.set_title('Linear Regression')
ax.legend()
plt.show()

# Score one previously unseen input; predict expects a 2-D array
new_X = np.array([[1.5]])
new_y_pred = model.predict(new_X)
print(f"Predicted value for X = {new_X[0][0]}: {new_y_pred[0][0]:.4f}")


8. Write a Python script that pickles a trained linear regression model and saves it to a file.

# Import necessary libraries
import numpy as np
import pickle
from sklearn.linear_model import LinearRegression

# Toy data: y = 4 + 3x plus standard-normal noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
noise = np.random.randn(100, 1)
y = 4 + 3 * X + noise

# Train the model that will be persisted
model = LinearRegression().fit(X, y)

# Serialize the fitted estimator to disk in binary mode.
# NOTE: only unpickle files from a trusted source; loading a pickle can
# execute arbitrary code.
with open('linear_regression_model.pkl', 'wb') as handle:
    pickle.dump(model, handle)

print("Trained linear regression model saved to linear_regression_model.pkl")


9. Write a Python script that fits a polynomial regression model (degree 2) to a dataset and plots the
regression curve.
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Quadratic ground truth: y = 4 + 3x + 2x^2 plus standard-normal noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + 2 * X**2 + np.random.randn(100, 1)

# Expand x into the columns [1, x, x^2]
poly_features = PolynomialFeatures(degree=2)
X_poly = poly_features.fit_transform(X)

# A linear model on the expanded columns is a degree-2 polynomial fit
model = LinearRegression().fit(X_poly, y)

# coef_[0] is ordered like the expanded columns: bias, x, x^2
_, coefficient_1, coefficient_2 = model.coef_[0]
intercept = model.intercept_[0]

# Report the fitted polynomial, highest power first
print(f"Coefficient of x^2: {coefficient_2:.4f}")
print(f"Coefficient of x: {coefficient_1:.4f}")
print(f"Intercept: {intercept:.4f}")

# Evaluate the fit on an even grid over the sampled range for a smooth curve
X_test = np.linspace(0, 2, 100).reshape(-1, 1)
X_test_poly = poly_features.transform(X_test)
y_pred = model.predict(X_test_poly)

# Observations plus fitted curve
fig, ax = plt.subplots()
ax.scatter(X, y, label='Data Points')
ax.plot(X_test, y_pred, color='red', label='Regression Curve')
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('Polynomial Regression')
ax.legend()
plt.show()


10. Generate synthetic data for simple linear regression (use random values for X and y) and fit a linear
regression model to the data. Print the model's coefficient and intercept.

# Import necessary libraries
import numpy as np
from sklearn.linear_model import LinearRegression

# Random x in [0, 1) and y = 3 + 2x plus standard-normal noise
np.random.seed(0)
X = np.random.rand(100, 1)
noise = np.random.randn(100, 1)
y = 3 + 2 * X + noise

# Least-squares fit
model = LinearRegression().fit(X, y)

# y is 2-D, so the slope sits at coef_[0, 0] and the intercept at intercept_[0]
coefficient = model.coef_[0, 0]
intercept = model.intercept_[0]

# Report the fitted parameters
print(f"Coefficient: {coefficient:.4f}")
print(f"Intercept: {intercept:.4f}")

11. Write a Python script that fits polynomial regression models of different degrees to a synthetic dataset and
compares their performance.
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

# Generate synthetic dataset: quadratic signal plus standard-normal noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + 2 * X**2 + np.random.randn(100, 1)

# Hold out 20% of the rows for evaluation. Training-set MSE cannot increase
# as the degree grows (each polynomial model contains the previous one), so
# it cannot rank the models; the held-out error is what exposes over-fitting.
# A private RandomState keeps the split reproducible without touching the
# global generator.
shuffled = np.random.RandomState(42).permutation(len(X))
n_test = len(X) // 5
test_idx, train_idx = shuffled[:n_test], shuffled[n_test:]
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]

# Fit one pipeline per degree and record both error figures
degrees = [1, 2, 3, 4, 5]
models = []
train_mse_values = []
mse_values = []  # held-out MSE, one entry per degree

for degree in degrees:
    model = Pipeline([
        ('poly_features', PolynomialFeatures(degree=degree)),
        ('linear_regression', LinearRegression())
    ])
    model.fit(X_train, y_train)
    models.append(model)

    train_mse = mean_squared_error(y_train, model.predict(X_train))
    mse = mean_squared_error(y_test, model.predict(X_test))
    train_mse_values.append(train_mse)
    mse_values.append(mse)
    print(f"Degree: {degree}, Train MSE: {train_mse:.4f}, Test MSE: {mse:.4f}")

# Plot the data points and regression curves. The evaluation grid does not
# depend on the degree, so it is built once outside the loop.
X_grid = np.linspace(0, 2, 100).reshape(-1, 1)
plt.scatter(X, y, label='Data Points')
for degree, model in zip(degrees, models):
    y_pred = model.predict(X_grid)
    plt.plot(X_grid, y_pred, label=f'Degree {degree}')
plt.xlabel('X')
plt.ylabel('y')
plt.title('Polynomial Regression')
plt.legend()
plt.show()

# Plot training and held-out MSE against the degree
plt.plot(degrees, train_mse_values, marker='o', label='Train MSE')
plt.plot(degrees, mse_values, marker='o', label='Test MSE')
plt.xlabel('Degree')
plt.ylabel('MSE')
plt.title('MSE vs Degree')
plt.legend()
plt.show()


12. Write a Python script that fits a multiple linear regression model with two features and prints the model's
coefficients, intercept, and R-squared score.

# Import necessary libraries
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Two uniform features; y = 3 + 2*x0 + 1.5*x1 plus standard-normal noise
np.random.seed(0)
X = np.random.rand(100, 2)
y = 3 + 2 * X[:, 0] + 1.5 * X[:, 1] + np.random.randn(100)

# Least-squares fit on both features
model = LinearRegression().fit(X, y)

# y is 1-D here, so coef_ is a length-2 vector and intercept_ a scalar
coefficients, intercept = model.coef_, model.intercept_
print(f"Coefficients: {coefficients}")
print(f"Intercept: {intercept:.4f}")

# In-sample predictions and their R-squared
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
print(f"R-squared Score: {r2:.4f}")


13.  Write a Python script that generates synthetic data, fits a linear regression model, and visualizes the
regression line along with the data points.

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Synthetic sample: y = 4 + 3x plus standard-normal noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
noise = np.random.randn(100, 1)
y = 4 + 3 * X + noise

# Least-squares fit
model = LinearRegression()
model.fit(X, y)

# Unwrap slope and intercept from their array containers and report them
coefficient = model.coef_[0, 0]
intercept = model.intercept_[0]
print(f"Coefficient: {coefficient:.4f}")
print(f"Intercept: {intercept:.4f}")

# Evaluate the fitted line on an even grid spanning the sampled x range
X_test = np.linspace(0, 2, 100).reshape(-1, 1)
y_pred = model.predict(X_test)

# Observations plus fitted line
fig, ax = plt.subplots()
ax.scatter(X, y, label='Data Points')
ax.plot(X_test, y_pred, color='red', label='Regression Line')
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('Linear Regression')
ax.legend()
plt.show()


14. Write a Python script that uses the Variance Inflation Factor (VIF) to check for multicollinearity in a dataset
with multiple features

# Import necessary libraries
import numpy as np
import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Generate sample data
np.random.seed(0)
X1 = np.random.rand(100)
X2 = 2 * X1 + np.random.randn(100) * 0.1  # X2 is highly correlated with X1
X3 = np.random.rand(100)
X = pd.DataFrame({'X1': X1, 'X2': X2, 'X3': X3})

# variance_inflation_factor regresses the chosen column on the remaining
# columns exactly as supplied and does not add an intercept itself. Without
# a constant column the auxiliary regressions are forced through the origin,
# which inflates the VIF of any feature whose mean is not zero (all three
# here are non-centred). Append a constant so the design matrix matches the
# one an ordinary regression with intercept would use.
design = X.assign(const=1.0)
design_values = design.values

# Calculate VIF for each real feature (the constant itself is not reported)
vif = pd.DataFrame()
vif["VIF Factor"] = [
    variance_inflation_factor(design_values, design.columns.get_loc(column))
    for column in X.columns
]
vif["features"] = X.columns

# Print the VIF values
print(vif)


15. Write a Python script that generates synthetic data for a polynomial relationship (degree 4), fits a
polynomial regression model, and plots the regression curve.

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Quartic ground truth sampled on [-1, 1) with standard-normal noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1) - 1
y = 4 + 3 * X + 2 * X**2 + 1 * X**3 + 0.5 * X**4 + np.random.randn(100, 1)

# Expand x into polynomial columns up to the fourth power
poly_features = PolynomialFeatures(degree=4)
X_poly = poly_features.fit_transform(X)

# A linear model on the expanded columns is a degree-4 polynomial fit
model = LinearRegression().fit(X_poly, y)

# Evaluate the fit on an even grid over [-1, 1] for a smooth curve
X_test = np.linspace(-1, 1, 100).reshape(-1, 1)
y_pred = model.predict(poly_features.transform(X_test))

# Observations plus fitted curve
fig, ax = plt.subplots()
ax.scatter(X, y, label='Data Points')
ax.plot(X_test, y_pred, color='red', label='Regression Curve')
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('Polynomial Regression')
ax.legend()
plt.show()


16. Write a Python script that creates a machine learning pipeline with data standardization and a multiple
linear regression model, and prints the R-squared score.

# Import necessary libraries
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Three uniform features; the target uses the first two plus noise
np.random.seed(0)
X = np.random.rand(100, 3)
y = 3 + 2 * X[:, 0] + 1.5 * X[:, 1] + np.random.randn(100)

# Standardize the features, then fit ordinary least squares
steps = [
    ('standardizer', StandardScaler()),
    ('regressor', LinearRegression()),
]
pipeline = Pipeline(steps)
pipeline.fit(X, y)

# In-sample predictions and their R-squared
y_pred = pipeline.predict(X)
r2 = r2_score(y, y_pred)
print(f"R-squared Score: {r2:.4f}")


17. Write a Python script that performs polynomial regression (degree 3) on a synthetic dataset and plots the
regression curve

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Cubic ground truth sampled on [-1, 1) with standard-normal noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1) - 1
y = 3 * X**3 + 2 * X**2 + X + np.random.randn(100, 1)

# Expand x into polynomial columns up to the third power
poly_features = PolynomialFeatures(degree=3)
X_poly = poly_features.fit_transform(X)

# A linear model on the expanded columns is a degree-3 polynomial fit
model = LinearRegression()
model.fit(X_poly, y)

# Evaluate the fit on an even grid over [-1, 1] for a smooth curve
X_test = np.linspace(-1, 1, 100).reshape(-1, 1)
X_test_poly = poly_features.transform(X_test)
y_pred = model.predict(X_test_poly)

# Observations plus fitted curve
fig, ax = plt.subplots()
ax.scatter(X, y, label='Data Points')
ax.plot(X_test, y_pred, color='red', label='Regression Curve')
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('Polynomial Regression')
ax.legend()
plt.show()


18. Write a Python script that performs multiple linear regression on a synthetic dataset with 5 features. Print
the R-squared score and model coefficients.

# Import necessary libraries
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Five uniform features; only the first three enter the target
np.random.seed(0)
X = np.random.rand(100, 5)
y = 3 + 2 * X[:, 0] + 1.5 * X[:, 1] + 0.5 * X[:, 2] + np.random.randn(100)

# Least-squares fit on all five features
model = LinearRegression().fit(X, y)
coefficients, intercept = model.coef_, model.intercept_

# One line per feature (numbered from 1), then the intercept
print("Model Coefficients:")
for position, coefficient in enumerate(coefficients, start=1):
    print(f"Feature {position}: {coefficient:.4f}")
print(f"Intercept: {intercept:.4f}")

# In-sample predictions and their R-squared
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
print(f"R-squared Score: {r2:.4f}")


19.  Write a Python script that generates synthetic data for linear regression, fits a model, and visualizes the
data points along with the regression line.

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Draw x uniformly from [0, 2) and build y = 4 + 3x with unit-variance noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# Fit ordinary least squares to the sample
model = LinearRegression().fit(X, y)

# Both fitted parameters arrive wrapped in arrays because y is 2-D
coefficient = model.coef_[0, 0]
intercept = model.intercept_[0]
print(f"Coefficient: {coefficient:.4f}")
print(f"Intercept: {intercept:.4f}")

# Predict along an evenly spaced grid so the line spans the whole x range
X_test = np.linspace(0, 2, 100).reshape(-1, 1)
y_pred = model.predict(X_test)

# Draw the sample and the fitted line on the same axes
fig, ax = plt.subplots()
ax.scatter(X, y, label='Data Points')
ax.plot(X_test, y_pred, color='red', label='Regression Line')
ax.set(xlabel='X', ylabel='y', title='Linear Regression')
ax.legend()
plt.show()


20. Create a synthetic dataset with 3 features and perform multiple linear regression. Print the model's R-squared score and coefficients.

# Import necessary libraries
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Three uniform features, all of which enter the target, plus noise
np.random.seed(0)
X = np.random.rand(100, 3)
y = 3 + 2 * X[:, 0] + 1.5 * X[:, 1] + 0.5 * X[:, 2] + np.random.randn(100)

# Least-squares fit
model = LinearRegression()
model.fit(X, y)

# Fitted slope vector and scalar intercept
coefficients = model.coef_
intercept = model.intercept_

# One line per feature (numbered from 1), then the intercept
print("Model Coefficients:")
for position, coefficient in enumerate(coefficients, start=1):
    print(f"Feature {position}: {coefficient:.4f}")
print(f"Intercept: {intercept:.4f}")

# In-sample predictions and their R-squared
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
print(f"R-squared Score: {r2:.4f}")



21. Write a Python script that demonstrates how to serialize and deserialize machine learning models using
joblib instead of pickling

# Import necessary libraries
import numpy as np
from sklearn.linear_model import LinearRegression
import joblib

# Toy data: y = 3 + 2x plus standard-normal noise
np.random.seed(0)
X = np.random.rand(100, 1)
noise = np.random.randn(100, 1)
y = 3 + 2 * X + noise

# Train the model that will be round-tripped through joblib
model = LinearRegression().fit(X, y)

# Write the fitted estimator to disk, then read it back into a new object
model_path = 'linear_regression_model.joblib'
joblib.dump(model, model_path)
loaded_model = joblib.load(model_path)

# The restored estimator can predict without being refitted
y_pred = loaded_model.predict(X)

# Report the parameters carried by the restored estimator
print(f"Coefficient: {loaded_model.coef_[0][0]:.4f}")
print(f"Intercept: {loaded_model.intercept_[0]:.4f}")


22. Write a Python script to perform linear regression with categorical features using one-hot encoding. Use
the Seaborn 'tips' dataset.

# Import necessary libraries
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the tips dataset
tips = sns.load_dataset('tips')

# Define features (X) and target variable (y); every feature is categorical
X = tips[['sex', 'smoker', 'day', 'time']]
y = tips['total_bill']

# One-hot encode categorical features.
# drop='first' removes one level per feature. With a full one-hot encoding
# the indicator columns of each feature sum to 1, which is perfectly
# collinear with the intercept that LinearRegression fits (the
# "dummy-variable trap") and leaves the coefficients non-unique. The dropped
# level becomes the baseline the remaining coefficients are measured against.
encoder = OneHotEncoder(drop='first', sparse_output=False)
X_encoded = encoder.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)

# Create and fit the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Calculate the mean squared error on the held-out rows
mse = mean_squared_error(y_test, y_pred)

# Print the mean squared error
print(f"Mean Squared Error: {mse:.4f}")


23. Compare Ridge Regression with Linear Regression on a synthetic dataset and print the coefficients and R-squared score.

# Import necessary libraries
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import r2_score

# Three uniform features, all of which enter the target, plus noise
np.random.seed(0)
X = np.random.rand(100, 3)
y = 3 + 2 * X[:, 0] + 1.5 * X[:, 1] + 0.5 * X[:, 2] + np.random.randn(100)

# Plain least squares
linear_model = LinearRegression().fit(X, y)
y_pred_linear = linear_model.predict(X)

# Ridge regression with penalty strength alpha=1.0
ridge_model = Ridge(alpha=1.0).fit(X, y)
y_pred_ridge = ridge_model.predict(X)

# Show both coefficient vectors one after the other
print("Linear Regression Coefficients:")
print(linear_model.coef_)
print("Ridge Regression Coefficients:")
print(ridge_model.coef_)

# In-sample R-squared for each model
r2_linear = r2_score(y, y_pred_linear)
r2_ridge = r2_score(y, y_pred_ridge)
print(f"Linear Regression R-squared Score: {r2_linear:.4f}")
print(f"Ridge Regression R-squared Score: {r2_ridge:.4f}")


24. Write a Python script that uses cross-validation to evaluate a Linear Regression model on a synthetic
dataset

# Import necessary libraries
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, mean_squared_error

# Generate synthetic data
np.random.seed(0)
X = np.random.rand(100, 3)
y = 3 + 2 * X[:, 0] + 1.5 * X[:, 1] + 0.5 * X[:, 2] + np.random.randn(100)

# Create a linear regression model
model = LinearRegression()

# Define a scoring function. greater_is_better=False makes the scorer
# return the NEGATED mean squared error, so that "higher is better" still
# holds for model-selection utilities.
scorer = make_scorer(mean_squared_error, greater_is_better=False)

# Perform 5-fold cross-validation
scores = cross_val_score(model, X, y, cv=5, scoring=scorer)

# Flip the sign once so every number reported below is a plain (positive)
# MSE. Previously the per-fold values were printed negated while the mean
# was printed positive, which made the output inconsistent.
mse_scores = -scores

# Print the per-fold MSE
print("Cross-Validation MSE Scores:")
print(mse_scores)

# Print the mean and standard deviation of the per-fold MSE
print(f"Mean MSE: {mse_scores.mean():.4f}")
print(f"Standard Deviation: {mse_scores.std():.4f}")


25. Write a Python script that compares polynomial regression models of different degrees and prints the R-squared score for each.

# Import necessary libraries
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

# Generate synthetic data: cubic signal on [-1, 1) plus standard-normal noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1) - 1
y = 3 * X**3 + 2 * X**2 + X + np.random.randn(100, 1)

# Define degrees to compare
degrees = [1, 2, 3, 4, 5]

# Grid used to draw every regression curve; it does not depend on the degree
X_test = np.linspace(-1, 1, 100).reshape(-1, 1)

# Fit each degree exactly once. The fitted transformer and model are kept so
# the plotting step below can reuse them instead of refitting.
fitted_models = []
for degree in degrees:
    # Create polynomial features
    poly_features = PolynomialFeatures(degree=degree)
    X_poly = poly_features.fit_transform(X)

    # Create and fit the polynomial regression model
    model = LinearRegression()
    model.fit(X_poly, y)
    fitted_models.append((degree, poly_features, model))

    # R-squared on the training data (in-sample fit quality)
    y_pred = model.predict(X_poly)
    r2 = r2_score(y, y_pred)

    # Print the R-squared score
    print(f"Degree {degree}: R-squared Score = {r2:.4f}")

# Plot the data and one regression curve per fitted degree
plt.scatter(X, y, label='Data Points')
for degree, poly_features, model in fitted_models:
    X_test_poly = poly_features.transform(X_test)
    y_pred = model.predict(X_test_poly)
    plt.plot(X_test, y_pred, label=f'Degree {degree}')
plt.xlabel('X')
plt.ylabel('y')
plt.title('Polynomial Regression')
plt.legend()
plt.show()
