In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


# Load the Boston Housing dataset.
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on scikit-learn < 1.2.
boston = load_boston()

# Show the dataset's own description. Printing `load_boston` itself would only
# emit the function object's repr, which says nothing about the data.
print(boston.DESCR)

# Multiple linear regression: several input features are used together to
# predict a single target variable.
X = boston.data
y = boston.target
feature_names = boston.feature_names
print("feature_names:", feature_names)

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many samples landed in each partition.
for split_label, split_values in (
    ("x_train", X_train),
    ("y_train", y_train),
    ("x_test", X_test),
    ("y_test", y_test),
):
    print(f"{split_label}:", len(split_values))


# Fit a linear regression on the training split. fit() returns the estimator
# itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

# Predict targets for the held-out samples.
y_pred = model.predict(X_test)

# Mean squared error between the held-out targets and the predictions.
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error (MSE):", mse)

# R^2 computed from the predictions through the metrics helper ...
r2 = r2_score(y_test, y_pred)
print("R^2 score using r2_score:", r2)

# ... and once more through the estimator's own score() method, which
# predicts on X_test internally. `r2` keeps this second value.
r2 = model.score(X_test, y_test)
print("R^2 score using model.score:", r2)

# Pair every held-out target with its prediction, one row per sample
# (column 0 = actual value, column 1 = predicted value).
comparison = np.column_stack((y_test, y_pred))

# Learned weights of the fitted linear model, one per input feature.
# NOTE(review): other estimator types may expose `feature_importances_`
# instead of `coef_` — confirm per model before swapping it in.
coefficients = model.coef_

# Map each feature name to its coefficient.
feature_coefficients = {
    name: weight for name, weight in zip(feature_names, coefficients)
}

# Rank the (feature, coefficient) pairs by the magnitude of the coefficient,
# largest first. The sort key takes the absolute value of the second tuple
# element, so strongly negative weights rank alongside strongly positive ones.
sorted_features = list(feature_coefficients.items())
sorted_features.sort(key=lambda pair: abs(pair[1]), reverse=True)

# Print the ranked features with their signed coefficients.
for feature, coefficient in sorted_features:
    print(f"{feature}: {coefficient}")


In [2]:
import pandas as pd
# Build a DataFrame from the Boston Housing feature matrix and append the
# target variable as the final column.
df = pd.DataFrame(X, columns=feature_names).assign(target=y)

# Preview the first rows of the frame.
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2
