In [1]:
URL = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DA0101EN-Coursera/laptop_pricing_dataset_mod2.csv"

In [4]:
import pandas as pd



# Read the CSV file into a pandas data frame
data_frame = pd.read_csv(URL)
data_frame.head()

# Assume the first row of the file can be used as the headers for the data
# If the file doesn't have headers, you can remove the 'header' parameter
#data_frame = pd.read_csv(file_path, header=0)

# Additional details:
# - The 'pd.read_csv()' function is used to read a CSV file into a pandas data frame.
# - The 'header' parameter in the 'pd.read_csv()' function specifies which row to use as the headers.
#   By default, it is set to 'infer', which means pandas will try to infer the headers from the file.
#   If the headers are in the first row, you can set the 'header' parameter to 0.

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Manufacturer,Category,GPU,OS,CPU_core,Screen_Size_inch,CPU_frequency,RAM_GB,Storage_GB_SSD,Weight_pounds,Price,Price-binned,Screen-Full_HD,Screen-IPS_panel
0,0,0,Acer,4,2,1,5,14.0,0.551724,8,256,3.528,978,Low,0,1
1,1,1,Dell,3,1,1,3,15.6,0.689655,4,256,4.851,634,Low,1,0
2,2,2,Dell,3,1,1,7,15.6,0.931034,8,256,4.851,946,Low,1,0
3,3,3,Dell,4,2,1,5,13.3,0.551724,8,128,2.6901,1244,Low,0,1
4,4,4,HP,4,2,1,7,15.6,0.62069,8,256,4.21155,837,Low,1,0


In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Assume you have a pandas data frame called 'data_frame' with two columns: 'source_variable' and 'target_variable'
# Extract the source variable and target variable from the data frame
X = data_frame[['CPU_frequency']]
y = data_frame['Price']
# Initialize a linear regression model
model = LinearRegression()
# Train the model using the source and target variables
model.fit(X, y)
# Make predictions using the trained model
y_pred = model.predict(X)
# Calculate the mean squared error (MSE)
mse = mean_squared_error(y, y_pred)
# Calculate the coefficient of determination (R^2)
r2 = r2_score(y, y_pred)
# Display the MSE and R^2 values
print("Mean Squared Error (MSE):", mse)
print("Coefficient of Determination (R^2):", r2)
# Additional details:
# - The 'LinearRegression' class from the 'sklearn.linear_model' module is used to create a linear regression model.
# - The 'fit()' method is used to train the model using the source and target variables.
# - The 'predict()' method is used to make predictions using the trained model.
# - The 'mean_squared_error()' function from the 'sklearn.metrics' module is used to calculate the MSE.
# - The 'r2_score()' function from the 'sklearn.metrics' module is used to calculate the R^2 value.

Mean Squared Error (MSE): 284583.4405868629
Coefficient of Determination (R^2): 0.1344436321024326


In [8]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Assume you have a pandas data frame called 'data_frame' with multiple columns: 'source_variable_1', 'source_variable_2', ..., 'target_variable'

# Extract the source variables and target variable from the data frame
X = data_frame[['CPU_frequency', 'RAM_GB', 'Storage_GB_SSD','CPU_core','OS','GPU','Category']]
y = data_frame['Price']

# Initialize a linear regression model
model = LinearRegression()

# Train the model using the source and target variables
model.fit(X, y)

# Make predictions using the trained model
y_pred = model.predict(X)

# Calculate the mean squared error (MSE)
mse = mean_squared_error(y, y_pred)

# Calculate the coefficient of determination (R^2)
r2 = r2_score(y, y_pred)

# Display the MSE and R^2 values
print("Mean Squared Error (MSE):", mse)
print("Coefficient of Determination (R^2):", r2)

# Additional details:
# - The 'LinearRegression' class from the 'sklearn.linear_model' module is used to create a linear regression model.
# - The 'fit()' method is used to train the model using the source and target variables.
# - The 'predict()' method is used to make predictions using the trained model.
# - The 'mean_squared_error()' function from the 'sklearn.metrics' module is used to calculate the MSE.
# - The 'r2_score()' function from the 'sklearn.metrics' module is used to calculate the R^2 value.

Mean Squared Error (MSE): 161680.57263893107
Coefficient of Determination (R^2): 0.5082509055187374


In [10]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

# Assume you have a pandas data frame called 'data_frame' with two columns: 'source_variable' and 'target_variable'

# Extract the source variable and target variable from the data frame
X = data_frame[['CPU_frequency']]
y = data_frame['Price']

# Initialize lists to store the MSE and R^2 values for each model
mse_values = []
r2_values = []

# Loop through the polynomial orders
for order in [2, 3, 5]:
    # Create polynomial features
    polynomial_features = PolynomialFeatures(degree=order)
    X_poly = polynomial_features.fit_transform(X)

    # Initialize a linear regression model
    model = LinearRegression()

    # Train the model using the polynomial features and target variable
    model.fit(X_poly, y)

    # Make predictions using the trained model
    y_pred = model.predict(X_poly)

    # Calculate the mean squared error (MSE)
    mse = mean_squared_error(y, y_pred)

    # Calculate the coefficient of determination (R^2)
    r2 = r2_score(y, y_pred)

    # Append the MSE and R^2 values to the lists
    mse_values.append(mse)
    r2_values.append(r2)

    # Display the MSE and R^2 values for the current model
    print(f"Polynomial Order {order}:")
    print("Mean Squared Error (MSE):", mse)
    print("Coefficient of Determination (R^2):", r2)
    print()

# Compare the performance of the models
best_order = np.argmin(mse_values)
worst_order = np.argmax(r2_values)

print("Model Comparison:")
print(f"Best Polynomial Order: {best_order +2}")
print(f"Worst Polynomial Order: {worst_order + 2}")

# Additional details:
# - The 'PolynomialFeatures' class from the 'sklearn.preprocessing' module is used to create polynomial features.
# - The 'fit_transform()' method is used to transform the source variable into polynomial features.
# - The 'degree' parameter in the 'PolynomialFeatures' class specifies the maximum degree of the polynomial features.
# - The 'argmin()' and 'argmax()' functions from the 'numpy' module are used to find the index of the minimum and maximum values in a list, respectively.

Polynomial Order 2:
Mean Squared Error (MSE): 249022.66596751162
Coefficient of Determination (R^2): 0.24260120745423808

Polynomial Order 3:
Mean Squared Error (MSE): 241024.86303848773
Coefficient of Determination (R^2): 0.266926407965311

Polynomial Order 5:
Mean Squared Error (MSE): 229137.29548056272
Coefficient of Determination (R^2): 0.30308227064430593

Model Comparison:
Best Polynomial Order: 2
Worst Polynomial Order: 4


In [11]:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures


# Assume you have a pandas data frame called 'data_frame' with multiple columns: 'source_variable_1', 'source_variable_2', ..., 'target_variable'

# Extract the source variables and target variable from the data frame
X = data_frame[['CPU_frequency', 'RAM_GB', 'Storage_GB_SSD','CPU_core','OS','GPU','Category']]
y = data_frame['Price']

# Create a pipeline that performs parameter scaling, polynomial feature generation, and linear regression
pipeline = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2),
    LinearRegression()
)

# Train the model using the source and target variables
pipeline.fit(X, y)

# Make predictions using the trained model
y_pred = pipeline.predict(X)

# Calculate the mean squared error (MSE)
mse = mean_squared_error(y, y_pred)

# Calculate the coefficient of determination (R^2)
r2 = r2_score(y, y_pred)

# Display the MSE and R^2 values
print("Mean Squared Error (MSE):", mse)
print("Coefficient of Determination (R^2):", r2)

# Additional details:
# - The 'make_pipeline()' function from the 'sklearn.pipeline' module is used to create a pipeline.
# - The 'StandardScaler' class from the 'sklearn.preprocessing' module is used to perform parameter scaling.
# - The 'PolynomialFeatures' class from the 'sklearn.preprocessing' module is used to create polynomial features.
# - The 'LinearRegression' class from the 'sklearn.linear_model' module is used for linear regression.
# - The pipeline automatically applies the transformations in the specified order.

Mean Squared Error (MSE): 120595.86128028372
Coefficient of Determination (R^2): 0.6332094535859658


In [12]:
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

# Assume you have a pandas data frame called 'data_frame' with multiple columns: 'source_variable_1', 'source_variable_2', ..., 'target_variable'

# Extract the source variables and target variable from the data frame
X = data_frame[['CPU_frequency', 'RAM_GB', 'Storage_GB_SSD','CPU_core','OS','GPU','Category']]
y = data_frame['Price']

# Create polynomial features
polynomial_features = PolynomialFeatures()

# Transform the source variables into polynomial features
X_poly = polynomial_features.fit_transform(X)

# Define the hyperparameter values for the grid search
param_grid = {'alpha': [0.1, 1.0, 10.0], 'degree': [2, 3, 4]}

# Initialize a ridge regression model
model = Ridge()

# Perform grid search with cross-validation
grid_search = GridSearchCV(model, param_grid, cv=5)

# Train the model using the polynomial features and target variable
grid_search.fit(X_poly, y)

# Make predictions using the trained model
y_pred = grid_search.predict(X_poly)

# Calculate the mean squared error (MSE)
mse = mean_squared_error(y, y_pred)

# Calculate the coefficient of determination (R^2)
r2 = r2_score(y, y_pred)

# Display the MSE and R^2 values
print("Mean Squared Error (MSE):", mse)
print("Coefficient of Determination (R^2):", r2)

# Additional details:
# - The 'PolynomialFeatures' class from the 'sklearn.preprocessing' module is used to create polynomial features.
# - The 'GridSearchCV' class from the 'sklearn.model_selection' module is used to perform grid search with cross-validation.
# - The 'Ridge' class from the 'sklearn.linear_model' module is used for ridge regression.
# - The 'fit_transform()' method is used to transform the source variables into polynomial features.
# - The 'param_grid' parameter in the 'GridSearchCV' class specifies the hyperparameter values to search over.
# - The 'cv' parameter in the 'GridSearchCV' class specifies the number of folds for cross-validation.
# - The best model found by grid search can be accessed using the 'best_estimator_' attribute of the grid search object.

ValueError: Invalid parameter 'degree' for estimator Ridge(alpha=0.1). Valid parameters are: ['alpha', 'copy_X', 'fit_intercept', 'max_iter', 'positive', 'random_state', 'solver', 'tol'].

In [13]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Припустимо, у вас є pandas DataFrame 'data_frame' з колонками 'source_variable_1', 'source_variable_2', ..., 'target_variable'

# Виділяємо незалежні змінні та цільову змінну з DataFrame
X = data_frame[['CPU_frequency', 'RAM_GB', 'Storage_GB_SSD','CPU_core','OS','GPU','Category']]
y = data_frame['Price']

# Створення пайплайну з PolynomialFeatures та Ridge
model = make_pipeline(PolynomialFeatures(), Ridge())

# Визначаємо параметри для пошуку по сітці
param_grid = {
    'polynomialfeatures__degree': [2, 3, 4],  # Параметр для degree в PolynomialFeatures
    'ridge__alpha': [0.1, 1.0, 10.0]         # Параметр для alpha в Ridge
}

# Виконуємо пошук по сітці з крос-валідацією
grid_search = GridSearchCV(model, param_grid, cv=5)

# Навчаємо модель з поліноміальними ознаками та цільовою змінною
grid_search.fit(X, y)

# Прогнозуємо з найкращою моделлю
y_pred = grid_search.predict(X)

# Обчислюємо середню квадратичну помилку (MSE)
mse = mean_squared_error(y, y_pred)

# Обчислюємо коефіцієнт детермінації (R^2)
r2 = r2_score(y, y_pred)

# Виводимо значення MSE та R^2
print("Mean Squared Error (MSE):", mse)
print("Coefficient of Determination (R^2):", r2)

# Додаткові деталі:
# - Параметр 'degree' передається через 'polynomialfeatures__degree' у пайплайні.
# - Параметр 'alpha' передається через 'ridge__alpha' у пайплайні.
# - Найкраща модель з GridSearchCV доступна через атрибут 'best_estimator_'.


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


Mean Squared Error (MSE): 125024.56155124161
Coefficient of Determination (R^2): 0.6197396265534009




In [14]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Припустимо, у вас є pandas DataFrame 'data_frame' з колонками 'source_variable_1', 'source_variable_2', ..., 'target_variable'

# Виділяємо незалежні змінні та цільову змінну з DataFrame
X = data_frame[['CPU_frequency', 'RAM_GB', 'Storage_GB_SSD', 'CPU_core', 'OS', 'GPU', 'Category']]
y = data_frame['Price']

# Створення пайплайну з PolynomialFeatures та Ridge
model = make_pipeline(PolynomialFeatures(degree=2), Ridge())

# Визначаємо параметри для пошуку по сітці
param_grid = {
    'ridge__alpha': [0.0001, 0.001, 0.01, 0.1, 1, 10]  # Параметр для alpha в Ridge
}

# Виконуємо пошук по сітці з 4-кратною крос-валідацією
grid_search = GridSearchCV(model, param_grid, cv=4)

# Навчаємо модель з поліноміальними ознаками та цільовою змінною
grid_search.fit(X, y)

# Прогнозуємо з найкращою моделлю
y_pred = grid_search.predict(X)

# Обчислюємо середню квадратичну помилку (MSE)
mse = mean_squared_error(y, y_pred)

# Обчислюємо коефіцієнт детермінації (R^2)
r2 = r2_score(y, y_pred)

# Виводимо значення MSE та R^2
print("Mean Squared Error (MSE):", mse)
print("Coefficient of Determination (R^2):", r2)

# Додаткові деталі:
# - Параметр 'degree' для PolynomialFeatures вказано безпосередньо при створенні пайплайну.
# - Параметр 'alpha' для Ridge передається через 'ridge__alpha' у пайплайні.
# - Найкраща модель з GridSearchCV доступна через атрибут 'best_estimator_'.


Mean Squared Error (MSE): 128987.04078699548
Coefficient of Determination (R^2): 0.6076878039733664
