In [63]:
# Importing the NumPy library for numerical computations
import numpy as np

# Importing the pandas library for data manipulation and analysis
import pandas as pd

# Importing the LinearRegression model from scikit-learn's linear_model module
from sklearn.linear_model import LinearRegression

In [64]:
# Reading the dataset 'Salary_Data.csv' into a pandas DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
# NOTE(review): no later cell visible in this notebook references `data`; the
# examples below use hand-written NumPy arrays. Confirm this load is still needed.
data = pd.read_csv('Salary_Data.csv')

In [65]:
# Single-feature inputs written directly as a column: one row per observation,
# one column for the feature (the 2-D layout passed to the estimator below).
x = np.array([[5], [15], [25], [35], [45], [55]])

# Observed responses, one per row of x; kept one-dimensional.
y = np.array([5, 20, 14, 32, 22, 38])

In [66]:
# Show x to confirm it is laid out as a single column (one row per observation).
x


array([[ 5],
       [15],
       [25],
       [35],
       [45],
       [55]])

In [67]:
# Show y; unlike x it stays one-dimensional.
y


array([ 5, 20, 14, 32, 22, 38])

In [68]:
# Creating an instance of the LinearRegression model from scikit-learn.
# No arguments are passed, so the estimator is built with the library's defaults.
model = LinearRegression()

In [69]:
# Training the Linear Regression model using the given data:
# x is the single-column feature matrix and y the matching responses.
model.fit(x, y)

In [70]:
# Equivalent one-statement form: construct the estimator and fit it on the same
# data, then rebind `model` to the fitted instance (replacing the one fitted above).
model = LinearRegression().fit(x, y)

In [71]:
# Score the fitted model on the same data it was trained on and report it as the
# coefficient of determination (R²).
r_sq = model.score(x, y)
print(f"coefficient of determination: {r_sq}")

coefficient of determination: 0.715875613747954


In [72]:
# Report the fitted line's parameters: intercept_ is a scalar here and coef_ is
# an array holding one slope per feature.
print(
    f"intercept: {model.intercept_}",
    f"slope: {model.coef_}",
    sep="\n",
)

intercept: 5.633333333333329
slope: [0.54]


In [73]:
# Fit the same data again, this time passing the response as a column (n, 1).
# With a 2-D response the fitted intercept_ comes back as a 1-D array and coef_
# as a 2-D array, as the printed values show.
y_column = y.reshape((-1, 1))
new_model = LinearRegression().fit(x, y_column)
print(
    f"intercept: {new_model.intercept_}",
    f"slope: {new_model.coef_}",
    sep="\n",
)

intercept: [5.63333333]
slope: [[0.54]]


In [74]:
# Predict the response for the training inputs; the result is a 1-D array with
# one value per row of x.
y_pred = model.predict(x)
print(f"predicted response:\n{y_pred}")

predicted response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


In [75]:
# Reproduce the prediction by hand from the fitted parameters (slope * x + intercept).
# Because x is a column of shape (n, 1), broadcasting keeps the result two-dimensional:
# y_pred is rebound to a column here, not the 1-D array that model.predict(x) returned.
y_pred = model.coef_ * x + model.intercept_
print(f"predicted response:\n{y_pred}")

predicted response:
[[ 8.33333333]
 [13.73333333]
 [19.13333333]
 [24.53333333]
 [29.93333333]
 [35.33333333]]


In [76]:
# New single-feature inputs 0..4, shaped as a column so they match the layout
# the model was fitted on.
x_new = np.arange(5)[:, np.newaxis]
x_new



array([[0],
       [1],
       [2],
       [3],
       [4]])

In [77]:
# Apply the fitted model to the new inputs and display the predictions.
y_new = model.predict(x_new)
y_new

array([5.63333333, 6.17333333, 6.71333333, 7.25333333, 7.79333333])

In [78]:
import numpy as np
from sklearn.linear_model import LinearRegression

# Two-feature data set: eight observations, each row is [feature 1, feature 2].
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
# One response per row of x.
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [79]:
# Show the two-feature input matrix (one row per observation, one column per feature).
x



array([[ 0,  1],
       [ 5,  1],
       [15,  2],
       [25,  5],
       [35, 11],
       [45, 15],
       [55, 34],
       [60, 35]])

In [80]:
# Show the responses that pair with the rows of x.
y

array([ 4,  5, 20, 14, 32, 22, 38, 43])

In [81]:
# Fit a multiple linear regression: same estimator as before, but x now has two
# feature columns. Rebinds `model`, replacing the single-feature fit.
model = LinearRegression().fit(x, y)

In [82]:
# Goodness of fit on the training data, followed by the fitted parameters;
# coef_ holds one coefficient per feature column of x.
r_sq = model.score(x, y)
print(
    f"coefficient of determination: {r_sq}",
    f"intercept: {model.intercept_}",
    f"coefficients: {model.coef_}",
    sep="\n",
)

coefficient of determination: 0.8615939258756775
intercept: 5.52257927519819
coefficients: [0.44706965 0.25502548]


In [83]:
# Predict the response for every training row using the two-feature model.
y_pred = model.predict(x)
print(f"predicted response:\n{y_pred}")

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [84]:
# Same prediction computed by hand: weight each feature column by its coefficient,
# add the weighted features across each row (axis=1), then add the intercept.
# Summing over axis 1 leaves a 1-D result, matching model.predict(x).
y_pred = (model.coef_ * x).sum(axis=1) + model.intercept_
print(f"predicted response:\n{y_pred}")

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [85]:
# Five new observations with two features each: rows [0, 1], [2, 3], ... [8, 9].
# NOTE(review): no visible cell passes this x_new to model.predict before x_new is
# redefined further down; confirm whether a prediction step is missing here.
x_new = np.arange(10).reshape(5, 2)
x_new


array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [86]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [87]:
# Data for the polynomial example: a single feature stored as a column, and a
# response that is clearly not a straight-line function of it.
x = np.array([[5], [15], [25], [35], [45], [55]])
y = np.array([15, 11, 2, 8, 25, 32])

In [88]:
# Build a transformer that expands the inputs with polynomial terms up to degree 2.
# include_bias=False leaves out the constant column of ones.
transformer = PolynomialFeatures(degree=2, include_bias=False)

In [89]:
# Fit the transformer to x; this has to happen before transform() is called.
transformer.fit(x)

In [90]:
# Produce the expanded feature matrix from x using the fitted transformer.
x_ = transformer.transform(x)

In [91]:
# Shortcut for the three cells above: construct, fit and transform in one
# statement. Rebinds x_ to the result.
x_ = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)

In [114]:
# Display the expanded feature matrix.
# NOTE(review): the saved output for this cell has 8 rows x 5 columns, which matches
# a degree-2 expansion of the 8x2 two-feature x used elsewhere in this notebook, not
# the 6x1 x defined in this section (that would give a 6x2 x_). The execution count
# also jumps from 91 to 114, so this output looks like it came from an out-of-order
# run; refresh it with Restart Kernel -> Run All.
x_

array([[0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 1.000e+00],
       [5.000e+00, 1.000e+00, 2.500e+01, 5.000e+00, 1.000e+00],
       [1.500e+01, 2.000e+00, 2.250e+02, 3.000e+01, 4.000e+00],
       [2.500e+01, 5.000e+00, 6.250e+02, 1.250e+02, 2.500e+01],
       [3.500e+01, 1.100e+01, 1.225e+03, 3.850e+02, 1.210e+02],
       [4.500e+01, 1.500e+01, 2.025e+03, 6.750e+02, 2.250e+02],
       [5.500e+01, 3.400e+01, 3.025e+03, 1.870e+03, 1.156e+03],
       [6.000e+01, 3.500e+01, 3.600e+03, 2.100e+03, 1.225e+03]])

In [115]:
# Fit ordinary least squares on the expanded features x_ instead of the raw x;
# the model stays linear in its coefficients. Rebinds `model`.
model = LinearRegression().fit(x_, y)

In [116]:
# Goodness of fit plus the fitted parameters for the polynomial model.
# NOTE(review): the saved output lists 5 coefficients, but expanding the 6x1 x
# defined in this section to degree 2 without bias yields 2 columns; the output
# appears to come from a run on different data. Re-run to confirm.
r_sq = model.score(x_, y)
print(
    f"coefficient of determination: {r_sq}",
    f"intercept: {model.intercept_}",
    f"coefficients: {model.coef_}",
    sep="\n",
)

coefficient of determination: 0.9453701449127822
intercept: 0.8430556452395734
coefficients: [ 2.44828275  0.16160353 -0.15259677  0.47928683 -0.4641851 ]


In [117]:
# Same expansion, but include_bias=True also emits a leading column of ones, so
# the intercept can be estimated as just another coefficient. Rebinds x_.
x_ = PolynomialFeatures(degree=2, include_bias=True).fit_transform(x)

In [118]:
# Display the expanded feature matrix including the bias column.
# NOTE(review): the saved output has 10 columns per row, starting 1, 1, x1, x2,
# 1, x1, x2, ... That pattern is consistent with x already carrying a leading
# constant column (as sm.add_constant produces later in this notebook), not with
# the x defined in this section. Likely stale output from out-of-order execution;
# verify with a clean run.
x_

array([[1.000e+00, 1.000e+00, 0.000e+00, 1.000e+00, 1.000e+00, 0.000e+00,
        1.000e+00, 0.000e+00, 0.000e+00, 1.000e+00],
       [1.000e+00, 1.000e+00, 5.000e+00, 1.000e+00, 1.000e+00, 5.000e+00,
        1.000e+00, 2.500e+01, 5.000e+00, 1.000e+00],
       [1.000e+00, 1.000e+00, 1.500e+01, 2.000e+00, 1.000e+00, 1.500e+01,
        2.000e+00, 2.250e+02, 3.000e+01, 4.000e+00],
       [1.000e+00, 1.000e+00, 2.500e+01, 5.000e+00, 1.000e+00, 2.500e+01,
        5.000e+00, 6.250e+02, 1.250e+02, 2.500e+01],
       [1.000e+00, 1.000e+00, 3.500e+01, 1.100e+01, 1.000e+00, 3.500e+01,
        1.100e+01, 1.225e+03, 3.850e+02, 1.210e+02],
       [1.000e+00, 1.000e+00, 4.500e+01, 1.500e+01, 1.000e+00, 4.500e+01,
        1.500e+01, 2.025e+03, 6.750e+02, 2.250e+02],
       [1.000e+00, 1.000e+00, 5.500e+01, 3.400e+01, 1.000e+00, 5.500e+01,
        3.400e+01, 3.025e+03, 1.870e+03, 1.156e+03],
       [1.000e+00, 1.000e+00, 6.000e+01, 3.500e+01, 1.000e+00, 6.000e+01,
        3.500e+01, 3.600e+03, 2.100e+03, 1.225e+03]])

In [119]:
# x_ was built with include_bias=True, so it already has a column of ones;
# fit_intercept=False stops the estimator from fitting a separate intercept.
model = LinearRegression(fit_intercept=False).fit(x_, y)

In [120]:
# Report fit quality and parameters. With fit_intercept=False the intercept_
# attribute prints as 0.0 and the constant term shows up inside coef_ instead.
# NOTE(review): the saved output lists 10 coefficients; expanding the 6x1 x
# defined in this section with include_bias=True would give 3. Output looks stale.
r_sq = model.score(x_, y)
print(
    f"coefficient of determination: {r_sq}",
    f"intercept: {model.intercept_}",
    f"coefficients: {model.coef_}",
    sep="\n",
)

coefficient of determination: 0.9453701449127823
intercept: 0.0
coefficients: [ 0.28101855  0.28101855  1.22414137  0.08080177  0.28101855  1.22414137
  0.08080177 -0.15259677  0.47928683 -0.4641851 ]


In [121]:
# Predict on the expanded training features.
# NOTE(review): the saved output below lists 8 predictions, whereas the x defined
# in this section has 6 rows; the output appears stale. Refresh with a clean run.
y_pred = model.predict(x_)
print(f"predicted response:\n{y_pred}")

predicted response:
[ 0.54047408 11.36340283 16.07809622 15.79139    29.73858619 23.50834636
 39.05631386 41.92339046]


In [122]:
# Step 1: Import packages and classes
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Step 2a: Provide data (eight observations of two input features, plus the
# matching responses)
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

# Step 2b: Transform input data (degree-2 polynomial terms; no bias column,
# because the model below fits its own intercept)
x_ = PolynomialFeatures(include_bias=False, degree=2).fit_transform(x)

# Step 3: Create a model and fit it on the expanded features
model = LinearRegression().fit(x_, y)

# Step 4: Get results
r_sq = model.score(x_, y)
intercept = model.intercept_
coefficients = model.coef_

# Step 5: Predict response for the training inputs
y_pred = model.predict(x_)

In [123]:
# Print everything computed in the previous cell: R², the intercept, the five
# polynomial coefficients and the fitted values for the training rows.
print(
    f"coefficient of determination: {r_sq}",
    f"intercept: {intercept}",
    f"coefficients:\n{coefficients}",
    f"predicted response:\n{y_pred}",
    sep="\n",
)

coefficient of determination: 0.9453701449127822
intercept: 0.8430556452395734
coefficients:
[ 2.44828275  0.16160353 -0.15259677  0.47928683 -0.4641851 ]
predicted response:
[ 0.54047408 11.36340283 16.07809622 15.79139    29.73858619 23.50834636
 39.05631386 41.92339046]


In [124]:
import numpy as np
import statsmodels.api as sm

In [125]:
# Same two-feature data set as in the scikit-learn example: each row of x is
# [feature 1, feature 2], and y holds one response per row.
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [126]:
# Prepend a column of ones so the model fitted below includes an intercept term
# (the array displayed in the next cell shows the new leading column of 1.).
# NOTE(review): this rebinds the notebook-wide name `x`. Any earlier cell re-run
# after this one will see the 3-column version instead of the raw features;
# consider a separate name if cells may be executed out of order.
x = sm.add_constant(x)

In [127]:
# Show x after the constant column was added: ones first, then the two features.
x


array([[ 1.,  0.,  1.],
       [ 1.,  5.,  1.],
       [ 1., 15.,  2.],
       [ 1., 25.,  5.],
       [ 1., 35., 11.],
       [ 1., 45., 15.],
       [ 1., 55., 34.],
       [ 1., 60., 35.]])

In [128]:
# Show the responses; they are unchanged by the add_constant step.
y

array([ 4,  5, 20, 14, 32, 22, 38, 43])

In [129]:
# Define the ordinary least squares model. Note the argument order: the response
# y comes first, then the inputs x (the reverse of the fit(x, y) calls used with
# scikit-learn above). Nothing is fitted yet; that happens in the next cell.
# This also rebinds `model` from the scikit-learn estimator to a statsmodels object.
model = sm.OLS(y, x)

In [130]:
# Run the fit; `results` holds the fitted output that the cells below query.
results = model.fit()

In [131]:
# Print the full regression summary table (fit statistics and per-coefficient rows).
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.862
Model:                            OLS   Adj. R-squared:                  0.806
Method:                 Least Squares   F-statistic:                     15.56
Date:                Fri, 28 Feb 2025   Prob (F-statistic):            0.00713
Time:                        10:52:30   Log-Likelihood:                -24.316
No. Observations:                   8   AIC:                             54.63
Df Residuals:                       5   BIC:                             54.87
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.5226      4.431      1.246      0.2



In [132]:
# Pull individual statistics off the fitted results and print them:
#   rsquared     - coefficient of determination (R²): how well the model fits the data
#   rsquared_adj - R² adjusted for the number of predictors; it penalizes extra
#                  terms but does not by itself prevent overfitting
#   params       - regression coefficients, intercept first, then one slope per feature
print(
    f"coefficient of determination: {results.rsquared}",
    f"adjusted coefficient of determination: {results.rsquared_adj}",
    f"regression coefficients: {results.params}",
    sep="\n",
)

coefficient of determination: 0.8615939258756777
adjusted coefficient of determination: 0.8062314962259488
regression coefficients: [5.52257928 0.44706965 0.25502548]


In [133]:
# Two routes to the in-sample predictions, printed one after the other:
# the fitted values stored on the results object, and an explicit predict()
# call on the same design matrix x. The printed arrays are expected to match.
print(
    f"predicted response:\n{results.fittedvalues}",
    f"predicted response:\n{results.predict(x)}",
    sep="\n",
)

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [134]:
# Build five new two-feature observations (values 0..9 laid out in two columns)
# and prepend the constant column so they have the same layout as the design
# matrix the model was fitted on.
x_new = sm.add_constant(np.arange(10).reshape(-1, 2))

# Display the resulting matrix
x_new


array([[1., 0., 1.],
       [1., 2., 3.],
       [1., 4., 5.],
       [1., 6., 7.],
       [1., 8., 9.]])

In [135]:
# Predict responses for the new inputs. x_new is the 2-D array built in the
# previous cell and already includes the constant column.
y_new = results.predict(x_new)

# Display the predicted values
y_new

array([ 5.77760476,  7.18179502,  8.58598528,  9.99017554, 11.3943658 ])