In [19]:
#ccpp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_name = 'ccpp.csv'  # Change to your file name
data = pd.read_csv(file_name)

# Define features and target variable.
# The CSV is expected to provide the columns 'AT', 'V', 'AP', 'RH' (features)
# and 'PE' (target); a missing column raises KeyError on the lines below.
feature_columns = ['AT', 'V', 'AP', 'RH']
X = data[feature_columns]
y = data['PE']

# Split the data into training and testing sets (80% training, 20% testing).
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate metrics on the held-out test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print regression coefficients and metrics
print(f'Regression Coefficients: {model.coef_}')
print(f'Intercept: {model.intercept_}')
print(f'Mean Squared Error: {mse}')
print(f'R2 Score (Model Accuracy): {r2}')

# New instances for prediction: one row per instance, one value per feature,
# in the same order as feature_columns.
X_new = np.array([[20, 40, 1010, 60],  # Replace with your new instances
                  [25, 30, 1015, 65]])
# The model was fitted on a DataFrame, so it expects named columns. Predicting
# on a bare ndarray makes scikit-learn warn about missing feature names and
# prevents it from checking the column order, so wrap the rows in a DataFrame
# that reuses the training column names.
X_new_frame = pd.DataFrame(X_new, columns=feature_columns)
y_new_pred = model.predict(X_new_frame)

# Print predictions for new instances
print(f'\nNew Instances X:\n{X_new}')
print(f'Predicted Output YHAT:\n{y_new_pred}')

# Calculate additional metrics for new instances
# For illustration purposes
y_mean = np.mean(y)  # Mean of actual y values from the original dataset
print(f"Mean of Actual Y Values: {y_mean}")

# Deviation of each prediction from the dataset mean of the target.
# NOTE: these are not residuals -- the new instances have no ground-truth
# target value. The "Y" in the labels printed below is the dataset mean.
y_new_errors = y_new_pred - y_mean
squared_errors_new = y_new_errors ** 2
average_squared_error = np.mean(squared_errors_new)

print(f'YHAT - Y for new instances:\n{y_new_errors}')
print(f'Square (YHAT - Y) for new instances:\n{squared_errors_new}')
print(f'Average (Sum of Square (YHAT - Y)) for new instances: {average_squared_error}')



Regression Coefficients: [-1.97696596 -0.23476882  0.05825298 -0.15814574]
Intercept: 458.59620581655787
Mean Squared Error: 19.608085325683806
R2 Score (Model Accuracy): 0.9314747936670361

New Instances X:
[[  20   40 1010   60]
 [  25   30 1015   65]]
Predicted Output YHAT:
[459.01290343 450.97629799]
Mean of Actual Y Values: 454.36500940635455
YHAT - Y for new instances:
[ 4.64789402 -3.38871142]
Square (YHAT - Y) for new instances:
[21.60291884 11.4833651 ]
Average (Sum of Square (YHAT - Y)) for new instances: 16.543141969392572




In [16]:
#CCPP with GD

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_name = 'ccpp.csv'  # Change to your file name
data = pd.read_csv(file_name)

# Define features and target variable
X = data[['AT', 'V', 'AP', 'RH']]
y = data['PE']

# Split the data into training and testing sets (80% training, 20% testing).
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features (z-score). The statistics come from the training split
# only and are reused for the test split and the new instances, so no
# information from held-out rows enters the scaling.
X_train_mean = X_train.mean().values
X_train_std = X_train.std().values  # pandas default: sample std (ddof=1)
X_train_normalized = (X_train - X_train_mean) / X_train_std
X_test_normalized = (X_test - X_train_mean) / X_train_std

# Add intercept term to X (a leading column of ones); np.c_ also converts the
# DataFrames to plain ndarrays.
X_train_b = np.c_[np.ones((X_train_normalized.shape[0], 1)), X_train_normalized]
X_test_b = np.c_[np.ones((X_test_normalized.shape[0], 1)), X_test_normalized]

# Gradient Descent parameters
eta = 0.01  # Learning rate
n_iterations = 1000  # Number of iterations
m = len(X_train_b)  # Number of training instances

# Initialize weights from a seeded generator so the printed coefficients and
# metrics are reproducible from run to run (the starting point matters because
# the loop below runs a fixed number of steps rather than to convergence).
rng = np.random.default_rng(42)
theta = rng.standard_normal(X_train_b.shape[1])

# Use the target as a plain ndarray so the gradient arithmetic does not mix
# ndarray and pandas Series operands.
y_train_values = y_train.values

# Batch gradient descent on the mean squared error:
#   gradient = (2/m) * X^T (X theta - y)
for iteration in range(n_iterations):
    gradients = 2/m * X_train_b.T.dot(X_train_b.dot(theta) - y_train_values)
    theta -= eta * gradients

# Predictions
y_pred_train = X_train_b.dot(theta)
y_pred_test = X_test_b.dot(theta)

# Calculate metrics
mse_train = mean_squared_error(y_train, y_pred_train)
mse_test = mean_squared_error(y_test, y_pred_test)
r2_train = r2_score(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)

# Print regression coefficients and metrics.
# theta[0] is the intercept; theta[1:] are the weights of the *standardized*
# features, so they are not in the raw units of the input columns.
print(f'Regression Coefficients (using GD): {theta[1:]}')
print(f'Intercept (using GD): {theta[0]}')
print(f'Training Mean Squared Error: {mse_train}')
print(f'Test Mean Squared Error: {mse_test}')
print(f'Training R2 Score: {r2_train}')
print(f'Test R2 Score: {r2_test}')

# New instances for prediction: one row per instance, columns in the same
# order as the feature list above. They must go through the same scaling and
# intercept column as the training data.
X_new = np.array([[20, 40, 1010, 60],  # Replace with your new instances
                  [25, 30, 1015, 65]])
X_new_normalized = (X_new - X_train_mean) / X_train_std
X_new_b = np.c_[np.ones((X_new_normalized.shape[0], 1)), X_new_normalized]
y_new_pred = X_new_b.dot(theta)

# Print predictions for new instances
print(f'\nNew Instances X:\n{X_new}')
print(f'Predicted Output YHAT:\n{y_new_pred}')

# Calculate additional metrics for new instances
y_mean = np.mean(y)  # Mean of actual y values from the original dataset

# Deviation of each prediction from the dataset mean of the target.
# NOTE: these are not residuals -- the new instances have no ground-truth
# target value. The "Y" in the labels printed below is the dataset mean.
y_new_errors = y_new_pred - y_mean
squared_errors_new = y_new_errors ** 2
average_squared_error = np.mean(squared_errors_new)

print(f'YHAT - Y for new instances:\n{y_new_errors}')
print(f'Square (YHAT - Y) for new instances:\n{squared_errors_new}')
print(f'Average (Sum of Square (YHAT - Y)) for new instances: {average_squared_error}')


Regression Coefficients (using GD): [-13.851084    -3.66698411   0.51378379  -2.0036459 ]
Intercept (using GD): 454.40289445322344
Training Mean Squared Error: 21.20967344152161
Test Mean Squared Error: 19.759802219192096
Training R2 Score: 0.9274937420706149
Test R2 Score: 0.9309445822129767

New Instances X:
[[  20   40 1010   60]
 [  25   30 1015   65]]
Predicted Output YHAT:
[459.45381028 452.81642386]
YHAT - Y for new instances:
[ 5.08880088 -1.54858554]
Square (YHAT - Y) for new instances:
[25.89589436  2.39811719]
Average (Sum of Square (YHAT - Y)) for new instances: 14.147005772730147


In [20]:
#boston_house_price
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_name = 'boston.csv'  # Change to your file name
data = pd.read_csv(file_name)

# Define features and target variable.
# The CSV is expected to provide every column listed here plus 'MEDV'
# (target); a missing column raises KeyError on the lines below.
feature_columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
X = data[feature_columns]
y = data['MEDV']

# Split the data into training and testing sets (80% training, 20% testing).
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate metrics on the held-out test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print regression coefficients and metrics
print(f'Regression Coefficients: {model.coef_}')
print(f'Intercept: {model.intercept_}')
print(f'Mean Squared Error: {mse}')
print(f'R2 Score (Model Accuracy): {r2}')

# New instances for prediction: one row per instance, one value per feature,
# in the same order as feature_columns.
X_new = np.array([[0.1, 0, 6, 0, 0.5, 6, 60, 5, 3, 300, 15, 380, 5],  # Replace with your new instances
                  [0.2, 0, 8, 1, 0.6, 7, 50, 7, 2, 250, 16, 400, 10]])
# The model was fitted on a DataFrame, so it expects named columns. Predicting
# on a bare ndarray makes scikit-learn warn about missing feature names and
# prevents it from checking the column order, so wrap the rows in a DataFrame
# that reuses the training column names.
X_new_frame = pd.DataFrame(X_new, columns=feature_columns)
y_new_pred = model.predict(X_new_frame)

# Print predictions for new instances
print(f'\nNew Instances X:\n{X_new}')
print(f'Predicted Output YHAT:\n{y_new_pred}')

# Calculate additional metrics for new instances
# Get the mean of actual y values from the original dataset
y_mean = np.mean(y)
print(f"Mean of Actual Y Values: {y_mean}")
# Deviation of each prediction from the dataset mean of the target.
# NOTE: these are not residuals -- the new instances have no ground-truth
# target value. The "Y" in the labels printed below is the dataset mean.
y_new_errors = y_new_pred - y_mean
squared_errors_new = y_new_errors ** 2
average_squared_error = np.mean(squared_errors_new)

print(f'YHAT - Y for new instances:\n{y_new_errors}')
print(f'Square (YHAT - Y) for new instances:\n{squared_errors_new}')
print(f'Average (Sum (Square (YHAT - Y))): {average_squared_error}')



Regression Coefficients: [-1.13055924e-01  3.01104641e-02  4.03807204e-02  2.78443820e+00
 -1.72026334e+01  4.43883520e+00 -6.29636221e-03 -1.44786537e+00
  2.62429736e-01 -1.06467863e-02 -9.15456240e-01  1.23513347e-02
 -5.08571424e-01]
Intercept: 30.24675099392349
Mean Squared Error: 24.291119474973538
R2 Score (Model Accuracy): 0.6687594935356317

New Instances X:
[[1.0e-01 0.0e+00 6.0e+00 0.0e+00 5.0e-01 6.0e+00 6.0e+01 5.0e+00 3.0e+00
  3.0e+02 1.5e+01 3.8e+02 5.0e+00]
 [2.0e-01 0.0e+00 8.0e+00 1.0e+00 6.0e-01 7.0e+00 5.0e+01 7.0e+00 2.0e+00
  2.5e+02 1.6e+01 4.0e+02 1.0e+01]]
Predicted Output YHAT:
[26.90437544 26.70269715]
Mean of Actual Y Values: 22.532806324110677
YHAT - Y for new instances:
[4.37156912 4.16989083]
Square (YHAT - Y) for new instances:
[19.11061655 17.3879895 ]
Average (Sum (Square (YHAT - Y))): 18.249303025021256




In [18]:
#boston_house_price with GD
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_name = 'boston.csv'  # Change to your file name
data = pd.read_csv(file_name)

# Define features and target variable
X = data[['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']]
y = data['MEDV']

# Split the data into training and testing sets (80% training, 20% testing).
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features (z-score). The statistics come from the training split
# only and are reused for the test split and the new instances, so no
# information from held-out rows enters the scaling.
X_train_mean = X_train.mean().values
X_train_std = X_train.std().values  # pandas default: sample std (ddof=1)
X_train_normalized = (X_train - X_train_mean) / X_train_std
X_test_normalized = (X_test - X_train_mean) / X_train_std

# Add intercept term to X (a leading column of ones); np.c_ also converts the
# DataFrames to plain ndarrays.
X_train_b = np.c_[np.ones((X_train_normalized.shape[0], 1)), X_train_normalized]
X_test_b = np.c_[np.ones((X_test_normalized.shape[0], 1)), X_test_normalized]

# Gradient Descent parameters
eta = 0.01  # Learning rate
n_iterations = 1000  # Number of iterations
m = len(X_train_b)  # Number of training instances

# Initialize weights from a seeded generator so the printed coefficients and
# metrics are reproducible from run to run (the starting point matters because
# the loop below runs a fixed number of steps rather than to convergence).
rng = np.random.default_rng(42)
theta = rng.standard_normal(X_train_b.shape[1])

# Use the target as a plain ndarray so the gradient arithmetic does not mix
# ndarray and pandas Series operands.
y_train_values = y_train.values

# Batch gradient descent on the mean squared error:
#   gradient = (2/m) * X^T (X theta - y)
for iteration in range(n_iterations):
    gradients = 2/m * X_train_b.T.dot(X_train_b.dot(theta) - y_train_values)
    theta -= eta * gradients

# Predictions
y_pred_train = X_train_b.dot(theta)
y_pred_test = X_test_b.dot(theta)

# Calculate metrics
mse_train = mean_squared_error(y_train, y_pred_train)
mse_test = mean_squared_error(y_test, y_pred_test)
r2_train = r2_score(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)

# Print regression coefficients and metrics.
# theta[0] is the intercept; theta[1:] are the weights of the *standardized*
# features, so they are not in the raw units of the input columns.
print(f'Regression Coefficients (using GD): {theta[1:]}')
print(f'Intercept (using GD): {theta[0]}')
print(f'Training Mean Squared Error: {mse_train}')
print(f'Test Mean Squared Error: {mse_test}')
print(f'Training R2 Score: {r2_train}')
print(f'Test R2 Score: {r2_test}')

# New instances for prediction: one row per instance, columns in the same
# order as the feature list above. They must go through the same scaling and
# intercept column as the training data.
X_new = np.array([[0.1, 0, 6, 0, 0.5, 6, 60, 5, 3, 300, 15, 380, 5],  # Replace with your new instances
                  [0.2, 0, 8, 1, 0.6, 7, 50, 7, 2, 250, 16, 400, 10]])
X_new_normalized = (X_new - X_train_mean) / X_train_std
X_new_b = np.c_[np.ones((X_new_normalized.shape[0], 1)), X_new_normalized]
y_new_pred = X_new_b.dot(theta)

# Print predictions for new instances
print(f'\nNew Instances X:\n{X_new}')
print(f'Predicted Output YHAT:\n{y_new_pred}')

# Calculate additional metrics for new instances
y_mean = np.mean(y)  # Mean of actual y values from the original dataset

# Deviation of each prediction from the dataset mean of the target.
# NOTE: these are not residuals -- the new instances have no ground-truth
# target value. The "Y" in the labels printed below is the dataset mean.
y_new_errors = y_new_pred - y_mean
squared_errors_new = y_new_errors ** 2
average_squared_error = np.mean(squared_errors_new)

print(f'YHAT - Y for new instances:\n{y_new_errors}')
print(f'Square (YHAT - Y) for new instances:\n{squared_errors_new}')
print(f'Average (Sum (Square (YHAT - Y))): {average_squared_error}')



Regression Coefficients (using GD): [-0.95269528  0.57794702  0.08912771  0.74613811 -1.93123816  3.20285309
 -0.18624416 -2.98615816  1.69567861 -1.16457327 -2.01484019  1.13011911
 -3.59749038]
Intercept (using GD): 22.79653461685836
Training Mean Squared Error: 21.690497021801157
Test Mean Squared Error: 24.69517347520638
Training R2 Score: 0.7503206267420094
Test R2 Score: 0.6632497000568267

New Instances X:
[[1.0e-01 0.0e+00 6.0e+00 0.0e+00 5.0e-01 6.0e+00 6.0e+01 5.0e+00 3.0e+00
  3.0e+02 1.5e+01 3.8e+02 5.0e+00]
 [2.0e-01 0.0e+00 8.0e+00 1.0e+00 6.0e-01 7.0e+00 5.0e+01 7.0e+00 2.0e+00
  2.5e+02 1.6e+01 4.0e+02 1.0e+01]]
Predicted Output YHAT:
[27.04041779 27.04663251]
YHAT - Y for new instances:
[4.50761146 4.51382619]
Square (YHAT - Y) for new instances:
[20.31856109 20.37462687]
Average (Sum (Square (YHAT - Y))): 20.34659398237087
