In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

# Both files are parsed as tab-separated text without a header row; anything
# after a '#' on a line is ignored by the parser.
# NOTE(review): column 0 is taken as the target and the remaining columns as
# features - confirm this matches the column order of the data files.

# Turbine dataset: full frame, target vector, feature matrix
turbine_data = pd.read_csv('A1-turbine.txt', sep='\t', header=None, comment='#')
turbine_data_output = turbine_data.iloc[:, 0].values
turbine_data_input = turbine_data.iloc[:, 1:].values

# Synthetic dataset: same layout as above
synthetic_data = pd.read_csv('A1-synthetic.txt', sep='\t', header=None, comment='#')
synthetic_data_output = synthetic_data.iloc[:, 0].values
synthetic_data_input = synthetic_data.iloc[:, 1:].values

# Preview the first rows of each frame
print('Turbine Data:', turbine_data.head(), sep='\n')
print('\nSynthetic Data:', synthetic_data.head(), sep='\n')

In [None]:
# Define a function to calculate the Mean Absolute Percentage Error (MAPE)
def calculate_mape(y_true, y_pred):
    """Return the Mean Absolute Percentage Error (MAPE), in percent.

    Args:
        y_true: Array-like of observed target values (list, NumPy array, ...).
        y_pred: Array-like of predicted values with the same shape as y_true.

    Returns:
        mean(|y_true - y_pred| / |y_true|) * 100, computed over the entries
        whose true value is non-zero. NaN when there is no such entry
        (including empty input).

    Raises:
        ValueError: If y_true and y_pred do not have the same shape.
    """
    # Coerce to float arrays so plain Python lists work and the arithmetic is
    # element-wise by position.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Refuse silent broadcasting: (n,) against (n, 1) would otherwise expand
    # to an (n, n) error matrix and yield a meaningless score.
    if actual.shape != predicted.shape:
        raise ValueError(
            f'y_true and y_pred must have the same shape, '
            f'got {actual.shape} and {predicted.shape}'
        )

    # A percentage error is undefined where the true value is zero; dividing
    # there would turn the whole mean into inf/nan, so skip those entries.
    defined = actual != 0
    if not defined.any():
        return float('nan')

    relative_error = (actual[defined] - predicted[defined]) / actual[defined]
    return np.mean(np.abs(relative_error)) * 100

# Model training and evaluation for the turbine and synthetic data live in the
# cell that follows the train/test split further down. The copy that used to
# sit here ran before turbine_data_input_train / synthetic_data_input_train
# (and the matching *_test arrays) were created, so a fresh
# "Restart Kernel -> Run All" stopped with a NameError at this point.

In [None]:
# Hold out 20% of the turbine rows for testing (seed fixed at 42)
(
    turbine_data_input_train,
    turbine_data_input_test,
    turbine_data_output_train,
    turbine_data_output_test,
) = train_test_split(
    turbine_data_input,
    turbine_data_output,
    test_size=0.2,
    random_state=42,
)

# Same 80/20 split, with the same seed, for the synthetic rows
(
    synthetic_data_input_train,
    synthetic_data_input_test,
    synthetic_data_output_train,
    synthetic_data_output_test,
) = train_test_split(
    synthetic_data_input,
    synthetic_data_output,
    test_size=0.2,
    random_state=42,
)

# Report the shape of each partition's feature matrix
print(
    'Turbine Data: Training set shape:', turbine_data_input_train.shape,
    'Test set shape:', turbine_data_input_test.shape,
)
print(
    'Synthetic Data: Training set shape:', synthetic_data_input_train.shape,
    'Test set shape:', synthetic_data_input_test.shape,
)

In [None]:
# --- Turbine data ---
# Fit a linear regression on the training split (fit() returns the estimator,
# so construction and fitting are chained), then score the held-out split.
mlr_turbine = LinearRegression().fit(
    turbine_data_input_train,
    turbine_data_output_train,
)
turbine_predictions = mlr_turbine.predict(turbine_data_input_test)
turbine_mape = calculate_mape(turbine_data_output_test, turbine_predictions)
print('Turbine Data: MAPE =', turbine_mape)

# --- Synthetic data ---
# Same procedure with an independent model instance.
mlr_synthetic = LinearRegression().fit(
    synthetic_data_input_train,
    synthetic_data_output_train,
)
synthetic_predictions = mlr_synthetic.predict(synthetic_data_input_test)
synthetic_mape = calculate_mape(synthetic_data_output_test, synthetic_predictions)
print('Synthetic Data: MAPE =', synthetic_mape)

In [None]:
import matplotlib.pyplot as plt

# Turbine data: predicted values against the real test-set values
turbine_fig, turbine_ax = plt.subplots(figsize=(10, 5))
turbine_ax.scatter(turbine_data_output_test, turbine_predictions)
turbine_ax.set_xlabel('Real Values')
turbine_ax.set_ylabel('Predicted Values')
turbine_ax.set_title('Turbine Data: Real vs Predicted Values')
turbine_ax.grid(True)
plt.show()

# Synthetic data: same view on a separate figure
synthetic_fig, synthetic_ax = plt.subplots(figsize=(10, 5))
synthetic_ax.scatter(synthetic_data_output_test, synthetic_predictions)
synthetic_ax.set_xlabel('Real Values')
synthetic_ax.set_ylabel('Predicted Values')
synthetic_ax.set_title('Synthetic Data: Real vs Predicted Values')
synthetic_ax.grid(True)
plt.show()

In [None]:
# scikit-learn removed load_boston in version 1.2, so the import raises
# ImportError on current releases. When that happens, define a stand-in that
# rebuilds the same arrays from the original StatLib file, following the
# recipe given in scikit-learn's removal notice.
try:
    from sklearn.datasets import load_boston
except ImportError:
    from sklearn.utils import Bunch

    def load_boston():
        """Rebuild the Boston Housing data without sklearn's bundled loader.

        Downloads the raw text file from StatLib (network access required).
        Per scikit-learn's removal notice, the file has 22 preamble lines and
        each record is spread over two physical rows, so even rows plus the
        first two values of odd rows form the 13 features and the third value
        of each odd row is the target.

        Returns:
            Bunch with ``data`` (feature matrix), ``target`` (MEDV values)
            and ``feature_names``, the attributes this notebook reads.
        """
        import numpy as np

        raw = pd.read_csv(
            'http://lib.stat.cmu.edu/datasets/boston',
            sep=r'\s+',
            skiprows=22,
            header=None,
        )
        values = raw.values
        return Bunch(
            data=np.hstack([values[::2, :], values[1::2, :2]]),
            target=values[1::2, 2],
            feature_names=np.array([
                'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
            ]),
        )

# Load the Boston Housing Dataset
boston = load_boston()
boston_data = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_data['MEDV'] = boston.target

# Display the first few rows of the dataset
boston_data.head()

In [None]:
# MEDV was appended as the last column of boston_data, so the last column is
# the target and every column before it is a feature.
boston_data_output = boston_data.iloc[:, -1].values
boston_data_input = boston_data.iloc[:, :-1].values

# Hold out 20% of the rows for testing (seed fixed at 42)
(
    boston_data_input_train,
    boston_data_input_test,
    boston_data_output_train,
    boston_data_output_test,
) = train_test_split(
    boston_data_input,
    boston_data_output,
    test_size=0.2,
    random_state=42,
)

# Report the shape of each partition's feature matrix
print(
    'Boston Housing Data: Training set shape:', boston_data_input_train.shape,
    'Test set shape:', boston_data_input_test.shape,
)

In [None]:
# The Boston Housing data is loaded into `boston_data` by the earlier
# "Load the Boston Housing Dataset" cell. This cell repeated that load
# verbatim (including the load_boston import, which raises ImportError on
# scikit-learn >= 1.2), so the duplicate has been removed.

In [None]:
from sklearn.model_selection import train_test_split

# boston_data_input / boston_data_output and their train/test partitions are
# created by the earlier "Split the dataset" cell. This cell repeated the same
# split (MEDV as target, test_size=0.2, random_state=42) and produced identical
# arrays, so the duplicate has been removed.

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the Boston training split; fit() returns the
# estimator, so construction and fitting are chained.
mlr_boston = LinearRegression().fit(
    boston_data_input_train,
    boston_data_output_train,
)

# Predict the target for the held-out Boston rows
boston_predictions = mlr_boston.predict(boston_data_input_test)
 

In [None]:
import numpy as np

# Define a function to calculate the Mean Absolute Percentage Error (MAPE)
def calculate_mape(y_true, y_pred):
    """Return the Mean Absolute Percentage Error (MAPE), in percent.

    Args:
        y_true: Array-like of observed target values (list, NumPy array, ...).
        y_pred: Array-like of predicted values with the same shape as y_true.

    Returns:
        mean(|y_true - y_pred| / |y_true|) * 100, computed over the entries
        whose true value is non-zero. NaN when there is no such entry
        (including empty input).

    Raises:
        ValueError: If y_true and y_pred do not have the same shape.
    """
    # Coerce to float arrays so plain Python lists work and the arithmetic is
    # element-wise by position.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Refuse silent broadcasting: (n,) against (n, 1) would otherwise expand
    # to an (n, n) error matrix and yield a meaningless score.
    if actual.shape != predicted.shape:
        raise ValueError(
            f'y_true and y_pred must have the same shape, '
            f'got {actual.shape} and {predicted.shape}'
        )

    # A percentage error is undefined where the true value is zero; dividing
    # there would turn the whole mean into inf/nan, so skip those entries.
    defined = actual != 0
    if not defined.any():
        return float('nan')

    relative_error = (actual[defined] - predicted[defined]) / actual[defined]
    return np.mean(np.abs(relative_error)) * 100

# mlr_boston and boston_predictions are produced by the preceding Boston
# training cell; fitting the same model on the same split again here was
# redundant, so only the evaluation remains.

# Calculate the MAPE for the Boston Housing predictions
boston_mape = calculate_mape(boston_data_output_test, boston_predictions)
print('Boston Housing Data: MAPE =', boston_mape)

In [None]:
import matplotlib.pyplot as plt

# Boston Housing data: predicted values against the real test-set values
boston_fig, boston_ax = plt.subplots(figsize=(10, 5))
boston_ax.scatter(boston_data_output_test, boston_predictions)
boston_ax.set_xlabel('Real Values')
boston_ax.set_ylabel('Predicted Values')
boston_ax.set_title('Boston Housing Data: Real vs Predicted Values')
boston_ax.grid(True)
plt.show()