In [1]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Load the raw observations.
df = pd.read_excel("Actuals.xlsx")

# Strip leading/trailing whitespace from the headers *before* any column is
# looked up by name, so a header such as ' Time' or 'Load (kW) ' still resolves.
df.columns = df.columns.str.strip()

# Target variable.
y1 = df['Load (kW)']

# Calendar components derived from the timestamp column; pd.to_datetime leaves
# an already-parsed datetime column unchanged.
timestamps = pd.to_datetime(df['Time'])

# Feature frame: everything except the target and the raw timestamp, followed
# by the derived Year/Month/Day/Hour columns. Built in one expression so the
# cell produces the same X1 every time it is re-run.
X1 = df.drop(columns=['Load (kW)', 'Time']).assign(
    Year=timestamps.dt.year,
    Month=timestamps.dt.month,
    Day=timestamps.dt.day,
    Hour=timestamps.dt.hour,
)

# Columns handed to each branch of the preprocessor.
numeric_features = ['Pressure_kpa', 'Cloud Cover (%)', 'Humidity (%)', 'Temperature (C)', 'Wind Direction (deg)', 'Wind Speed (kmh)', 'Year', 'Month', 'Day', 'Hour']
categorical_features = []

# Fail early with the full list of missing columns; carrying on would leave
# X_preprocessed unassigned for the cells below.
missing_columns = [
    column
    for column in numeric_features + categorical_features
    if column not in X1.columns
]
if missing_columns:
    raise KeyError(
        f"Columns missing from the DataFrame: {missing_columns}. "
        "Ensure all columns specified in numeric_features and "
        "categorical_features are present in the DataFrame. "
        f"Available columns: {list(X1.columns)}"
    )

# Numeric columns are standardised.
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# Categorical columns are one-hot encoded (the list is currently empty).
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder())
])

# Combine both branches; output columns follow the order of numeric_features,
# then the encoded categorical columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# NOTE(review): the scaler is fitted on every row here, before the train/test
# split performed further down, so test rows influence the scaling statistics.
# Consider fitting the preprocessor on the training split only.
X_preprocessed = preprocessor.fit_transform(X1)


In [None]:


# Features to drop from the preprocessed matrix.
features_to_remove = ['Wind Direction (deg)', 'Day', 'Year', 'Month', 'Humidity (%)']

# Column positions in X_preprocessed mirror the order of numeric_features.
# Only names that are still listed are looked up, so re-running this cell after
# the columns have already been dropped is a no-op instead of a ValueError.
columns_to_remove = [
    numeric_features.index(feature)
    for feature in features_to_remove
    if feature in numeric_features
]
if columns_to_remove:
    X_preprocessed = np.delete(X_preprocessed, columns_to_remove, axis=1)

# Keep the name list aligned with the remaining columns.
numeric_features = [feature for feature in numeric_features if feature not in features_to_remove]


In [None]:


# NOTE(review): this cell repeats the feature-removal cell directly above it
# and can be deleted. It is written so that it does nothing when the features
# have already been removed, rather than raising ValueError on the lookup.
features_to_remove = ['Wind Direction (deg)', 'Day', 'Year', 'Month', 'Humidity (%)']

# Look up positions only for features that are still present.
columns_to_remove = [
    numeric_features.index(feature)
    for feature in features_to_remove
    if feature in numeric_features
]
if columns_to_remove:
    X_preprocessed = np.delete(X_preprocessed, columns_to_remove, axis=1)

# Keep the name list aligned with the remaining columns.
numeric_features = [feature for feature in numeric_features if feature not in features_to_remove]


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Hold out 10% of the rows for testing; the fixed random_state keeps the split
# reproducible.
# NOTE(review): y_normalized is not assigned in any visible cell of this
# notebook; it has to come from a cell that defines the normalised target
# before this one runs — restore that cell or this line raises NameError on a
# fresh kernel.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X_preprocessed, y_normalized, test_size=0.1, random_state=42
)

# Instantiate via the LinearRegression name imported above (there is no
# `linear_model` name in scope).
# NOTE(review): these settings are described as the "best parameters", which
# presumably come from the grid-search cell further down — confirm.
reg_model = LinearRegression(copy_X=True, fit_intercept=False, positive=False)

# Fit on the training split.
reg_model.fit(X1_train, y1_train)

# Predict on the held-out split.
y1_pred = reg_model.predict(X1_test)

# 1-D view of the predictions for tabular display.
y1_pred_flat = np.ravel(y1_pred)

# Side-by-side table of actual vs. predicted values. np.ravel accepts both a
# NumPy array and a pandas Series for the target.
comparison1 = pd.DataFrame({'Actual': np.ravel(y1_test), 'Predicted': y1_pred_flat})
print(comparison1)


In [None]:
from sklearn.metrics import r2_score, mean_squared_error

# Coefficient of determination on the held-out split, with its two-decimal
# string rendering.
r_squared = r2_score(y1_test, y1_pred)
formatted_r_squared = format(r_squared, ".2f")

# Mean squared error on the same predictions, rendered the same way.
mse = mean_squared_error(y1_test, y1_pred)
formatted_mse = format(mse, ".2f")

# Report both metrics.
print("R-squared score:", formatted_r_squared)
print("Mean Squared Error (MSE):", formatted_mse)


In [None]:
# Coefficient vector of the fitted model. coef_ is 2-D when the model was
# fitted on a 2-D target and 1-D otherwise; atleast_2d(...)[0] yields the
# per-feature vector in both cases instead of a single scalar.
coefficients = np.atleast_2d(reg_model.coef_)[0]

# Magnitudes only, so features are ranked regardless of sign.
abs_coefficients = np.abs(coefficients)

# The name list must line up one-to-one with the fitted coefficients.
if len(abs_coefficients) != len(numeric_features):
    raise ValueError(
        f"Model has {len(abs_coefficients)} coefficients but numeric_features "
        f"lists {len(numeric_features)} names; re-run the preprocessing and "
        "training cells in order."
    )

# Pair each remaining feature name with its absolute coefficient and sort from
# largest to smallest.
feature_coefficients = pd.DataFrame({
    'Feature': np.array(numeric_features),
    'Coefficient': abs_coefficients
}).sort_values(by='Coefficient', ascending=False)

# Print the ranked table.
print("Features with Discriminating Power (Ranked by Coefficient):")
print(feature_coefficients)


In [None]:
from sklearn.model_selection import GridSearchCV


# Candidate settings: every combination of these three boolean options is tried.
param_grid = dict(
    fit_intercept=[True, False],
    copy_X=[True, False],
    positive=[True, False],
)

# 5-fold cross-validated search scored by negated MSE (higher is better).
grid_search = GridSearchCV(
    estimator=reg_model,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
)

# Run the search on the training split.
grid_search.fit(X1_train, y1_train)

# Report the winning combination.
print("Best Parameters:", grid_search.best_params_)

# best_score_ holds the negated MSE, so flip the sign for reporting.
print("Best Mean Squared Error:", -grid_search.best_score_)
