Energy prediction project

1. Import Libraries

In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt
import seaborn as sns
import os





2. Load and Prepare Data

In [18]:

# Load data
# The dataset directory can be overridden through the ENERGY_DATA_DIR
# environment variable so the notebook is not tied to a single machine.
# When the variable is unset, the original location is used unchanged.
DATA_DIR = os.environ.get(
    'ENERGY_DATA_DIR',
    '/home/ahmed/Documents/GitHub/Energy-prediction-IOT/Data set',
)
train_data = pd.read_csv(os.path.join(DATA_DIR, 'training.csv'))
test_data = pd.read_csv(os.path.join(DATA_DIR, 'testing.csv'))

# Drop irrelevant features
# The same columns are removed from both splits so their schemas stay aligned.
train_data = train_data.drop(columns=['rv1', 'rv2'])
test_data = test_data.drop(columns=['rv1', 'rv2'])

# Separate features and target variable
# 'Appliances' is the regression target; every other column is a candidate feature.
X_train = train_data.drop(columns=['Appliances'])
y_train = train_data['Appliances']
X_test = test_data.drop(columns=['Appliances'])
y_test = test_data['Appliances']

# Define categorical and numerical features
# Numerical columns are detected from the training frame's dtypes; the
# categorical columns are listed explicitly.
# NOTE(review): any column that is neither int64/float64 nor listed below is
# left out of the ColumnTransformer and is therefore dropped by its default
# remainder handling - confirm that this is intended.
categorical_features = ['WeekStatus', 'Day_of_week']
numerical_features = X_train.select_dtypes(include=['int64', 'float64']).columns

# Create preprocessing pipelines for both types of features
#   num: median imputation followed by standardisation
#   cat: constant 'missing' imputation followed by one-hot encoding
#        (categories unseen during fit are ignored at transform time)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ]), numerical_features),
        ('cat', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical_features)
    ]
)

# Apply preprocessing
# The preprocessor is fitted on the training split only and then applied to
# the test split, so no test-set statistics leak into the fitted transformers.
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)


3. Feature Selection 

In [19]:

# Initialize RFE with RandomForestRegressor
# A fixed random_state makes the forest - and therefore the RFE ranking and
# the selected feature set - reproducible across kernel restarts.
N_FEATURES_TO_SELECT = 20
RFE_RANDOM_STATE = 42
rfe = RFE(
    estimator=RandomForestRegressor(random_state=RFE_RANDOM_STATE),
    n_features_to_select=N_FEATURES_TO_SELECT,
)
rfe.fit(X_train_preprocessed, y_train)

# Get selected features
# Since ColumnTransformer and OneHotEncoder change feature names, we need to use the transformed feature names.
# The encoder is looked up by its transformer name rather than by position so
# that reordering the transformers does not silently pick the wrong step.
onehot_encoder = preprocessor.named_transformers_['cat'].named_steps['onehot']
# The encoder is fitted on the imputer's array output, so without explicit
# input names it would emit placeholder names such as 'x0_...'; passing the
# original column names yields readable 'WeekStatus_...' / 'Day_of_week_...'.
feature_names = np.array(onehot_encoder.get_feature_names_out(categorical_features))
# Output columns follow the transformer order: numerical block, then one-hot block.
all_feature_names = np.concatenate([numerical_features, feature_names])
# Guard the alignment between the name vector and the RFE support mask.
assert len(all_feature_names) == X_train_preprocessed.shape[1], (
    "feature name count does not match the number of preprocessed columns"
)
selected_features = all_feature_names[rfe.support_]

print("Selected Features:", selected_features)


Selected Features: ['RH_1' 'T2' 'RH_2' 'T3' 'RH_3' 'T4' 'RH_4' 'T5' 'RH_5' 'T6' 'RH_6' 'RH_7'
 'T8' 'RH_8' 'T9' 'RH_9' 'Press_mm_hg' 'RH_out' 'Tdewpoint' 'NSM']
