# In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import StackingRegressor
import lightgbm as lgb
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# Load the sensor readings and keep only the columns used in this analysis.
sensor_columns = ['Temp 1', 'Pressure 2', 'Light 1', 'UV', 'Humidity', 'Power']
df = pd.read_csv('ws_updated_p.csv')[sensor_columns]

# Bin the continuous temperature into four ordered categories.
# pd.cut uses right-closed intervals by default:
#   (-inf, 20] -> 'very Low', (20, 30] -> 'Minimum',
#   (30, 40]   -> 'Medium',   (40, inf) -> 'Maximum'
temp_bin_edges = [-np.inf, 20, 30, 40, np.inf]
temp_bin_labels = ['very Low', 'Minimum', 'Medium', 'Maximum']
df['Temp Category'] = pd.cut(df['Temp 1'], bins=temp_bin_edges, labels=temp_bin_labels)

# Independent variables (X) and the binned temperature as target (y).
feature_columns = ['Pressure 2', 'Light 1', 'Power', 'Humidity', 'UV']
X = df[feature_columns]
y = df['Temp Category']

# Introduce roughly 5% missing data
# A reproducible random sample of feature cells is set to NaN so that the
# imputation step further down has something to fill in.
np.random.seed(42)
# 0.05 = 5% of all cells (the previous factor of 1.5 drew 150% of the cell
# count, which blanked out most of the feature matrix).
n_missing = int(X.size * 0.05)
# Row and column positions are drawn independently with replacement, so a few
# draws may hit the same cell; the realised share is therefore at most 5%.
missing_indices = (np.random.randint(0, X.shape[0], n_missing), np.random.randint(0, X.shape[1], n_missing))
# Write the NaNs into an explicit float copy and wrap it back into a
# DataFrame. Assigning through `X.values[...]` is not reliable: `.values`
# returns a copy for mixed-dtype frames (the NaNs are silently lost), and
# integer columns cannot hold NaN at all.
# NOTE(review): assumes every selected feature column is numeric -- confirm.
X_with_gaps = X.to_numpy(dtype=float, copy=True)
X_with_gaps[missing_indices] = np.nan
X = pd.DataFrame(X_with_gaps, index=X.index, columns=X.columns)

# Decide which rows are held out BEFORE fitting any preprocessing step, so
# that the imputer means and the chi-square scores are learned from training
# rows only (fitting them on all rows leaks test information into training).
# train_test_split shuffles by sample count and random_state only, so these
# positions are the same partition a direct split of the features would give.
row_positions = np.arange(len(X))
train_rows, test_rows = train_test_split(row_positions, test_size=0.2, random_state=42)

# Impute missing values using SimpleImputer (column means of the training rows)
imputer = SimpleImputer(strategy='mean')
imputer.fit(X.iloc[train_rows])
X_imputed = imputer.transform(X)

# Convert categorical variables into dummy variables
# The original index is kept so the frame stays aligned with `y`.
# NOTE: the imputed frame is entirely numeric, so get_dummies leaves it as is.
X_imputed_df = pd.DataFrame(X_imputed, columns=X.columns, index=X.index)
X_encoded = pd.get_dummies(X_imputed_df)

# Apply Chi-Square test for feature selection
# NOTE: chi2 requires non-negative feature values. With five input columns,
# k=5 keeps every feature, so this step currently performs no real selection.
selector = SelectKBest(chi2, k=5)  # Select top 5 features based on Chi-Square test
selector.fit(X_encoded.iloc[train_rows], y.iloc[train_rows])
X_selected = selector.transform(X_encoded)

# Split the data into training and testing sets (80% / 20%)
X_train, X_test = X_selected[train_rows], X_selected[test_rows]
y_train, y_test = y.iloc[train_rows], y.iloc[test_rows]

# Encode the target variable
# The encoder is fitted on the full label vocabulary rather than on y_train
# alone; otherwise transform(y_test) raises ValueError whenever a category is
# absent from the training split. Only the set of label names is learned here,
# so this does not leak test information.
# LabelEncoder numbers classes in sorted order of their names.
label_encoder = LabelEncoder()
label_encoder.fit(y)
y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Level-0 learners of the stack: two gradient-boosted tree models and an
# ordinary least-squares regression, each registered under a short name.
lgb_base = lgb.LGBMRegressor(random_state=42)
linear_base = LinearRegression()
xgb_base = XGBRegressor(random_state=42)
base_models = [('lgb', lgb_base), ('mvr', linear_base), ('xgb', xgb_base)]

# Level-1 (meta) learner that combines the base models' predictions.
meta_learner = lgb.LGBMRegressor(random_state=42)

# passthrough=True: the meta learner is trained on the base predictions
# together with the original input features.
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_learner,
    passthrough=True,
)

# Fit the stacked ensemble on the training split, then predict the held-out
# rows (fit() returns the fitted estimator, so the two calls can be chained).
y_pred = stacking_regressor.fit(X_train, y_train_encoded).predict(X_test)

# Score the continuous predictions against the integer-encoded test labels.
mse = mean_squared_error(y_true=y_test_encoded, y_pred=y_pred)
rmse = np.sqrt(mse)  # same units as the encoded target
mae = mean_absolute_error(y_true=y_test_encoded, y_pred=y_pred)
r2 = r2_score(y_true=y_test_encoded, y_pred=y_pred)

# Report the metrics
print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2 Score: {r2}")
