# **Regression Models**

In [1]:
# import necessary libraries
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# load the solar energy dataset into a pandas dataframe
df = pd.read_csv('/content/ALDAR-CP1.csv')

# select the independent and dependent variables for the model
X = df[['t (°C)', 'VAR', 'E (kWh)']]
y = df['P (W)']

# split the dataset into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# initialize a linear regression model
lr = LinearRegression()

# train the model on the training data
lr.fit(X_train, y_train)

# use the model to make predictions on the testing data
y_pred = lr.predict(X_test)

# evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print('Mean Squared Error:', mse)
print('R-squared Score:', r2)


Mean Squared Error: 2060.377462055457
R-squared Score: 0.27085674332877885


In [2]:
# Import the required libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset into a pandas DataFrame
df = pd.read_csv('/content/ALDAR-CP1.csv')

# Select the input and output variables
X = df[['t (°C)', 'P (W)', 'Freq (Hz)']]
y = df['E (kWh)']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform feature scaling on the input variables
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create a Ridge model and fit it to the training data
model = Ridge(alpha=0.1, solver='cholesky')
model.fit(X_train_scaled, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test_scaled)

# Evaluate the model's performance using mean squared error and R-squared score
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Mean Squared Error:', mse)
print('R-squared Score:', r2)


Mean Squared Error: 1218702.9733529892
R-squared Score: 0.013084438977701174


In [3]:
# Import the required libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset into a pandas DataFrame
df = pd.read_csv('/content/ALDAR-CP1.csv')

# Select the input and output variables
X = df[['t (°C)', 'P (W)', 'Freq (Hz)']]
y = df['E (kWh)']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest model and fit it to the training data
model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model's performance using mean squared error and R-squared score
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Mean Squared Error:', mse)
print('R-squared Score:', r2)


Mean Squared Error: 1252136.4163900528
R-squared Score: -0.013990234600102402


In [4]:
# Import the required libraries
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset into a pandas DataFrame
df = pd.read_csv('/content/ALDAR-CP1.csv')

# Select the input and output variables
X = df[['t (°C)', 'P (W)', 'Freq (Hz)']]
y = df['E (kWh)']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVR model and fit it to the training data
model = SVR(kernel='rbf', gamma='scale', C=1000)
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model's performance using mean squared error and R-squared score
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Mean Squared Error:', mse)
print('R-squared Score:', r2)


Mean Squared Error: 1352071.5596804658
R-squared Score: -0.09491852489133712


# **Classification Models**

In [5]:
# Import the required libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset into a pandas DataFrame
df = pd.read_csv('/content/ALDAR-CP1.csv')

# Define a new column for device status
df['Device_Status'] = df['Status'].apply(lambda x: 'Normal' if x == 0 else 'Abnormal')

# Select the input and output variables
X = df[['t (°C)', 'P (W)', 'Freq (Hz)', 'E (kWh)']]
y = df['Device_Status']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Decision Tree Classifier and fit it to the training data
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model's performance using accuracy score and classification report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print('Accuracy:', accuracy)
print('Classification Report:\n', report)


Accuracy: 0.993663594470046
Classification Report:
               precision    recall  f1-score   support

    Abnormal       1.00      1.00      1.00      1732
      Normal       0.00      0.00      0.00         4

    accuracy                           0.99      1736
   macro avg       0.50      0.50      0.50      1736
weighted avg       1.00      0.99      0.99      1736



In [7]:
# Import the required libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Load the dataset into a pandas DataFrame
df = pd.read_csv('/content/ALDAR-CP1.csv')

# Define a new column for device status
df['Device_Status'] = df['Status'].apply(lambda x: 'Normal' if x == 0 else 'Abnormal')

# Select the input and output variables
X = df[['t (°C)', 'P (W)', 'Freq (Hz)', 'E (kWh)']]
y = df['Device_Status']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Decision Tree Classifier and fit it to the training data
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model's performance using accuracy score, F1 score, and classification report
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, pos_label='Abnormal')
report = classification_report(y_test, y_pred)

print('Accuracy:', accuracy)
print('F1 Score:', f1)
print('Classification Report:\n', report)


Accuracy: 0.993663594470046
F1 Score: 0.9968217278243282
Classification Report:
               precision    recall  f1-score   support

    Abnormal       1.00      1.00      1.00      1732
      Normal       0.00      0.00      0.00         4

    accuracy                           0.99      1736
   macro avg       0.50      0.50      0.50      1736
weighted avg       1.00      0.99      0.99      1736



In [9]:
# Import the required libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from imblearn.under_sampling import RandomUnderSampler

# Load the dataset into a pandas DataFrame
df = pd.read_csv('/content/ALDAR-CP1.csv')

# Define a new column for device status
df['Device_Status'] = df['Status'].apply(lambda x: 'Normal' if x == 0 else 'Abnormal')

# Select the input and output variables
X = df[['t (°C)', 'P (W)', 'Freq (Hz)', 'E (kWh)']]
y = df['Device_Status']

# Apply undersampling to balance the classes
undersampler = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = undersampler.fit_resample(X, y)

# Split the resampled dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Create a Decision Tree Classifier and fit it to the training data
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model's performance using accuracy score, F1 score, and classification report
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, pos_label='Abnormal')
report = classification_report(y_test, y_pred)

print('Accuracy:', accuracy)
print('F1 Score:', f1)
print('Classification Report:\n', report)


Accuracy: 0.8666666666666667
F1 Score: 0.9
Classification Report:
               precision    recall  f1-score   support

    Abnormal       0.90      0.90      0.90        10
      Normal       0.80      0.80      0.80         5

    accuracy                           0.87        15
   macro avg       0.85      0.85      0.85        15
weighted avg       0.87      0.87      0.87        15

