In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Read the PV measurements and discard every row that has a missing value
df = pd.read_csv("PV_Data.csv").dropna()

# Descriptive statistics: shape is (rows, columns) of the cleaned frame
length_ob, length_col = df.shape
print("Number of attributes:", length_col, "Number of observations:", length_ob)

# Last expression of the cell, so the summary table is rendered as output
df.describe()


In [None]:
# Graphic Summaries

# Histograms: one standalone 10x8 figure per column listed below
histogram_columns = [
    'Voltage - String 1',
    'Voltage - String 2',
    'Current - String 1',
    'Current - String 2',
    'Irradiance',
    'PV Module Temperature',
]

for column_name in histogram_columns:
    plt.figure(figsize=(10, 8))
    sns.histplot(df[column_name])
    plt.title('Histogram - ' + column_name)
    plt.show()


In [None]:
# Boxplots: one standalone 10x8 figure per column listed below
boxplot_columns = (
    'Voltage - String 1',
    'Voltage - String 2',
    'Current - String 1',
    'Current - String 2',
    'Irradiance',
    'PV Module Temperature',
)

for boxplot_column in boxplot_columns:
    plt.figure(figsize=(10, 8))
    sns.boxplot(df[boxplot_column])
    plt.title(f'Boxplot - {boxplot_column}')
    plt.show()



In [None]:

# Correlation heatmap over the first six columns of the dataframe
feature_columns = df.iloc[:, :6]
correlation_matrix = feature_columns.corr()

sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Heatmap')
plt.show()


In [None]:
# Pairplot: scatter/histogram grid over the columns held in feature_columns
sns.pairplot(feature_columns)
# pairplot draws a grid of subplots, so plt.title() would only label the
# current axes (one cell of the grid). A figure-level suptitle labels the
# whole plot; y > 1 lifts it above the top row so it does not overlap it.
plt.suptitle('Pairplot', y=1.02)
plt.show()

In [None]:
# Preprocess the data
# Columns 0-5 of the frame are used as features, column 6 as the target.
data_numpy = df.to_numpy()
X, y = data_numpy[:, :6], data_numpy[:, 6]

# Min-max scaled copy of the features
X_scaled_min_max = preprocessing.MinMaxScaler().fit_transform(X)

# Standardised copy of the features; `scaler` is left bound to this
# fitted StandardScaler at the end of the cell
scaler = preprocessing.StandardScaler()
X_scaled_standard_sc = scaler.fit_transform(X)

In [None]:
# Model training and evaluation
#
# Every classifier is fitted and scored (accuracy on a 30% hold-out split)
# for each combination of feature scaling and train/test split seed, and one
# results table is printed per combination.
classifiers = {
    'Decision Trees': DecisionTreeClassifier(),
    'k-NN': KNeighborsClassifier(n_neighbors=5),
    'Naïve Bayes': GaussianNB(),
    'SVM (RBF)': SVC(kernel='rbf'),  # SVM with RBF kernel
    'SVM (Poly)': SVC(kernel='poly'),  # SVM with Polynomial kernel
    'Neural Networks': MLPClassifier()
}

# Pair each pre-scaled feature matrix with a readable label so the report
# header can name the scaling instead of dumping the raw array.
feature_sets = {
    'Min-Max scaled': X_scaled_min_max,
    'Standard scaled': X_scaled_standard_sc,
}

# NOTE(review): the scaled matrices were fitted on the full dataset before
# splitting, so test-set statistics influence the training features; consider
# fitting the scaler on X_train only. The tree and MLP classifiers are also
# created without a random_state, so their scores can vary between runs.
for features_name, features in feature_sets.items():
    for initial_state in [1, 20, 40]:
        X_train, X_test, y_train, y_test = train_test_split(features, y, random_state=initial_state, test_size=0.3)

        results = {'Classifier': [], 'Accuracy': []}

        for classifier_name, clf in classifiers.items():
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)

            results['Classifier'].append(classifier_name)
            results['Accuracy'].append(accuracy)

        # f-string: the original plain string printed the placeholders literally
        print(f'\nResults for Features ({features_name}) with Random State {initial_state}:\n')
        results_df = pd.DataFrame(results)
        print(results_df)
