In [2]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.impute import SimpleImputer
import plotly.express as px

In [None]:
# Load the raw CSV export. Column names embed a turbine tag such as "WTG0050".
df = pd.read_csv("BulkDataLoaderGroupedData_1336_2023_12_17_12_00_00_AM_2023_12_23_11_59_59_PM__WGN Dec23 data v2.csv")

# Substrings that identify the signals used as clustering features.
patterns = [
    "Active_Power",
    "Gearbox_Lube_Oil_Pressure_Status",
    "Gen_RPM_CCU",
    "L1_Current"
]

# Remove the columns related to turbine 60 and 61 since they do not have the
# Pressure_Status columns.
# Each token needs its own membership test: `"0060" or "0061" in col` is always
# truthy (a non-empty string literal), which previously selected every column.
missing_columns = [col for col in df.columns if "0060" in col or "0061" in col]
df = df.drop(columns=missing_columns)

# Identify the remaining columns matching the patterns
columns_for_all_turbines = [col for col in df.columns if any(pattern in col for pattern in patterns)]

# Filter the dataframe based on these columns
df_filtered_all_turbines = df[columns_for_all_turbines]

# Coerce every feature to numeric; cells that cannot be parsed become NaN.
df = df_filtered_all_turbines.apply(pd.to_numeric, errors='coerce')

active_power_columns = [col for col in df.columns if "Active_Power" in col]
Gearbox_Lube_Oil_Pressure_Status_Columns = [col for col in df.columns if "Gearbox_Lube_Oil_Pressure_Status" in col]

# Keep only the rows in which every Active_Power reading is <= 0 (rows with a
# positive or missing reading in any turbine are discarded).
# NOTE(review): this keeps non-positive power only; confirm that `<= 0` rather
# than `>= 0` is the intended direction of the filter.
non_positive_power_rows = (df[active_power_columns] <= 0).all(axis=1)
df = df.loc[non_positive_power_rows].copy()

# Row-to-row change in active power. Computed after the row filter, so the
# difference is taken between consecutive *retained* rows.
active_power_diffs = df[active_power_columns].diff().add_suffix("_Diff")

# Rolling average of the pressure status over the last `window_size` rows;
# the first `window_size - 1` rows are NaN.
window_size = 30
pressure_status_rolling_avgs = (
    df[Gearbox_Lube_Oil_Pressure_Status_Columns]
    .rolling(window=window_size)
    .mean()
    .add_suffix("_Rolling_Avg")
)

# Attach all derived columns in one step instead of inserting them one by one.
df = pd.concat([df, active_power_diffs, pressure_status_rolling_avgs], axis=1)



In [None]:
# Cluster on the feature columns only. A 'Cluster' column left over from an
# earlier run of this cell is excluded so that re-running the cell does not
# feed the previous labels back in as a feature.
feature_columns = [col for col in df.columns if col != 'Cluster']

# SimpleImputer(strategy='mean') discards columns with no observed value, which
# would make the imputed array narrower than the column list passed below.
# Dropping those columns up front keeps data and column names in step.
features = df[feature_columns].dropna(axis=1, how='all')

# Fill the remaining gaps (e.g. leading NaNs from diff/rolling) with column means.
imputer = SimpleImputer(strategy='mean')
df_filled = pd.DataFrame(imputer.fit_transform(features), columns=features.columns)

# Standardise so that no single signal dominates the Euclidean distance.
scaler = StandardScaler()
df_normalized = scaler.fit_transform(df_filled)

# Inertia for k = 1..19, collected for the elbow plot below.
inertia = []
k_range = range(1, 20)
for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(df_normalized)
    inertia.append(kmeans.inertia_)

# Optional diagnostics used to pick `chosen_k`; uncomment to run.
#
# # Plotting the Elbow Method
# import matplotlib.pyplot as plt
# plt.plot(k_range, inertia, '-o')
# plt.xlabel('Number of clusters, k')
# plt.ylabel('Inertia')
# plt.title('Elbow Method For Optimal k')
# plt.show()
#
# # Calculate Silhouette Score for each k
# for k in k_range[1:]:  # silhouette score can't be calculated for k=1
#     kmeans = KMeans(n_clusters=k, random_state=42)
#     kmeans.fit(df_normalized)
#     score = silhouette_score(df_normalized, kmeans.labels_)
#     print(f"Silhouette Score for k={k}: {score}")

# Final model. fit_predict returns one label per row in row order, so the
# labels are assigned positionally to df.
chosen_k = 7
kmeans = KMeans(n_clusters=chosen_k, random_state=42)
df['Cluster'] = kmeans.fit_predict(df_normalized)


In [None]:
# Select the rows assigned to cluster label 6 and preview the first ten.
in_cluster_six = df['Cluster'].eq(6)
clusterData = df.loc[in_cluster_six]

clusterData.head(10)

In [None]:
# One 3-D scatter per turbine (WTG0050 .. WTG0067), coloured by cluster label.
# Turbines 60 and 61 are skipped.
SKIPPED_TURBINES = (60, 61)

for turbine_number in range(50, 68):
    if turbine_number in SKIPPED_TURBINES:
        continue

    column_prefix = f'GN1_GE23xxx_WTG00{turbine_number}_'
    pressure_column = column_prefix + 'Gearbox_Lube_Oil_Pressure_Status_Rolling_Avg'
    power_diff_column = column_prefix + 'Active_Power_Diff'
    rpm_column = column_prefix + 'Gen_RPM_CCU'

    # Short axis titles so the long column names do not crowd the figure.
    axis_labels = {
        pressure_column: "GB_Pressure_Status_Rolling_Avg",
        power_diff_column: "Active_Power_Diff",
        rpm_column: "Gen_RPM_CCU",
    }

    fig = px.scatter_3d(
        df,
        x=pressure_column,
        y=power_diff_column,
        z=rpm_column,
        color='Cluster',
        labels=axis_labels,
        title=f"Turbine {turbine_number}",
    )
    fig.show()

### Work in progress: combining the per-turbine columns

In [None]:
# Stack the per-turbine feature columns into one long table with generic
# column names, so all turbines can be shown in a single 3-D scatter.
column_list = ['Gearbox_Lube_Oil_Pressure_Status_Rolling_Avg', 'Active_Power_Diff', 'Gen_RPM_CCU']
turbine_prefix = 'GN1_GE23xxx_WTG00'

per_turbine_frames = []
for turbine_number in range(50, 69):
    # Map this turbine's column names onto the shared generic names.
    generic_names = {
        f'{turbine_prefix}{turbine_number}_{column_name}': column_name
        for column_name in column_list
    }

    # Use a turbine only when all three of its columns exist; a partial match
    # would otherwise be padded with another turbine's values.
    if not all(source_column in df.columns for source_column in generic_names):
        continue

    # Carry the row's cluster label along so the stacked rows can be coloured,
    # and record which turbine each row came from.
    turbine_frame = (
        df[list(generic_names) + ['Cluster']]
        .rename(columns=generic_names)
        .assign(Turbine=turbine_number)
    )
    per_turbine_frames.append(turbine_frame)

if not per_turbine_frames:
    raise ValueError("No turbine has all of the columns in column_list")

# Concatenate once. `df` itself is left unmodified so this cell can be re-run.
df_combined = pd.concat(per_turbine_frames, axis=0, ignore_index=True)

fig = px.scatter_3d(df_combined, x='Gearbox_Lube_Oil_Pressure_Status_Rolling_Avg', y='Active_Power_Diff', z='Gen_RPM_CCU',
              color='Cluster', hover_data=['Turbine'])
fig.show()