In [6]:
import warnings
import os

import pandas as pd
import glob

import itertools

import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
import matplotlib as mpl

from sklearn import mixture
warnings.filterwarnings("ignore")

# function to pull the data
def data_pull():
    # Mounting the google drive
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)

    # Loading the twin data
    if not os.path.exists('twindata'):
      !unzip -q 'drive/My Drive/Data/twindata.zip'


    # Loading one year data to a dataframe
    path = r'twindata' # use your path
    all_files = glob.glob(path + "/*.csv")

    li = []

    for filename in sorted(all_files):
        df = pd.read_csv(filename, index_col=None, header=0)
        li.append(df)

    df_year = pd.concat(li, axis=0, ignore_index=True)
    # returning one year data
    return df_year
    

# Function to pre-process the irradiance and active power data
def preprocess_irradiance_activepower(df_year):
    """Build the (irradiance, active power) sample matrix from the raw readings.

    Irradiance rows are those whose ``PME_MeasurementName`` is
    ``'Irradiance Global (W/m^2)'``; active-power rows are those of source
    ``'WP_SF_MVPS4.PM1'`` with measurement ``'Active Power'``. Missing
    irradiance values are replaced by 0 at night (hour > 18 or hour < 6) and
    by the mean of all daytime readings (6 <= hour <= 18) during the day.
    The two series are then inner-joined on ``TimestampUTC``.

    Args:
        df_year: DataFrame with the columns ``PME_SourceName``,
            ``PME_MeasurementName``, ``TimestampUTC`` (strings in
            ``%d/%m/%Y %I:%M:%S.%f %p`` format) and ``PME_Value``.
            It is not modified.

    Returns:
        numpy.ndarray: one row per matched timestamp, with the columns
        ``irradiance`` and ``activepower`` in that order.

    Note:
        Missing active-power values are not filled here, and irradiance rows
        are not restricted to a single source.
    """
    ts_format = '%d/%m/%Y %I:%M:%S.%f %p'

    # Load irradiance data. The explicit .copy() gives an independent frame,
    # so the assignments below are not chained assignments on a slice of
    # df_year (which triggers SettingWithCopyWarning).
    irradiance = df_year.loc[
        df_year['PME_MeasurementName'] == 'Irradiance Global (W/m^2)'
    ].copy()
    irradiance['TimestampUTC'] = pd.to_datetime(irradiance['TimestampUTC'], format=ts_format)

    # Pre-processing: fill-up the NaN values of Irradiance by '0' at Night and 'Mean Value' at Day
    hour = irradiance['TimestampUTC'].dt.hour
    is_day = (hour <= 18) & (hour >= 6)
    is_night = (hour > 18) | (hour < 6)
    is_missing = irradiance['PME_Value'].isnull()
    # The mean is taken before any filling, so it only reflects measured values
    irradiance_day_mean = irradiance.loc[is_day, 'PME_Value'].mean()
    irradiance.loc[is_missing & is_night, 'PME_Value'] = 0
    irradiance.loc[is_missing & is_day, 'PME_Value'] = irradiance_day_mean

    # Load active power data
    activepower = df_year.loc[
        (df_year['PME_SourceName'] == 'WP_SF_MVPS4.PM1')
        & (df_year['PME_MeasurementName'] == 'Active Power')
    ].copy()
    activepower['TimestampUTC'] = pd.to_datetime(activepower['TimestampUTC'], format=ts_format)

    # Renaming the columns by readable identifiers (new frames, no inplace mutation)
    irradiance_select = irradiance[['TimestampUTC', 'PME_Value']].rename(
        columns={'PME_Value': 'irradiance'})
    activepower_select = activepower[['TimestampUTC', 'PME_Value']].rename(
        columns={'PME_Value': 'activepower'})

    # Merging the data (inner join: only timestamps present in both series)
    df_merged = pd.merge(irradiance_select, activepower_select, on='TimestampUTC')

    # BEFORE THIS POINT, LOAD THE DATA FOR THE CHOICE OF ATTRIBUTES AND THE TIMERANGE BY THE USER

    # Convert the dataframe to array of values and return it for further computations
    return df_merged[['irradiance', 'activepower']].to_numpy()

# Function to compute the ellipse attributes (centre, axis lengths, angle) of each mixture component
def att_ellipse(means, covariances, index, title, nclusters):
    """Compute centre, axis lengths and orientation of one ellipse per component.

    For every (mean, covariance) pair the covariance is eigen-decomposed.
    The axis lengths are ``2 * sqrt(2) * sqrt(eigenvalue)`` and the angle is
    the direction, in degrees, of the eigenvector belonging to the smallest
    eigenvalue, shifted by 180.

    Args:
        means: sequence of 2-element component means.
        covariances: sequence of 2x2 symmetric covariance matrices, in the
            same order as ``means``.
        index: unused; kept so existing callers keep working.
        title: unused; kept so existing callers keep working.
        nclusters: number of result slots to allocate. Slots beyond
            ``len(means)`` stay ``None``.

    Returns:
        tuple: ``(ell_centers, ell_axislengths, ell_angles)``, three lists of
        length ``nclusters`` whose entries are ``[i, x, y]``,
        ``[i, length_0, length_1]`` (ascending, as returned by ``eigh``) and
        ``[i, angle_in_degrees]``.
    """
    ell_centers = [None] * nclusters
    ell_axislengths = [None] * nclusters
    ell_angles = [None] * nclusters

    # Find the attributes of the ellipse
    for i, (mean, covar) in enumerate(zip(means, covariances)):
        eigvals, eigvecs = linalg.eigh(covar)
        axis_lengths = 2. * np.sqrt(2.) * np.sqrt(eigvals)

        # eigh stores the eigenvectors as COLUMNS: column 0 belongs to
        # eigvals[0]. Reading row 0 only gives the same direction when the
        # eigenvector matrix happens to be symmetric.
        u = eigvecs[:, 0] / linalg.norm(eigvecs[:, 0])

        # Orientation of that axis; a vertical axis (u[0] == 0) maps to
        # +/-90 degrees through arctan(+/-inf), so the division warning is moot
        with np.errstate(divide='ignore'):
            angle = np.arctan(u[1] / u[0])
        angle = 180. * angle / np.pi  # convert to degrees

        ell_centers[i] = [i, mean[0], mean[1]]
        ell_axislengths[i] = [i, axis_lengths[0], axis_lengths[1]]
        ell_angles[i] = [i, 180. + angle]
    return ell_centers, ell_axislengths, ell_angles

In [7]:
if __name__ == "__main__":

    # Call the functions to load X
    res = data_pull()
    X = preprocess_irradiance_activepower(res)

    # Fit a Bayesian Gaussian mixture with `cluster_numbers` components.
    # random_state pins the initialisation so a re-run gives the same clusters.
    cluster_numbers = 4
    dpgmm = mixture.BayesianGaussianMixture(n_components=cluster_numbers,
                                            covariance_type='full',
                                            random_state=0).fit(X)

    # Samples and their predicted cluster ids (reused by later cells)
    XY_VALUES = X
    CLUSTERID = dpgmm.predict(X)

    # Ellipse attributes of every fitted component; the index and title
    # arguments are not used by att_ellipse
    ELL_CENTERS, ELL_AXISLENGTHS, ELL_ANGLES = att_ellipse(dpgmm.means_, dpgmm.covariances_, 1,
                'Bayesian Gaussian Mixture clustering', cluster_numbers)
    print(ELL_CENTERS)
    print(ELL_AXISLENGTHS)
    print(ELL_ANGLES)

ModuleNotFoundError: No module named 'google'

In [None]:
# Pair every sample with its predicted cluster id, one dict per sample.
# The keys are string literals: bare `value` / `name` identifiers are not
# defined anywhere in this notebook and raise NameError on a fresh kernel.
lists = [{'value': xy, 'name': cluster_id}
         for xy, cluster_id in zip(XY_VALUES, CLUSTERID)]

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to local .py files are picked up.
%load_ext autoreload
%autoreload 2

In [None]:
import  clustering_prediction  as c

# Run the pipeline exposed by the clustering_prediction module.
# NOTE(review): boot's signature is not visible here — the arguments are
# presumably comma-separated source names, comma-separated measurement names,
# start timestamp, end timestamp and the number of clusters; confirm against
# clustering_prediction.boot.
c.boot( 'WP_SF_MVPS4.WS1,WP_SF_MVPS4.PM1', 'Irradiance Global (W/m^2),Active Power', '2021-10-01 00:00:00' , '2021-11-01 00:00:00' , 4 )

[['92.16069793701172' '21.799999237060547']
 ['64.76708984375' '22.200000762939453']
 ['44.6308479309082' '22.600000381469727']
 ...
 ['179.25430297851562' '39.79999923706055']
 ['180.66973876953125' '41.79999923706055']
 ['183.28675842285156' '39.400001525878906']]
[2 2 2 ... 1 1 1]
[['439' '92.16069793701172']
 ['309' '64.76708984375']
 ['237' '44.6308479309082']
 ...
 ['943' '179.25430297851562']
 ['948' '180.66973876953125']
 ['971' '183.28675842285156']]
[0 3 3 ... 0 0 0]


In [5]:
# Scratch assignments: two integers and a three-element list.
# NOTE(review): nothing visible in this notebook reads them — confirm whether the cell is still needed.
i, ij = 2, 10
j = list(range(1, 4))