In [1]:
import pandas as pd
import numpy as np
import sktime
import seaborn as snsc
import matplotlib.pyplot as plt
from convertcsv.import_preprocess_v4 import readcsvs
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sktime.transformations.panel.rocket import Rocket
from sktime.transformations.panel.rocket import MiniRocketMultivariate
from sktime.classification.kernel_based import RocketClassifier
from sklearn.linear_model import RidgeClassifierCV
from sklearn.impute import KNNImputer
from convertcsv.get_all_metrics_with_tags import get_all_metrics_with_tags
from visualization import graphs
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sktime.datatypes import convert_to
from sktime.datatypes import MTYPE_REGISTER
from collections import Counter
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sktime.classification.compose import ClassifierPipeline
from storage.retrieve_csv import retrieve_csv
from storage.store_csv import store_csv

In [9]:
# Load the two objects returned by the project storage helper.
# NOTE(review): presumably X is the feature table and y the labels, already
# scaled and trimmed upstream (inferred from the names only) — confirm.
# NOTE(review): this cell shows execution count 9 while the cells below show
# 3, 4 and 6, so the notebook was run out of order; the later cell that uses
# X only works if this cell has been run first.
X, y = retrieve_csv("scaled_and_trimmed")
# Read the stored variance table; orient="index" makes pandas treat the
# top-level JSON keys as row labels.
variances = pd.read_json("./storage/sorted_variance.json",orient="index")
# Cell output: total number of elements held in `variances`.
variances.size

32

In [3]:
class Try_Classifiers:
    """Fit and evaluate a classifier, retrying with alternative input layouts.

    Each call into the classifier is first attempted with the data as given;
    if that raises, the data is re-formatted with every converter listed in
    ``self.formats`` (in order) until one call succeeds.

    Design notes from the original author:
      * Do normalization/standardization outside of this class; only
        classifiers should be needed in here.
      * If multiple steps are passed in, the intended pipeline is
        fit_transform -> fit_predict; otherwise just fit_predict.
        (Only the single-classifier path is implemented so far.)
    """

    def __init__(self, X_train:pd.DataFrame, y_train=None, X_test=None, y_test=None):
        """Store the train/test data and remember the training frame's layout.

        Args:
            X_train: Training features. Its columns and index are kept so the
                manual format converters can rebuild a DataFrame from them.
            y_train: Training labels, or None.
            X_test: Test features, or None.
            y_test: Test labels, or None.
        """
        self.formats = [self.format_dflist, self.format_multiindex, self.format_multiindex_manual, self.format_multiindex_dflist]
        self.X_train_cols = X_train.columns
        self.X_train_index = X_train.index
        self.X_train = X_train
        # Always define the optional attributes (as None when not supplied) so
        # later reads cannot fail with AttributeError.
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

    def _try_function_with_formats(self, class_function, X, y=None):
        """Call ``class_function`` on X, falling back to re-formatted copies of X.

        The unmodified X is tried first, then ``fmt(X)`` for every converter in
        ``self.formats``. ``y`` is forwarded as the second positional argument
        only when it is not None.

        Returns:
            ``(result, True)`` from the first call that does not raise, or
            ``(None, False)`` after printing the last exception when every
            attempt failed.
        """
        # Identity first so the raw input gets the same treatment as the
        # converted ones inside a single loop.
        attempts = [lambda data: data, *self.formats]
        latestexception = None
        for fmt in attempts:
            try:
                formatted_X = fmt(X)
                if y is not None:
                    result = class_function(formatted_X, y)
                else:
                    result = class_function(formatted_X)
                return result, True
            except Exception as e:
                # Best-effort by design: remember the failure, try the next layout.
                latestexception = e
        print(latestexception)
        return None, False

    def format_dflist(self, input):
        """Convert ``input`` with sktime's ``convert_to(..., to_type="df-list")``."""
        return convert_to(input, to_type="df-list")

    def format_multiindex(self, input):
        """Convert ``input`` with sktime's ``convert_to(..., to_type="pd-multiindex")``."""
        return convert_to(input, to_type="pd-multiindex")

    def format_multiindex_manual(self, input):
        """Wrap ``input`` in a DataFrame using the training columns and index.

        NOTE(review): the *training* index is applied to whatever is passed in,
        including test data — confirm this is intended when X_test has a
        different index/length than X_train.
        """
        return pd.DataFrame(input, columns=self.X_train_cols, index=self.X_train_index)

    def format_multiindex_dflist(self, input):
        """Rebuild a DataFrame as in ``format_multiindex_manual``, then convert to "df-list"."""
        return convert_to(pd.DataFrame(input, columns=self.X_train_cols, index=self.X_train_index), to_type="df-list")

    def run_fit_predict_single(self, class_to_use):
        """Fit ``class_to_use`` on the training data and score it on the test data.

        Uses ``class_to_use.score(X_test, y_test)`` when the object has a
        ``score`` attribute; otherwise predicts on X_test and computes the
        fraction of predictions equal to y_test.

        Returns:
            The score on success. On failure an ``Exception`` *instance* is
            returned (not raised), which keeps the original contract of this
            method; callers should check the return type.
        """
        _, completed = self._try_function_with_formats(class_to_use.fit, self.X_train, self.y_train)
        if not completed:
            return Exception("Couldn't fit")

        if hasattr(class_to_use, 'score'):
            # Call score itself: predict does not accept the labels argument.
            score, completed = self._try_function_with_formats(class_to_use.score, self.X_test, self.y_test)
            if not completed:
                return Exception("Something went wrong")
            return score

        prediction, completed = self._try_function_with_formats(class_to_use.predict, self.X_test)
        # Check for failure before touching `prediction`, which is None then.
        if not completed:
            return Exception("Something went wrong")
        # Compare positionally against the instance's own labels (not a
        # notebook-global y_test).
        return np.mean(np.asarray(prediction) == np.asarray(self.y_test))

#Known issue: the fit function is run once on the raw input before the data is handed to the loop that catches exceptions.
#To fix this, make sure the fit and predict calls happen inside that loop. The loop has to handle either one input (X) or several (X and y).


In [4]:
def calculate_variance(df:pd.DataFrame) ->pd.Series:
    """Rank the columns of ``df`` by their coefficient of variation.

    Despite the name, the value computed per column is std / mean (the
    coefficient of variation), rounded to 10 decimals.

    Args:
        df: Frame whose columns are scored.

    Returns:
        Series indexed by column label, sorted from largest to smallest value.

    NOTE(review): columns whose mean is zero or negative give infinite or
    negative ratios, which affects the ranking — confirm the inputs have
    strictly positive means.
    """
    column_std = df.std()
    column_mean = df.mean()
    ratio = column_std.div(column_mean)
    rounded = ratio.round(10)
    return rounded.sort_values(ascending=False)

In [6]:
def select_by_variance(df:pd.DataFrame, amount:int):
    """Return the labels of the ``amount`` top-ranked columns of ``df``.

    Columns are ranked by ``calculate_variance`` (highest first) and the
    first ``amount`` entries of that ranking are kept.

    Args:
        df: Frame whose columns are ranked.
        amount: Number of column labels to keep from the top of the ranking.

    Returns:
        The index (column labels) of the selected entries.
    """
    ranked:pd.Series = calculate_variance(df)
    return ranked.head(amount).index
# Pick the labels of the 5 top-ranked columns of X.
best_features = select_by_variance(X, 5)
# NOTE(review): this rebinds the notebook-global X to the reduced frame, so the
# full feature set is no longer available until the loading cell is re-run.
X = X[best_features]
# Cell output: the selected column labels.
best_features

Index(['go_memstats_heap_idle_bytes&catalogue:80&catalogue',
       'go_memstats_mspan_inuse_bytes&catalogue:80&catalogue',
       'go_memstats_alloc_bytes&catalogue:80&catalogue',
       'go_memstats_heap_alloc_bytes&catalogue:80&catalogue',
       'go_memstats_stack_sys_bytes&user:80&user'],
      dtype='object')