In [150]:
import torch
import subprocess
import pandas as pd
import sklearn
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [166]:
class MLAPI:
    device = None
    nvidia = False
    dataset_name = None
    dataset = None
    model = None
    
    def __init__(self, checkpoint=""):
        try:
            subprocess.check_output('nvidia-smi')
            nvidia = True
            print("Nvidia drivers available!")
        except Exception: 
            # this command not being found can raise quite a few 
            # different errors depending on the configuration
            print('No Nvidia GPU in system!')
        try:
            self.device = torch.device('cuda:0')
            print("GPU available!")
        except:
            print("No GPU available in system!")
        

    def set_local_csv_dataset(self, dataset="./Data/archive 2/Iris.csv"):
        self.dataset_name = dataset
        try:
            self.dataset = pd.read_csv(self.dataset_name)
        except:
            print("Not CSV")

    def set_local_json_dataset(self, dataset):
        pass

    def logistic_regression(self,label, lr=1e-4, test_size=0.25, random_state=42, columns=None, max_epochs=100):
        df_encoded = pd.get_dummies(self.dataset, drop_first=True)
        y = self.dataset[label]
        if columns == None:
            #assume all other columns and set X_columns to all features not label
            X_columns = [col for col in self.dataset.columns if label not in col]
        else:
            # use only given columns
            X_columns = columns
        X = self.dataset[X_columns]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        self.model = LogisticRegression(max_iter=max_epochs)
        self.model.fit(X_train, y_train)
        accuracy = self.model.score(X_test, y_test)
        print(f"Model Accuracy: {accuracy}")
        return self.model

    def linear_regression(self,label, lr=1e-4, test_size=0.25, random_state=42, columns=None, max_epochs=100):
        df_encoded = pd.get_dummies(self.dataset, drop_first=True)

        #TODO: get columns from df_encoded if categorical data that is one hot encoded
        
        y = df_encoded[label]
        if columns == None:
            #assume all other columns and set X_columns to all features not label
            X_columns = [col for col in df_encoded.columns if label not in col]
        else:
            # use only given columns
            X_columns = columns
        X = df_encoded[X_columns]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        self.model = LinearRegression()
        self.model.fit(X_train, y_train)
        accuracy = self.model.score(X_test, y_test)
        print(f"Model Accuracy: {accuracy}")
        return self.model

    def svm(self,label, lr=1e-4, test_size=0.25, random_state=42, columns=None, max_epochs=100):
        # TODO: METHOD STUB
        pass

    def decision_tree(self,label, lr=1e-4, test_size=0.25, random_state=42, columns=None, max_epochs=100):
        y = self.dataset[label]
        if columns == None:
            #assume all other columns and set X_columns to all features not label
            X_columns = [col for col in self.dataset.columns if label not in col]
        else:
            # use only given columns
            X_columns = columns
        X = self.dataset[X_columns]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        self.model = DecisionTreeClassifier(random_state = random_state)
        self.model.fit(X_train, y_train)
        y_pred = self.model.predict(X_test)
        accuracy = self.model.score(X_test, y_test)
        print(f"Decision Tree Model Accuracy: {accuracy}")
        return self.model
                

In [167]:
api = MLAPI()
api.set_local_csv_dataset()
print(api.dataset)

No Nvidia GPU in system!
GPU available!
      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \
0      1            5.1           3.5            1.4           0.2   
1      2            4.9           3.0            1.4           0.2   
2      3            4.7           3.2            1.3           0.2   
3      4            4.6           3.1            1.5           0.2   
4      5            5.0           3.6            1.4           0.2   
..   ...            ...           ...            ...           ...   
145  146            6.7           3.0            5.2           2.3   
146  147            6.3           2.5            5.0           1.9   
147  148            6.5           3.0            5.2           2.0   
148  149            6.2           3.4            5.4           2.3   
149  150            5.9           3.0            5.1           1.8   

            Species  
0       Iris-setosa  
1       Iris-setosa  
2       Iris-setosa  
3       Iris-setosa  
4       I

python(80982) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


In [168]:
print(api.logistic_regression('Species', max_epochs=700))
print(api.linear_regression('Species_Iris-versicolor', max_epochs=700))
print(api.decision_tree('Species', max_epochs=700))

Model Accuracy: 1.0
LogisticRegression(max_iter=700)
Model Accuracy: 0.9469908228307156
LinearRegression()
Decision Tree Model Accuracy: 1.0
DecisionTreeClassifier(random_state=42)


In [169]:
!jupyter nbconvert --to script 'ML_API.ipynb'

python(82572) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[NbConvertApp] Converting notebook ML_API.ipynb to script
[NbConvertApp] Writing 4917 bytes to ML_API.py
