In [None]:
# A Gentle Introduction to Machine Learning Algorithms
# Author:   Dr. Steven C. Lindo
# Date :    7/27/2021
# Description: Using scikit-learn (sklearn) we can train and
#              evaluate machine learning algorithms.
# -   -   -   -   -   -   -   -   -   -   -   -
# History
# Name      Date          Description
# scl       11/01/2022    Initial code draft
# scl       11/12/2022    Create framework for adding the other Classifiers
#                         Students should take this version as a starting point
# -   -   -   -   -   -   -   -   -   -   -   -   

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# machine learning library 
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split

# machine learning accuracy
from sklearn.metrics import accuracy_score

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the data folder
# used later in this notebook is reachable from the runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Folder on the mounted Drive that holds the data files for the experiments.
location ='/content/drive/MyDrive/Colab Notebooks/data'

# please note this change and "hack" if you will, only for
# demo purposes with this implementation: every entry found in the
# folder is listed here and is later loaded as a data file.
# NOTE(review): os.listdir() returns names in arbitrary order and also lists
# sub-directories, so the position of a file in this list is not guaranteed.
list_of_files = os.listdir(location)
print(list_of_files)

['iris_data.csv', 'sports_class.csv', 'bc_data.csv', 'Salary_Data.csv']


In [None]:
# -    -    -    -    -    -    -    -    -    -    -
def f_bayes(X_train, X_test, y_train, y_test):
  """Train a Gaussian Naive Bayes classifier and score it on the test split.

  Parameters
  ----------
  X_train, X_test : feature matrices handed to ``fit`` and ``predict``.
  y_train, y_test : label arrays; reshaped to one entry per sample before
                    use (e.g. a single-column 2-D array becomes 1-D).

  Returns
  -------
  Accuracy on the test split, rounded to three decimal places.
  """
  # reshape the labels to 1-D to get rid of the warning raised when
  # column-shaped labels are passed to fit()
  y_test = np.reshape(y_test, len(y_test))
  y_train = np.reshape(y_train, len(y_train))

  # train the model
  model = GaussianNB()
  model.fit(X_train, y_train)

  # test the model
  y_pred = model.predict(X_test)

  # accuracy results
  # np.round() works for NumPy scalars and built-in floats alike, whereas
  # calling .round(3) on the score raises AttributeError whenever
  # accuracy_score hands back a plain Python float.
  model_performance = accuracy_score(y_test, y_pred)
  return np.round(model_performance, 3)

# -    -    -    -    -    -    -    -    -    -    -
def f_svm(X_train, X_test, y_train, y_test):
  """Train a support vector classifier (SVC, default settings) and score it.

  Parameters
  ----------
  X_train, X_test : feature matrices handed to ``fit`` and ``predict``.
  y_train, y_test : label arrays; reshaped to one entry per sample before
                    use (e.g. a single-column 2-D array becomes 1-D).

  Returns
  -------
  Accuracy on the test split, rounded to three decimal places.
  """
  # reshape the labels to 1-D to get rid of the warning raised when
  # column-shaped labels are passed to fit()
  y_test = np.reshape(y_test, len(y_test))
  y_train = np.reshape(y_train, len(y_train))

  # train the model
  model = SVC()
  model.fit(X_train, y_train)

  # test the model
  y_pred = model.predict(X_test)

  # accuracy results
  # np.round() works for NumPy scalars and built-in floats alike, whereas
  # calling .round(3) on the score raises AttributeError whenever
  # accuracy_score hands back a plain Python float.
  model_performance = accuracy_score(y_test, y_pred)
  return np.round(model_performance, 3)

# -    -    -    -    -    -    -    -    -    -    -
def f_decisionTrees(X_train, X_test, y_train, y_test):
  """Placeholder for the decision-tree experiment.

  No model is trained yet: all four arguments are ignored and the
  reported performance is always 0.0.
  """
  # TODO: train a model on the training split and return its test accuracy
  return 0.0

# -    -    -    -    -    -    -    -    -    -    -
def f_nearestNeighbor(X_train, X_test, y_train, y_test):
  """Placeholder for the nearest-neighbor experiment.

  No model is trained yet: all four arguments are ignored and the
  reported performance is always 0.0.
  """
  # TODO: train a model on the training split and return its test accuracy
  return 0.0


In [None]:
def f_feature_selection(data):
  """Split a DataFrame into a feature array and an observed-label array.

  ``iloc[rows, cols]`` selects by position; the comma separates the row
  selector from the column selector.  A bare ``:`` keeps every row,
  ``:-1`` keeps every column except the last, and ``-1:`` keeps only the
  last column (as a slice, so the result stays two-dimensional).

  This assumes that 'class' = 'observed' is the last column of ``data``.

  Returns
  -------
  (features, observed) as NumPy arrays.
  """
  feature_columns = data.iloc[:, :-1]
  observed_column = data.iloc[:, -1:]
  return feature_columns.values, observed_column.values

def f_makeTrainTestData(X, y, test_size=0.55, random_state=42):
  """Split features and labels into training and testing subsets.

  Parameters
  ----------
  X : feature array.
  y : observed labels, one row per row of ``X``.
  test_size : value forwarded to ``train_test_split``; the default of 0.55
              keeps the original behaviour of holding out 55% of the rows
              for testing.
  random_state : seed forwarded to ``train_test_split`` so the split is
                 repeatable; defaults to the original value of 42.

  Returns
  -------
  (xtrain, xtest, ytrain, ytest)
  """
  # split into training & testing
  xtrain, xtest, ytrain, ytest = train_test_split(
      X, y, test_size=test_size, random_state=random_state)
  return xtrain, xtest, ytrain, ytest

def f_loadData(fn):
  """Read the CSV file ``fn`` with pandas and return it as a DataFrame."""
  return pd.read_csv(fn)

def f_pairPlot(dataframe):
  """Draw a seaborn pair plot of ``dataframe``.

  Points are coloured by the 'class' column and the diagonal panels use
  kernel density estimates.
  """
  sns.pairplot(data=dataframe, hue='class', diag_kind='kde')


In [None]:
def main():
  """Run every classifier against every data file and print the scores.

  Reads the notebook-level ``list_of_files`` and ``location``.  For each
  file it loads the data, splits it into features/labels and train/test
  sets, then prints one performance line per model.

  Returns the most recently computed score (the last model of the last
  file), or 0 when ``list_of_files`` is empty.
  """
  # Report label and scoring function for each model, in evaluation order.
  # The labels are padded so the '=' signs line up in the printed output.
  models = (
    (' - Bayes model performance = ', f_bayes),
    (' - SVM model performance   = ', f_svm),
    (' - DTree model performance = ', f_decisionTrees),
    (' - KNN model performance   = ', f_nearestNeighbor),
  )

  status = 0
  # Every file in the list above is processed, first entry first.
  for name in list_of_files:
    filename = location + '/' + name
    df = f_loadData(filename)
    print(name)

    # (optional) call f_pairPlot(df) here to chart the data set

    # feature selection, then the train/test split
    X_features, y_observed = f_feature_selection(df)
    xtrain, xtest, ytrain, ytest = f_makeTrainTestData(X_features, y_observed)

    # train and test each model
    for label, score_model in models:
      status = score_model(xtrain, xtest, ytrain, ytest)
      print(filename, label, status)

    print('\n-    -    -    -    Next File    -    -    -    -\n')

  print('\n-    -    -    -    -  E N D  -    -    -    -    -    -\n')

  return status

In [None]:
# Run the experiment over every listed file.  main() returns only the most
# recently computed score, so `results` is a single number, not a table.
results = main()

iris_data.csv
/content/drive/MyDrive/Colab Notebooks/data/iris_data.csv  - Bayes model performance =  0.988
/content/drive/MyDrive/Colab Notebooks/data/iris_data.csv  - SVM model performance   =  0.976
/content/drive/MyDrive/Colab Notebooks/data/iris_data.csv  - DTree model performance =  0.0
/content/drive/MyDrive/Colab Notebooks/data/iris_data.csv  - KNN model performance   =  0.0

-    -    -    -    Next File    -    -    -    -

sports_class.csv
/content/drive/MyDrive/Colab Notebooks/data/sports_class.csv  - Bayes model performance =  0.143
/content/drive/MyDrive/Colab Notebooks/data/sports_class.csv  - SVM model performance   =  0.571
/content/drive/MyDrive/Colab Notebooks/data/sports_class.csv  - DTree model performance =  0.0
/content/drive/MyDrive/Colab Notebooks/data/sports_class.csv  - KNN model performance   =  0.0

-    -    -    -    Next File    -    -    -    -

bc_data.csv
/content/drive/MyDrive/Colab Notebooks/data/bc_data.csv  - Bayes model performance =  0.645
/cont

In [None]:
def f_loadData(fn):
  """Read the CSV file ``fn`` with pandas and return it as a DataFrame.

  NOTE: this redefines the f_loadData declared earlier in the notebook
  with the same behaviour; ``fn`` is used as given (no folder is prepended).
  """
  return pd.read_csv(fn)

In [None]:
# Load every listed file in turn.  `df` is overwritten on each pass, so once
# the loop finishes it holds only the contents of the last file in the list.
for i in range (len(list_of_files)):
    filename = location + '/' + list_of_files[i]
    df = f_loadData(filename)