In [2]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as pl

from sklearn.model_selection import cross_validate # for cross validation
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [3]:
# Load the data: semicolon-separated file, one sample per row.
df = pd.read_csv('trainning.data', sep=';')

# Skip the first two columns and convert the remaining ones into an
# independent float matrix.
data = df.iloc[:, 2:].to_numpy(dtype=float, copy=True)

# N = number of rows, d = number of retained columns.
N, d = data.shape
print(N, 'x', d)

120618 x 7


In [9]:
# Show how many columns the frame has, followed by their names, so that a
# fresh run reproduces both pieces of output recorded for this cell.
print(len(df.columns))
print(df.columns)

9
Index(['date', 'time', 'activity', 'acceleration_x', 'acceleration_y',
       'acceleration_z', 'gyro_x', 'gyro_y', 'gyro_z'],
      dtype='object')


In [10]:
# Column 0 of `data` is the target.
output = data[:, 0]
# Features: all rows, from the second column through the last.
inputs = data[:, 1:]

In [39]:
# Hold out 30% of the samples for testing. The fixed seed pins the partition:
# without it, re-running this cell after a model has been fitted reshuffles the
# data, so the "test" split may contain samples that model was trained on.
inputs_train, inputs_test, output_train, output_test = train_test_split(
    inputs, output, test_size=0.3, shuffle=True, random_state=42
)

# Keep the first test sample as a (1, n_features) array so it can be passed to
# an estimator's predict(), which takes 2-D input.
single_input_test = inputs_test[0].reshape(1, -1)

In [12]:
# Support vector classifier with a linear kernel ('rbf' is the alternative
# kernel noted for this cell), fitted on the training split.
SVM = SVC(kernel='linear', C=1.0)
SVM.fit(X=inputs_train, y=output_train)

In [40]:
# Score the fitted SVM on the held-out split and report it.
print('Accuracy:', SVM.score(X=inputs_test, y=output_test))

Accuracy: 0.8789863483115017


In [43]:
# Classify the single held-out sample and display the predicted label.
output_predicted = SVM.predict(X=single_input_test)
print(output_predicted)

[0.]


In [58]:
# Import necessary libraries
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Define the function
def train_and_optimize_model(model, param_grid, inputs, output, metrics=None, test_size=0.3, cv=10, random_state=None):
    """Tune ``model`` with a grid search and report how the best candidate scores.

    The data is split into a training and a test part, ``GridSearchCV`` picks
    the best hyperparameters on the training part, the refitted best estimator
    is evaluated on the test part, and finally the best estimator is
    cross-validated on the complete dataset.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``GridSearchCV``.
    param_grid : dict or list of dict
        Hyperparameter grid handed to ``GridSearchCV``.
    inputs, output : array-like
        Feature matrix and target vector.
    metrics : str or sequence of str, optional
        Scorer name(s). The first one selects the best candidate (``refit``).
        Defaults to ``['accuracy']``. A single string is treated as a
        one-element list.
    test_size : float or int, default 0.3
        Forwarded to ``train_test_split``.
    cv : int or CV splitter, default 10
        Forwarded to both ``GridSearchCV`` and ``cross_validate``.
    random_state : int, RandomState or None, default None
        Forwarded to ``train_test_split``; pass an int for a reproducible split.

    Returns
    -------
    dict
        ``"best_params"``: parameters chosen by the grid search;
        ``"cv_scores"``: ``{metric: {"mean": ..., "std": ...}}`` from the final
        cross-validation; ``"classification_report"``: report dict for the
        predictions on the test part.

    Raises
    ------
    ValueError
        If ``metrics`` is empty.
    """
    # Normalise `metrics` to a fresh list: avoids a shared mutable default and
    # stops a plain string from being indexed/iterated character by character.
    if metrics is None:
        metric_names = ['accuracy']
    elif isinstance(metrics, str):
        metric_names = [metrics]
    else:
        metric_names = list(metrics)
    if not metric_names:
        raise ValueError("metrics must name at least one scorer")

    # Split the dataset
    inputs_train, inputs_test, output_train, output_test = train_test_split(
        inputs, output, test_size=test_size, shuffle=True, random_state=random_state
    )

    # Grid search on the training part only; the first metric decides which
    # candidate is refitted as the best estimator.
    grid_search = GridSearchCV(
        model, param_grid, cv=cv, scoring=metric_names, refit=metric_names[0], n_jobs=-1
    )
    grid_search.fit(inputs_train, output_train)

    best_params = grid_search.best_params_
    best_model = grid_search.best_estimator_

    # Evaluate the refitted best estimator on the held-out part.
    predictions = best_model.predict(inputs_test)
    report = classification_report(output_test, predictions, output_dict=True)

    # Cross-validate the best configuration on the complete dataset.
    # NOTE(review): this includes the samples used to select the
    # hyperparameters, so these scores are likely somewhat optimistic.
    cv_results = cross_validate(
        best_model, inputs, output, cv=cv, scoring=metric_names, n_jobs=-1
    )
    cv_scores = {}
    for name in metric_names:
        fold_scores = cv_results[f"test_{name}"]
        cv_scores[name] = {"mean": fold_scores.mean(), "std": fold_scores.std()}

    return {
        "best_params": best_params,
        "cv_scores": cv_scores,
        "classification_report": report,
    }

# Create a toy dataset for a quick check of train_and_optimize_model.
# Dedicated names are used so the `inputs`/`output` arrays built from the CSV
# are not overwritten by synthetic data.
toy_inputs, toy_output = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Define the SVM model and the hyperparameter grid
svm_model = SVC()
svm_param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}

# Call the function to train and optimize the SVM model
result_svm = train_and_optimize_model(
    svm_model, svm_param_grid, toy_inputs, toy_output, metrics=['accuracy', 'f1']
)

# Print the results
print("SVM Results:")
print("Best Parameters:", result_svm['best_params'])
print("Cross-Validation Scores:")
print("Accuracy - Mean:", result_svm['cv_scores']['accuracy']['mean'], "STD:", result_svm['cv_scores']['accuracy']['std'])
print("F1 Score - Mean:", result_svm['cv_scores']['f1']['mean'], "STD:", result_svm['cv_scores']['f1']['std'])

SVM Results:
Best Parameters: {'C': 1, 'kernel': 'linear'}
Cross-Validation Scores:
Accuracy - Mean: 0.868 STD: 0.036551333764994136
F1 Score - Mean: 0.8666277019756865 STD: 0.03700959257706742
