In [1]:
 # Data Manipulation and Preprocessing
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Models
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier

# Deep Learning
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense



In [2]:
# Read the raw dataset from disk
data = pd.read_csv('data.csv')

# Separate the target column (y) from the remaining predictor columns (X)
y = data['Pump Data']
X = data.drop('Pump Data', axis=1)

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions approximately equal in both partitions, and the fixed seed
# makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardize the features: the scaler's statistics are learned from the
# training rows only and then reused unchanged on the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [3]:
# Hyperparameter search space for the RBF-kernel SVM:
# 4 values of C x 4 values of gamma = 16 candidate settings
svm_param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1],
    'kernel': ['rbf']
}

# Exhaustive grid search scored by accuracy under 10-fold cross-validation.
# The 160 fits are independent of one another, so n_jobs=-1 spreads them over
# all available CPU cores; the selected parameters and scores are unchanged.
# NOTE(review): X_train_scaled was standardized using all training rows before
# the folds are formed, so every validation fold has influenced the scaling
# statistics. Wrapping the scaler and SVC in a Pipeline would remove that
# (probably small) optimism in the CV score; it is left as-is here because it
# would change the best_params_ keys and the input the fitted search expects.
svm_grid = GridSearchCV(
    SVC(),
    svm_param_grid,
    cv=10,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
)
svm_grid.fit(X_train_scaled, y_train)

# Report the winning configuration and its mean cross-validated accuracy
print("Best SVM Parameters:", svm_grid.best_params_)
print("Best SVM Score (CV):", svm_grid.best_score_)


Fitting 10 folds for each of 16 candidates, totalling 160 fits
Best SVM Parameters: {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
Best SVM Score (CV): 0.9970833333333333
