# Model Exploration

In [7]:
import pandas as pd

# Read the modelling dataset from a CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="model_data.csv")

### Support Vector Machine (SVM)
- Advantages: Effective in high-dimensional spaces and relatively robust to overfitting there.
- Considerations: Requires careful tuning of hyperparameters like C (regularization) and gamma (kernel coefficient; used only by non-linear kernels such as RBF, not by the linear kernel in the example below).

In [8]:
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer

# Example text-classification pipeline: TF-IDF features (max_features=5000)
# feeding a linear-kernel support vector classifier.
# NOTE(review): scikit-learn documents SVC's random_state as affecting only
# probability estimates — confirm whether it has any effect here.
svm_pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer(max_features=5000)),
        ("clf", SVC(kernel="linear", random_state=42)),
    ]
)


### Gradient Boosting Machines (GBMs)
- Advantages: Typically perform well on a variety of problems and can handle different types of data.
- Considerations: Models like GradientBoostingClassifier or HistGradientBoostingClassifier can be effective, but the number of estimators, learning rate, and tree depth need tuning.

In [12]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting classifier with explicit hyperparameters and a fixed seed.
gbm = GradientBoostingClassifier(
    random_state=42,
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
)


### Neural Networks

- Advantages: Can capture complex patterns and interactions in data.
- Considerations: Requires careful tuning of network architecture and hyperparameters. Libraries like TensorFlow or PyTorch can be used for larger models; the example below uses scikit-learn's MLPClassifier.

In [10]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron classifier: hidden layers of 100 and 50 units,
# max_iter=200, and a fixed seed.
mlp = MLPClassifier(
    random_state=42,
    max_iter=200,
    hidden_layer_sizes=(100, 50),
)
