# Imports


In [25]:
import joblib
import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

## Read Data

In [26]:
# Column names are supplied explicitly via `names=`: ten numeric features
# followed by the 'class' label column.
cols = [
    'fLength', 'fWidth', 'fSize', 'fConc', 'fConc1',
    'fAsym', 'fM3Long', 'fM3Trans', 'fAlpha', 'fDist',
    'class',
]
df = pd.read_csv('magic04.data', names=cols)
# Preview the first rows to confirm the columns line up with the names above.
df.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,g
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,g
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,g
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,g


### Convert Target variable 'class' to 0 & 1

In [27]:
# Encode the target as integers: 'g' -> 1, any other label -> 0.
# The dtype guard keeps this cell idempotent: without it, re-running the cell
# would compare the already-encoded integers against 'g' and silently reset
# every label to 0.
if not pd.api.types.is_integer_dtype(df['class']):
    df['class'] = (df['class'] == 'g').astype(int)

## Separate features and target

In [28]:
# Target vector: the encoded 'class' column.
y = df['class']
# Feature matrix: every remaining column.
X = df.drop(columns='class')


### Because the classes are imbalanced, use random oversampling to balance them

In [29]:
# Randomly oversample so both classes end up with the same number of rows;
# the fixed seed keeps the duplicated rows reproducible.
# NOTE(review): this resamples the full dataset, and the train/test split only
# happens in a later cell. Copies of the same original row can therefore land
# in both the training and the test partition, which inflates test metrics.
# Consider splitting first and oversampling the training portion only.
oversampler = RandomOverSampler(random_state=42)
X_resampled, y_resampled = oversampler.fit_resample(
    X,
    y,
)

In [30]:
# Sanity check: features and target must have the same number of rows.
print(X_resampled.shape, y_resampled.shape, sep="\n")

(24664, 10)
(24664,)


### Feature scaling

In [31]:
# Standardise every feature (zero mean, unit variance). The fitted `scaler` is
# kept because later cells reuse it to transform new samples and to save it.
# NOTE(review): the scaler is fitted on all resampled rows, i.e. before the
# train/test split in the next section, so test rows influence the scaling
# statistics. Consider fitting it on the training partition only.
scaler = StandardScaler().fit(X_resampled)
X_rescaled = scaler.transform(X_resampled)

## Train-test Split

In [32]:
# Hold out 20% of the scaled, resampled rows for evaluation; the fixed seed
# makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_rescaled,
    y_resampled,
    test_size=0.2,
    random_state=42,
)

In [33]:
# Train a support-vector classifier with scikit-learn's default
# hyper-parameters; `fit` returns the estimator itself, so it can be chained.
svm_model = SVC().fit(X_train, y_train)

In [34]:
# Evaluate on the held-out rows: per-class precision, recall and F1.
y_pred = svm_model.predict(X_test)
test_report = classification_report(y_test, y_pred)
print(test_report)

              precision    recall  f1-score   support

           0       0.90      0.80      0.84      2435
           1       0.82      0.91      0.86      2498

    accuracy                           0.86      4933
   macro avg       0.86      0.85      0.85      4933
weighted avg       0.86      0.86      0.85      4933



In [61]:
# Two hand-picked raw (unscaled) feature vectors, one per class, used for a
# quick smoke test of the trained model. Values follow the feature column
# order fLength ... fDist.
# NOTE(review): the "gamma" vector equals row 4 of the df.head() preview, so it
# is a row from the dataset rather than unseen data. The "hedron" key is
# presumably meant to read "hadron"; the spelling is kept because later cells
# look the sample up by this key.
_gamma_values = [
    75.1362, 30.9205, 3.1611, 0.3168, 0.1832,
    -5.5277, 28.5525, 21.8393, 4.648, 356.462,
]
_hedron_values = [
    120.5135, 76.9018, 3.9939, 0.0944, 0.0683,
    5.8043, -93.5224, -63.8389, 84.6874, 408.3166,
]
sample_data = {
    "gamma": np.array(_gamma_values),
    "hedron": np.array(_hedron_values),
}

In [62]:
# Wrap each raw sample in a one-row DataFrame that carries the training feature
# names. The scaler then receives the same column layout it was fitted on;
# passing a bare ndarray to a scaler fitted on a DataFrame makes recent
# scikit-learn versions emit a feature-name warning and skips the column check.
gamma_frame = pd.DataFrame(sample_data["gamma"].reshape(1, -1), columns=X.columns)
hedron_frame = pd.DataFrame(sample_data["hedron"].reshape(1, -1), columns=X.columns)

# Apply the already-fitted scaler (transform only, never re-fit on new samples).
gamma_scaled = scaler.transform(gamma_frame)
hedron_scaled = scaler.transform(hedron_frame)

# Each predict call returns a length-1 array; 1 corresponds to class 'g' and
# 0 to any other label, matching the target encoding used earlier.
gamma_pred = svm_model.predict(gamma_scaled)
hedron_pred = svm_model.predict(hedron_scaled)

print("Gamma Prediction:", gamma_pred[0])
print("Hedron Prediction:", hedron_pred[0])

Gamma Prediction: 1
Hedron Prediction: 0




### Save model and scaler

In [63]:
# Persist the fitted classifier together with the fitted scaler: new samples
# must go through this same scaler before being passed to the model.
# (joblib is imported in the imports cell at the top of the notebook.)
joblib.dump(svm_model, 'model.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']