In [2]:
import pandas as pd

# Load the dataset from a CSV addressed by a relative path, so the file must sit
# in the notebook's working directory. The later cells derive their features (X)
# and target (y) from this frame.
dataframe = pd.read_csv('water_potability.csv')

In [3]:
# Separate predictors from the target by position: every column except the last
# becomes a feature (X); the last column is used as the label (y).
X, y = dataframe.iloc[:, :-1], dataframe.iloc[:, -1]

In [4]:
from sklearn.impute import SimpleImputer

# Replace missing feature values with the mean of their column.
# NOTE(review): the imputer is fitted on every row of X, before the later cells
# split into train/test, so held-out rows contribute to the fill values.
# Consider fitting on the training portion only (e.g. inside a Pipeline).
imputer = SimpleImputer(strategy='mean')
imputer.fit(X)
X_imputed = imputer.transform(X)

# Wrap the transformed values back into a frame carrying the original feature names.
X_imputed_df = pd.DataFrame(data=X_imputed, columns=X.columns)

In [5]:
from sklearn.decomposition import PCA

# Project the imputed features onto their first two principal components.
# random_state is pinned so the projection is reproducible from run to run:
# with the default svd_solver='auto', scikit-learn may pick the randomized
# solver (depending on data shape and library version), which is otherwise
# unseeded. 42 matches the seed used for the train/test splits in this notebook.
# NOTE(review): PCA centers but does not scale its input; if the feature columns
# are on very different scales, standardize them first — TODO confirm.
# NOTE(review): PCA is fitted on all rows, before the later cells split into
# train/test, so held-out rows influence the components.
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X_imputed_df)

# Name the two components so downstream cells can work with a labelled frame.
X_pca_df = pd.DataFrame(data=X_pca, columns=['PC1', 'PC2'])

In [5]:
from sklearn.svm import NuSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hold out 20% of the PCA-projected rows for evaluation; the seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_pca_df,
    y,
    test_size=0.2,
    random_state=42,
)

# Baseline model: linear-kernel NuSVC with nu=0.1. fit() returns the estimator
# itself, so construction and training are chained.
nu_svm_classifier = NuSVC(kernel='linear', nu=0.1).fit(X_train, y_train)

# Score the held-out rows.
y_pred = nu_svm_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of the NuSVC classifier:", accuracy)


In [None]:
from sklearn.svm import NuSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Keep only 10% of the PCA-projected rows for this sweep (the 90% "test" side of
# this first split is discarded) — presumably to keep the repeated fits fast.
X_pca_df_sample, _, y_sample, _ = train_test_split(X_pca_df, y, test_size=0.9, random_state=42)

# 80/20 train/test split of that subsample.
X_train, X_test, y_train, y_test = train_test_split(X_pca_df_sample, y_sample, test_size=0.2, random_state=42)

# NuSVC's `nu` must lie in (0, 1]: it is an upper bound on the fraction of margin
# errors and a lower bound on the fraction of support vectors. It is not an
# unbounded penalty like `C` in SVC, so a C-style grid (10, 100, ...) is rejected
# by scikit-learn. Sweep valid values only.
nu_values = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
penalty_values = nu_values  # previous name of this grid, kept as an alias

accuracy_results = {}

for nu in nu_values:
    nu_svm_classifier = NuSVC(nu=nu, kernel='linear')

    try:
        nu_svm_classifier.fit(X_train, y_train)
    except ValueError as err:
        # Even inside (0, 1], libsvm rejects a nu that is too large for the
        # class balance of the training rows ("specified nu is infeasible").
        # Report it and carry on with the remaining candidates.
        print(f"Skipping nu={nu}: {err}")
        continue

    y_pred = nu_svm_classifier.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    accuracy_results[nu] = accuracy

# Only the nu values that trained successfully appear here.
for nu, accuracy in accuracy_results.items():
    print(f"Accuracy for nu={nu}: {accuracy}")


In [9]:
from sklearn.svm import NuSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Same seeded 80/20 split of the full PCA-projected data as the baseline cell.
X_train, X_test, y_train, y_test = train_test_split(X_pca_df, y, test_size=0.2, random_state=42)

# nu is held fixed while the polynomial degree varies. This names the value that
# was previously hard-coded in the constructor call; an unused
# `best_penalty = 1.0` assignment that contradicted it has been dropped.
fixed_nu = 0.1

poly_degrees = [2, 3, 4]

accuracy_results = {}

for degree in poly_degrees:
    # Inhomogeneous polynomial kernel (coef0=1.0) with gamma derived from the
    # training data by scikit-learn's 'scale' rule.
    nu_svm_classifier = NuSVC(nu=fixed_nu, kernel='poly', degree=degree, coef0=1.0, gamma='scale')

    nu_svm_classifier.fit(X_train, y_train)

    y_pred = nu_svm_classifier.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    accuracy_results[degree] = accuracy

for degree, accuracy in accuracy_results.items():
    print(f"Accuracy for polynomial degree {degree}: {accuracy}")


Accuracy for polynomial degree 2: 0.5030487804878049
Accuracy for polynomial degree 3: 0.5457317073170732
Accuracy for polynomial degree 4: 0.510670731707317
