In [None]:
import math
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans
import numpy as numpy


In [None]:
# Column names kept for reference; `cols` is not referenced by the other
# cells visible in this notebook.
cols = [
    "fixed_acidity",
    "volatile_acidity",
    "residual_sugar",
    "chlorides",
    "free_sulfur_dioxide",
    "total_sulfur_dioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
    "quality",
]

# Read the wine table from its absolute path into a DataFrame.
Data1 = pd.read_csv('/content/wine_data.csv')


In [None]:
# Hold out half of the rows for evaluation; the fixed seed keeps the split repeatable.
data_train, data_test = train_test_split(Data1, random_state=4, test_size=0.5)

# Separate the 'quality' target from the remaining feature columns of each split.
x_train, y_train = data_train.drop(columns='quality'), data_train['quality']
x_test, y_test = data_test.drop(columns='quality'), data_test['quality']


In [None]:
# Fit the standardiser on the training features only, then apply the same
# transformation to both splits so the test rows never influence the scaling.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

In [None]:
# Number of RBF centres (one k-means cluster per hidden unit).
k_cent = 1100

# Seed k-means so the centres -- and therefore sigma, the design matrices and
# the final score computed from them -- are reproducible across runs.
# The seed value matches the one used for the train/test split.
km = KMeans(n_clusters=k_cent, max_iter=100, random_state=4)
km.fit(x_train)

# Cluster centres serve as the RBF centres in the following cells.
cent = km.cluster_centers_

In [None]:
# Largest Euclidean distance between any two cluster centres.
# The running maximum is kept in `d_max` rather than `max`, so the builtin
# `max` is not shadowed for the rest of the kernel session.
d_max = 0.0
for centre in cent:
    # Distances from this centre to every centre, computed in one vectorised call.
    row_max = numpy.linalg.norm(cent - centre, axis=1).max()
    if row_max > d_max:
        d_max = row_max
d = d_max

# RBF width derived from the maximum centre spacing: d_max / sqrt(2 * k_cent).
sigma = d / math.sqrt(2 * k_cent)
print(sigma)

0.2048056979172359


In [None]:
# Build the training design matrix G, where
#   G[i, j] = exp(-||x_train[i] - cent[j]||^2 / (2 * sigma)^2).
shape = x_train.shape
row = shape[0]
column = k_cent

# NOTE(review): the denominator is (2 * sigma)^2 = 4 * sigma^2, whereas the
# usual Gaussian RBF uses 2 * sigma^2 -- confirm which is intended. It is kept
# unchanged here so it stays identical to the formula used for G_test.
denom = math.pow(2 * sigma, 2)

G = numpy.empty((row, column), dtype=float)
for i in range(row):
    # One vectorised pass per sample: distances to all centres at once,
    # instead of a Python-level loop over every (sample, centre) pair.
    dist = numpy.linalg.norm(cent - x_train[i], axis=1)
    G[i] = numpy.exp(-numpy.square(dist) / denom)
print(G)

[[2.16761412e-078 9.00130833e-041 7.16817164e-026 ... 4.86056565e-044
  2.03878002e-077 1.02605220e-064]
 [8.83373990e-101 4.33079777e-022 5.80923350e-014 ... 4.57438262e-026
  2.22826477e-101 1.25130639e-058]
 [6.88409140e-108 5.85899306e-029 8.74122856e-030 ... 9.10554355e-030
  1.79445915e-109 2.04815088e-052]
 ...
 [1.71839693e-076 3.84400231e-019 3.43950076e-015 ... 3.00015693e-029
  1.46094766e-074 1.92981263e-037]
 [1.01320423e-082 7.09278562e-022 3.18330307e-021 ... 1.75008197e-046
  2.31795632e-078 5.05718628e-051]
 [2.20201373e-099 1.63187045e-017 1.41977211e-017 ... 1.08016982e-027
  9.35977503e-095 4.54505294e-055]]


In [None]:
# Solve the least-squares problem  min_w ||G w - y_train||^2  for the output
# weights. A dedicated least-squares solver is used instead of explicitly
# forming inv(G^T G) G^T: many entries of G are vanishingly small, so G^T G is
# badly conditioned and inverting it amplifies rounding error (or fails
# outright when it is singular).
w = numpy.linalg.lstsq(G, numpy.asarray(y_train, dtype=float), rcond=None)[0]
print(w)

[20.88025541 51.86499858 25.27672682 ... 33.94516339 28.41489028
  8.        ]


In [None]:

# Build the test design matrix with the same centres and width as training:
#   G_test[i, j] = exp(-||x_test[i] - cent[j]||^2 / (2 * sigma)^2).
row = x_test.shape[0]
column = k_cent

# NOTE(review): the denominator is (2 * sigma)^2 = 4 * sigma^2, whereas the
# usual Gaussian RBF uses 2 * sigma^2 -- confirm which is intended. It is kept
# unchanged here so it stays identical to the formula used for G.
denom = math.pow(2 * sigma, 2)

G_test = numpy.empty((row, column), dtype=float)
for i in range(row):
    # One vectorised pass per sample: distances to all centres at once,
    # instead of a Python-level loop over every (sample, centre) pair.
    dist = numpy.linalg.norm(cent - x_test[i], axis=1)
    G_test[i] = numpy.exp(-numpy.square(dist) / denom)
print(G_test[0])

[4.71397080e-138 6.62067063e-020 2.80746015e-038 ... 9.82187439e-039
 1.61341983e-134 1.82976706e-054]


In [None]:
# Raw network output: a real-valued estimate of the quality score.
raw_prediction = numpy.dot(G_test, w)

# Convert the estimate into a class label. The previous sign/threshold
# expression could only produce 0, 0.5 or 1 (a binary-label rule), which never
# matches the multi-valued 'quality' labels. Round to the nearest integer
# instead and clamp to the label range observed in training.
prediction = numpy.clip(
    numpy.rint(raw_prediction), y_train.min(), y_train.max()
).astype(int)

score = accuracy_score(y_test, prediction)
print(score)

0.0


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Read the wine table; every column except the last is treated as a feature
# and the last column as the label. (An earlier, commented-out variant picked
# the eleven feature columns and 'quality' by name instead.)
data = pd.read_csv('/content/wine_data.csv')
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Keep 10% of the rows aside for evaluation; seeded so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.1
)

# Min-max scale the features using ranges learned from the training rows only.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Fit an RBF-kernel support vector classifier on all features.
svm = SVC(C=1.0, gamma=0.7, kernel='rbf').fit(X_train, y_train)

# Predict the held-out rows and report the accuracy as a percentage.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}")

# # Plot the decision boundary (using only the first two features for visualization)
# def plot_decision_boundary(X, y, model):
#     # We will plot only the first two features for visualization purposes
#     X = X[:, :2]  # Select the first two features

#     # Train a new SVM model using only the first two features
#     svm_2d = SVC(kernel='rbf', gamma=0.7, C=1.0)
#     svm_2d.fit(X, y)

#     h = .02  # Step size in the mesh
#     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
#     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
#     xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
#                          np.arange(y_min, y_max, h))

#     # Create a meshgrid with two features
#     mesh_data = np.c_[xx.ravel(), yy.ravel()]

#     # Make predictions for each point in the mesh
#     Z = svm_2d.predict(mesh_data)
#     Z = Z.reshape(xx.shape)

#     # Plot decision boundary and training points
#     plt.contourf(xx, yy, Z, alpha=0.8)
#     plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', marker='o')
#     plt.xlabel('Feature 1 (Scaled)')
#     plt.ylabel('Feature 2 (Scaled)')
#     plt.title('SVM Decision Boundary with RBF Kernel (First Two Features)')
#     plt.show()

# # Call the function to plot the decision boundary
# plot_decision_boundary(X_train, y_train, svm)

Accuracy: 31.14


In [None]:
# Working RBF approach: nearest-neighbour classification by RBF kernel similarity
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import accuracy_score

data = pd.read_csv('wine_data.csv')

print(data.head())

# Every column except the last is a feature; the last column is the label.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split BEFORE scaling so that the scaler never sees the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the min/max of each feature from the training rows only, then apply the
# same transformation to the test rows. Fitting on the full dataset would leak
# test-set statistics into training and bias the reported accuracy.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


def rbf_kernel(x1, x2, gamma):
    """Gaussian (RBF) similarity between two points.

    Computes ``exp(-gamma * <x1 - x2, x1 - x2>)``; for 1-D inputs the inner
    product is the squared Euclidean distance between the points.

    Args:
        x1: First point (NumPy array).
        x2: Second point, same shape as ``x1``.
        gamma: Scale factor applied to the squared distance.

    Returns:
        The kernel value; 1.0 when the points coincide.
    """
    delta = x1 - x2
    squared_distance = np.dot(delta, delta.T)
    return np.exp(-gamma * squared_distance)

def rbf_classifier(X_train, y_train, X_test, gamma):
    """Label each test sample with the label of its most similar training sample.

    Similarity is the Gaussian kernel ``exp(-gamma * ||x - x'||^2)``; the
    training sample with the highest kernel value supplies the prediction
    (the first such sample wins on ties).

    Args:
        X_train: Training samples, one per row (array-like).
        y_train: Training labels aligned positionally with ``X_train``.
        X_test: Samples to classify, one per row (array-like).
        gamma: Scale factor applied to the squared distance in the kernel.

    Returns:
        NumPy array with one predicted label per row of ``X_test``
        (an empty array when ``X_test`` has no rows).

    Raises:
        ValueError: If there are test samples but no training samples.
    """
    train_points = np.asarray(X_train, dtype=float)
    # Positional lookup: converting to an array avoids label-based indexing
    # surprises when y_train is a pandas Series with a non-default index.
    train_labels = np.asarray(y_train)
    test_points = np.asarray(X_test, dtype=float)

    if len(test_points) == 0:
        return np.array([])
    if len(train_points) == 0:
        raise ValueError("rbf_classifier needs at least one training sample")

    # One row per training sample, features flattened into a single axis.
    train_points = train_points.reshape(len(train_points), -1)

    predictions = []
    for test_sample in test_points:
        # Kernel values against every training sample in one vectorised step,
        # replacing the per-pair Python loop.
        offsets = train_points - np.ravel(test_sample)
        similarities = np.exp(-gamma * np.sum(offsets * offsets, axis=1))
        closest_index = np.argmax(similarities)
        predictions.append(train_labels[closest_index])

    return np.array(predictions)

# Classify the held-out rows with the kernel nearest-neighbour rule and show
# the raw predicted labels.
predictions = rbf_classifier(
    X_train,
    y_train,
    X_test,
    gamma=0.5,
)
print(predictions)

# Fraction of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy * 100:.2f}%')


   fixed_acidity  volatile_acidity  citric_acid  residual_sugar  chlorides  \
0           11.6             0.580         0.66            2.20      0.074   
1           10.4             0.610         0.49            2.10      0.200   
2            7.4             1.185         0.00            4.25      0.097   
3           10.4             0.440         0.42            1.50      0.145   
4            8.3             1.020         0.02            3.40      0.084   

   free_sulfur_dioxide  total_sulfur_dioxide  density    pH  sulphates  \
0                 10.0                  47.0  1.00080  3.25       0.57   
1                  5.0                  16.0  0.99940  3.16       0.63   
2                  5.0                  14.0  0.99660  3.63       0.54   
3                 34.0                  48.0  0.99832  3.38       0.86   
4                  6.0                  11.0  0.99892  3.48       0.49   

   alcohol  quality  
0      9.0        3  
1      8.4        3  
2     10.7        3 