## **GridSearchCV**

In [4]:
import pandas as pd
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split, GridSearchCV


In [7]:
# Load dataset
dataset1 = pd.read_csv('TitanicDataset.csv')

# Data preprocessing
dataset1.drop(columns=['Cabin', 'Embarked', 'Ticket', 'Fare', 'SibSp', 'Name', 'Parch'], inplace=True, axis=1)
dataset1.Age = dataset1.Age.fillna(dataset1.Age.mean())
dataset1.Sex = dataset1.Sex.replace('male', 1)
dataset1.Sex = dataset1.Sex.replace('female', 0)
dataset1.Survived = dataset1.Survived.astype(int)
dataset1.Age = dataset1.Age.astype(int)

In [8]:
dataset1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   PassengerId  891 non-null    int64
 1   Survived     891 non-null    int64
 2   Pclass       891 non-null    int64
 3   Sex          891 non-null    int64
 4   Age          891 non-null    int64
dtypes: int64(5)
memory usage: 34.9 KB


In [9]:
# Separate features and target
X = dataset1.drop(columns=['Survived'])
y = dataset1['Survived']

# Split data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define the perceptron model
perceptron = Perceptron()



In [10]:
# Define the grid of hyperparameters
param_grid = {
    'alpha': [0.0001, 0.001, 0.01],
    'max_iter': [30, 40, 50],
    'eta0': [0.1, 0.01, 0.001],
    'tol': [1e-3, 1e-4, 1e-5],
}

# Define accuracy as the scoring metric
scorer = make_scorer(accuracy_score)

# Perform GridSearchCV
grid_search = GridSearchCV(estimator=perceptron, param_grid=param_grid, scoring=scorer, cv=5)
grid_result = grid_search.fit(x_train, y_train)

# Get the best parameters and best score
best_params = grid_result.best_params_
best_score = grid_result.best_score_

print("Best Parameters:", best_params)
print("Best Accuracy:", best_score)

# Get the best model
best_model = grid_result.best_estimator_

# Evaluate the best model on the test set
y_pred = best_model.predict(x_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_accuracy)

Best Parameters: {'alpha': 0.0001, 'eta0': 0.1, 'max_iter': 30, 'tol': 0.001}
Best Accuracy: 0.47359999999999997
Test Accuracy: 0.585820895522388


**IRIS Dataset**

In [25]:
Dataset1 = pd.read_csv("Iris.csv")

In [26]:
Dataset1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [27]:
Dataset1.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [28]:
Dataset1.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [29]:
Dataset1.head(10)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
5,6,5.4,3.9,1.7,0.4,Iris-setosa
6,7,4.6,3.4,1.4,0.3,Iris-setosa
7,8,5.0,3.4,1.5,0.2,Iris-setosa
8,9,4.4,2.9,1.4,0.2,Iris-setosa
9,10,4.9,3.1,1.5,0.1,Iris-setosa


In [30]:
Dataset1.Species = Dataset1.Species.replace('Iris-setosa', 0)

In [31]:
Dataset1.Species = Dataset1.Species.replace('Iris-versicolor', 1)
Dataset1.Species = Dataset1.Species.replace('Iris-virginica', 2)


In [32]:
Dataset1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    int64  
dtypes: float64(4), int64(2)
memory usage: 7.2 KB


In [33]:
Dataset1=Dataset1.drop(columns=['Id'])

In [34]:
Target1 = Dataset1.Species

In [35]:
Target1.info()

<class 'pandas.core.series.Series'>
RangeIndex: 150 entries, 0 to 149
Series name: Species
Non-Null Count  Dtype
--------------  -----
150 non-null    int64
dtypes: int64(1)
memory usage: 1.3 KB


In [36]:
import pandas as pd
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split, GridSearchCV

In [37]:
X_train,X_test,Y_train,Y_test = train_test_split(Dataset1,Target1,test_size=0.3,random_state=15)

In [38]:
model = Perceptron()

In [39]:
import numpy as np
class Perceptron1:
  def __init__(self, learning_rate, epochs):
    self.weights = None
    self.bias = None
    self.learning_rate = learning_rate
    self.epochs = epochs

  def activation(self,z):
    return np.heaviside(z,0)

  def fit(self, X ,y):  #xtrain ytrain
    n_samples, n_features = X.shape
    self.weights = np.zeros(n_features)
    self.bias = 0

    for epoch in range(self.epochs):
      for _ in range(n_samples):
        z = np.dot(X.iloc[_],self.weights)+self.bias
        y_pred = self.activation(z)

        #Update weight and bias
        self.weights += self.learning_rate*(y.iloc[_]-y_pred)*X.iloc[_]
        self.bias += self.learning_rate*(y.iloc[_]-y_pred)

  def predict(self,X):
    z = np.dot(X,self.weights) + self.bias
    return self.activation(z)

In [41]:
perceptron1 = Perceptron1(0.01,10)
perceptron1.fit(X_train,Y_train)
pred = perceptron1.predict(X_test)

from sklearn.metrics import accuracy_score
accuracy_score(Y_test,pred)

0.35555555555555557

In [42]:
from sklearn.metrics import classification_report
print(classification_report(Y_test,pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        15
           1       0.36      1.00      0.52        16
           2       0.00      0.00      0.00        14

    accuracy                           0.36        45
   macro avg       0.12      0.33      0.17        45
weighted avg       0.13      0.36      0.19        45



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
#USING GridSearchCV

In [43]:
# Define the grid of hyperparameters
param_grid = {
    'alpha': [0.0001, 0.001, 0.01],
    'max_iter': [30, 40, 50],
    'eta0': [0.1, 0.01, 0.001],
    'tol': [1e-3, 1e-4, 1e-5],
}

# Define accuracy as the scoring metric
scorer = make_scorer(accuracy_score)

# Perform GridSearchCV
grid_search = GridSearchCV(estimator=perceptron, param_grid=param_grid, scoring=scorer, cv=5)
grid_result = grid_search.fit(X_train, Y_train)

# Get the best parameters and best score
best_params = grid_result.best_params_
best_score = grid_result.best_score_

print("Best Parameters:", best_params)
print("Best Accuracy:", best_score)

# Get the best model
best_model = grid_result.best_estimator_

# Evaluate the best model on the test set
Y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(Y_test, Y_pred)
print("Test Accuracy:", test_accuracy)

Best Parameters: {'alpha': 0.0001, 'eta0': 0.01, 'max_iter': 30, 'tol': 0.001}
Best Accuracy: 0.9142857142857143
Test Accuracy: 0.9111111111111111


## **GridSeachCV**

*** class sklearn.model_selection.GridSearchCV(estimator, param_grid, *, scoring=None, n_jobs=None, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', error_score=nan, return_train_score=False)
Exhaustive search over specified parameter values for an estimator.

Important members are fit, predict.

GridSearchCV implements a “fit” and a “score” method. It also implements “score_samples”, “predict”, “predict_proba”, “decision_function”, “transform” and “inverse_transform” if they are implemented in the estimator used.

The parameters of the estimator used to apply these methods are optimized by cross-validated grid-search over a parameter grid.

The parameters selected are those that maximize the score of the left out data, unless an explicit score is passed in which case it is used instead.
Hyper-parameters are parameters that are not directly learnt within estimators. In scikit-learn they are passed as arguments to the constructor of the estimator classes. Typical examples include C, kernel and gamma for Support Vector Classifier, alpha for Lasso, etc.

It is possible and recommended to search the hyper-parameter space for the best cross validation score. ***