In [107]:
import pandas as pd
import numpy as np

from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline
from sklearn.compose import make_column_selector
from sklearn.model_selection import RandomizedSearchCV


In [108]:
from sklearn.linear_model import LinearRegression,LogisticRegression,Lasso,Ridge
from sklearn.ensemble import RandomForestRegressor,RandomForestClassifier
from sklearn.svm import SVR,SVC
from sklearn.naive_bayes import MultinomialNB, BernoulliNB , GaussianNB
from sklearn.neighbors import KNeighborsRegressor,KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [109]:
classification_models = {
                           "LogisticRegression" : {
                                                        'model' : LogisticRegression(),
                                                        'para' : {
                                                                      'penalty': ['l1','l2'],
                                                                      'C' : [0.001,0.1,1,5,10]
                                                        }
                           },
                            "RandomForestClassifier" : {
                                                        'model' : RandomForestClassifier(),
                                                        'para' : {
                                                                'n_estimators': [200,300],
                                                                'max_depth': [10, 20, None],
                                                                'min_samples_leaf': [1, 5, 10],
                                                                'max_features': ['sqrt', 'log2']
                                                                }
                             },
                             "KNN" : {
                                        'model' : KNeighborsClassifier(),
                                        'para' : {
                                                    'n_neighbors' : range(1,11)
                                                 }
                            },
                             "SVC" : {
                                        'model' : SVC(),
                                        'para' : {
                                                    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                                                    'C' : [0.001,0.1,1,5,10],
                                                    'gamma' : ['scale', 'auto']
                                                 }
                            },
                            "BernoulliNB" : {
                                        'model' : BernoulliNB(),
                                        'para' : {
                                                    'alpha': [0.01,0.1,10]
                                                 }
                            },
                            "GaussianNB" : {
                                        'model' : GaussianNB(),
                                        'para' : {
                                                    'var_smoothing' : [1e-8,1e-9,1e-10,1e-11]
                                                 }
                            },
}

In [110]:
regression_models = {
                           "LinearRegression" : {
                                                        'model' : LinearRegression(),
                                                        'para' : {
                                                                      'fit_intercept': [True,False]
                                                        }
                           },
                           "Lasso" : {
                                                        'model' : Lasso(),
                                                        'para' : {
                                                                  'alpha' : [0.001,0.01,0.1,1,10],
                                                                  'max_iter':  [200,500,1000]  
                                                        }
                           },
                           "Ridge" : {
                                                        'model' : Ridge(),
                                                        'para' : {
                                                                  'alpha' : [0.001,0.01,0.1,1,10],
                                                                  'max_iter':  [200,500,1000]  
                                                        }
                           },
                            "RandomForestRegressor" : {
                                                        'model' : RandomForestRegressor(),
                                                        'para' : {
                                                                'n_estimators': [200,300],
                                                                'max_depth': [10, 20, None],
                                                                'min_samples_leaf': [1, 5, 10],
                                                                'max_features': ['sqrt', 'log2']
                                                                }
                             },
                             "KNN" : {
                                        'model' : KNeighborsRegressor(),
                                        'para' : {
                                                    'n_neighbors' : range(1,11)
                                                 }
                            },
                             "SVR" : {
                                        'model' : SVR(),
                                        'para' : {
                                                    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                                                    'C' : [0.001,0.1,1,5,10],
                                                    'gamma' : ['scale', 'auto']
                                                 }
                            },
}

In [111]:
def clean(x,target):
    x = x.dropna(axis = 1,how = 'all')
    x = x.dropna(subset = [target])
    imputer = SimpleImputer(strategy='most_frequent')
    imputed_data = imputer.fit_transform(x)
    #print(imputed_data.dtype.names)
    return pd.DataFrame(imputed_data,columns= x.columns)

def feature_engineering(x):
    x = x.apply(pd.to_numeric, errors='ignore')
    cat_features = x.select_dtypes(include='object')
    num_features = x.select_dtypes(exclude='object')
    print(cat_features.columns)
    for col in cat_features.columns:
        if len(cat_features[col].unique()) > 5:
            x = x.drop(col,axis = 1)
            print(col)

    for col in num_features.columns:
        varience = np.var(num_features[col])
        print(f'{col} : {varience}')
        if varience > -0.5 and varience < .5:
            x = x.drop(col,axis = 1)
            print(col)

    return x

def outlier_detection(X,target):
    num = X.select_dtypes(include='number').columns
    for col in num:
        upper_limit = X[col].mean() + 3*X[col].std()
        lower_limit = X[col].mean() - 3*X[col].std()
        X = X[(X[col] > lower_limit) & (X[col] < lower_limit)]
    return X

def X_encoding(x):
    ohe = OneHotEncoder(sparse_output=False).set_output(transform='pandas')
    cat_features = x.select_dtypes(include='object')
    encoded = ohe.fit_transform(cat_features)
    result = x.drop(cat_features,axis = 1)
    result = pd.concat([result,encoded],axis=1)
    return result

def y_encoding(y):
    try:
        y = y.apply(pd.to_numeric)
        print('done',y.iloc[0].dtype)
        return y
    except:
        uni = y.iloc[0].unique()
        num = len(uni)
        for i in range(num):
            y.iloc[0] = y.iloc[0].apply(lambda x: i if x == uni[i] else x)
        return y

def iscat(x):
    return True if x.iloc[0].dtype == 'object'  else False

def data_scale(x):
    x = x.apply(pd.to_numeric, errors='ignore')
    numeric_col = x.select_dtypes(include='number')
    numeric_feature_name = numeric_col.columns
    scale = StandardScaler()
    x[numeric_feature_name] = scale.fit_transform(x[numeric_feature_name])
    return x

def dimention_reduction(x):
    pca = PCA(0.95).set_output(transform='pandas')
    x = pca.fit_transform(x)
    return x

In [112]:
from sklearn.preprocessing import FunctionTransformer

clean_transformer = FunctionTransformer(clean)
feature_engineering_transformer = FunctionTransformer(feature_engineering)
X_encoding_transformer = FunctionTransformer(X_encoding)
y_encoding_transformer = FunctionTransformer(y_encoding)
dimention_reduction_transformer = FunctionTransformer(dimention_reduction)


In [113]:
data = pd.read_csv("D:\\DataSet\\Titanic\\archive\\Titanic-Dataset.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [114]:
data = clean(data,'Survived')
data.isna().sum()

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Cabin          0
Embarked       0
dtype: int64

In [115]:
data = outlier_detection(data,'Survived')

In [116]:
X_axis = data.drop(['Survived'],axis=1)
X_axis.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,B96 B98,S
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,B96 B98,S
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,B96 B98,S


In [117]:
X_axis = feature_engineering(X_axis)
X_axis.head()

Index(['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], dtype='object')
Name
Ticket
Cabin
PassengerId : 66156.66666666667
Pclass : 0.6982305912347064
Age : 174.03315198449138
SibSp : 1.2146782704208816
Parch : 0.6489990313409693
Fare : 2466.665311685043


  x = x.apply(pd.to_numeric, errors='ignore')


Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,3,male,22.0,1,0,7.25,S
1,2,1,female,38.0,1,0,71.2833,C
2,3,3,female,26.0,0,0,7.925,S
3,4,1,female,35.0,1,0,53.1,S
4,5,3,male,35.0,0,0,8.05,S


In [118]:
X_axis = X_encoding(X_axis)
X_axis

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,1,3,22.0,1,0,7.2500,0.0,1.0,0.0,0.0,1.0
1,2,1,38.0,1,0,71.2833,1.0,0.0,1.0,0.0,0.0
2,3,3,26.0,0,0,7.9250,1.0,0.0,0.0,0.0,1.0
3,4,1,35.0,1,0,53.1000,1.0,0.0,0.0,0.0,1.0
4,5,3,35.0,0,0,8.0500,0.0,1.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
886,887,2,27.0,0,0,13.0000,0.0,1.0,0.0,0.0,1.0
887,888,1,19.0,0,0,30.0000,1.0,0.0,0.0,0.0,1.0
888,889,3,24.0,1,2,23.4500,1.0,0.0,0.0,0.0,1.0
889,890,1,26.0,0,0,30.0000,0.0,1.0,1.0,0.0,0.0


In [119]:
X_axis = data_scale(X_axis)

  x = x.apply(pd.to_numeric, errors='ignore')


In [120]:
#X_axis = dimention_reduction(X_axis)

In [121]:
y_axis = data[['Survived']]

y_axis = y_encoding(y_axis)

done int64


In [122]:
X_axis.head()

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,-1.730108,0.827377,-0.497793,0.432793,-0.473674,-0.502445,-0.737695,0.737695,-0.482043,-0.307562,0.615838
1,-1.72622,-1.566107,0.715048,0.432793,-0.473674,0.786845,1.355574,-1.355574,2.074505,-0.307562,-1.623803
2,-1.722332,0.827377,-0.194583,-0.474545,-0.473674,-0.488854,1.355574,-1.355574,-0.482043,-0.307562,0.615838
3,-1.718444,-1.566107,0.48764,0.432793,-0.473674,0.42073,1.355574,-1.355574,-0.482043,-0.307562,0.615838
4,-1.714556,0.827377,0.48764,-0.474545,-0.473674,-0.486337,-0.737695,0.737695,-0.482043,-0.307562,0.615838


In [123]:
#y_axis = pd.to_numeric(y_axis['Survived'])

In [124]:
for model_name,model in classification_models.items():
    grid = GridSearchCV(model['model'],model['para'],cv=3,n_jobs=-1)
    grid.fit(X_axis,y_axis.values.ravel())
    print(f'model : {model_name}')
    print(f'Best Score : {grid.best_score_}')
    print(f'Best parameter : {grid.best_params_}')

15 fits failed out of a total of 30.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Debjit\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Debjit\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\Debjit\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1218, in fit
    solver = _check_solver(

model : LogisticRegression
Best Score : 0.7890011223344557
Best parameter : {'C': 0.1, 'penalty': 'l2'}
model : RandomForestClassifier
Best Score : 0.8294051627384961
Best parameter : {'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': 1, 'n_estimators': 300}
model : KNN
Best Score : 0.8215488215488215
Best parameter : {'n_neighbors': 10}
model : SVC
Best Score : 0.8260381593714928
Best parameter : {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
model : BernoulliNB
Best Score : 0.7755331088664422
Best parameter : {'alpha': 10}
model : GaussianNB
Best Score : 0.7845117845117845
Best parameter : {'var_smoothing': 1e-08}


In [125]:
reg_data = pd.read_csv("C:\\Users\\Debjit\\Downloads\\House Price Prediction Dataset.csv")
reg_data = pd.DataFrame(reg_data)
reg_data.head()

Unnamed: 0,Id,Area,Bedrooms,Bathrooms,Floors,YearBuilt,Location,Condition,Garage,Price
0,1,1360,5,4,3,1970,Downtown,Excellent,No,149919
1,2,4272,5,4,3,1958,Downtown,Excellent,No,424998
2,3,3592,2,2,3,1938,Downtown,Good,No,266746
3,4,966,4,2,2,1902,Suburban,Fair,Yes,244020
4,5,4926,1,4,2,1975,Downtown,Fair,Yes,636056


In [126]:

#reg_data = reg_data.dropna(axis=1, how='all')

In [127]:
reg_data = clean(reg_data,'Price')
reg_data.head()

Unnamed: 0,Id,Area,Bedrooms,Bathrooms,Floors,YearBuilt,Location,Condition,Garage,Price
0,1,1360,5,4,3,1970,Downtown,Excellent,No,149919
1,2,4272,5,4,3,1958,Downtown,Excellent,No,424998
2,3,3592,2,2,3,1938,Downtown,Good,No,266746
3,4,966,4,2,2,1902,Suburban,Fair,Yes,244020
4,5,4926,1,4,2,1975,Downtown,Fair,Yes,636056


In [128]:
reg_data = outlier_detection(reg_data,'Price')

In [129]:
reg_X_axis = reg_data.drop(['Price'],axis = 1)
reg_y_axis = reg_data[['Price']]

In [130]:
reg_X_axis.head()

Unnamed: 0,Id,Area,Bedrooms,Bathrooms,Floors,YearBuilt,Location,Condition,Garage
0,1,1360,5,4,3,1970,Downtown,Excellent,No
1,2,4272,5,4,3,1958,Downtown,Excellent,No
2,3,3592,2,2,3,1938,Downtown,Good,No
3,4,966,4,2,2,1902,Suburban,Fair,Yes
4,5,4926,1,4,2,1975,Downtown,Fair,Yes


In [131]:
reg_y_axis.head()

Unnamed: 0,Price
0,149919
1,424998
2,266746
3,244020
4,636056


In [132]:
reg_X_axis = feature_engineering(reg_X_axis)
reg_X_axis.head()

Index(['Location', 'Condition', 'Garage'], dtype='object')
Id : 333333.25
Area : 1676566.52760975
Bedrooms : 2.02848775
Bathrooms : 1.2292437499999997
Floors : 0.65445775
YearBuilt : 1290.082084


  x = x.apply(pd.to_numeric, errors='ignore')


Unnamed: 0,Id,Area,Bedrooms,Bathrooms,Floors,YearBuilt,Location,Condition,Garage
0,1,1360,5,4,3,1970,Downtown,Excellent,No
1,2,4272,5,4,3,1958,Downtown,Excellent,No
2,3,3592,2,2,3,1938,Downtown,Good,No
3,4,966,4,2,2,1902,Suburban,Fair,Yes
4,5,4926,1,4,2,1975,Downtown,Fair,Yes


In [133]:
reg_X_axis = X_encoding(reg_X_axis)
reg_X_axis.head()

Unnamed: 0,Id,Area,Bedrooms,Bathrooms,Floors,YearBuilt,Location_Downtown,Location_Rural,Location_Suburban,Location_Urban,Condition_Excellent,Condition_Fair,Condition_Good,Condition_Poor,Garage_No,Garage_Yes
0,1,1360,5,4,3,1970,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,2,4272,5,4,3,1958,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2,3,3592,2,2,3,1938,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,4,966,4,2,2,1902,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4,5,4926,1,4,2,1975,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


In [134]:
reg_X_axis = data_scale(reg_X_axis)
reg_X_axis.head()

  x = x.apply(pd.to_numeric, errors='ignore')


Unnamed: 0,Id,Area,Bedrooms,Bathrooms,Floors,YearBuilt,Location_Downtown,Location_Rural,Location_Suburban,Location_Urban,Condition_Excellent,Condition_Fair,Condition_Good,Condition_Poor,Garage_No,Garage_Yes
0,-1.731185,-1.101471,1.401791,1.305568,1.244151,0.238155,1.607554,-0.557329,-0.564262,-0.565802,1.707013,-0.593519,-0.547307,-0.582739,0.962695,-0.962695
1,-1.729453,1.147485,1.401791,1.305568,1.244151,-0.095942,1.607554,-0.557329,-0.564262,-0.565802,1.707013,-0.593519,-0.547307,-0.582739,0.962695,-0.962695
2,-1.727721,0.622317,-0.704581,-0.498326,1.244151,-0.65277,1.607554,-0.557329,-0.564262,-0.565802,-0.585819,-0.593519,1.827127,-0.582739,0.962695,-0.962695
3,-1.725989,-1.405759,0.699667,-0.498326,0.008035,-1.655061,-0.622063,-0.557329,1.772226,-0.565802,-0.585819,1.684865,-0.547307,-0.582739,-1.03875,1.03875
4,-1.724257,1.652574,-1.406705,1.305568,0.008035,0.377363,1.607554,-0.557329,-0.564262,-0.565802,-0.585819,1.684865,-0.547307,-0.582739,-1.03875,1.03875


In [135]:
reg_y_axis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Price   2000 non-null   object
dtypes: object(1)
memory usage: 15.8+ KB


In [136]:
reg_y_axis = y_encoding(reg_y_axis)
reg_y_axis.head()

done int64


Unnamed: 0,Price
0,149919
1,424998
2,266746
3,244020
4,636056


In [137]:
for model_name,model in regression_models.items():
    grid = GridSearchCV(model['model'],model['para'],cv=3,n_jobs=-1,scoring='r2')
    grid.fit(reg_X_axis,reg_y_axis.values.ravel())
    print(f'model : {model_name}')
    print(f'Best Score : {grid.best_score_}')
    print(f'Best parameter : {grid.best_params_}')

model : LinearRegression
Best Score : -0.010802349299289657
Best parameter : {'fit_intercept': True}
model : Lasso
Best Score : -0.010778121047996506
Best parameter : {'alpha': 10, 'max_iter': 200}
model : Ridge
Best Score : -0.010614123212955665
Best parameter : {'alpha': 10, 'max_iter': 200}
model : RandomForestRegressor
Best Score : -0.012509117474923151
Best parameter : {'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 10, 'n_estimators': 300}
model : KNN
Best Score : -0.0811656238530342
Best parameter : {'n_neighbors': 10}
model : SVR
Best Score : -0.001889253766966988
Best parameter : {'C': 10, 'gamma': 'scale', 'kernel': 'linear'}


In [147]:
churn = pd.read_csv("C:\\Users\\Debjit\\Downloads\\telecom_churn.csv")
churn.head()

Unnamed: 0,Churn,AccountWeeks,ContractRenewal,DataPlan,DataUsage,CustServCalls,DayMins,DayCalls,MonthlyCharge,OverageFee,RoamMins
0,0,128,1,1,2.7,1,265.1,110,89.0,9.87,10.0
1,0,107,1,1,3.7,1,161.6,123,82.0,9.78,13.7
2,0,137,1,0,0.0,0,243.4,114,52.0,6.06,12.2
3,0,84,0,0,0.0,2,299.4,71,57.0,3.1,6.6
4,0,75,0,0,0.0,3,166.7,113,41.0,7.42,10.1


In [139]:
churn = clean(churn,'Churn')
churn.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3333 entries, 0 to 3332
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Churn            3333 non-null   float64
 1   AccountWeeks     3333 non-null   float64
 2   ContractRenewal  3333 non-null   float64
 3   DataPlan         3333 non-null   float64
 4   DataUsage        3333 non-null   float64
 5   CustServCalls    3333 non-null   float64
 6   DayMins          3333 non-null   float64
 7   DayCalls         3333 non-null   float64
 8   MonthlyCharge    3333 non-null   float64
 9   OverageFee       3333 non-null   float64
 10  RoamMins         3333 non-null   float64
dtypes: float64(11)
memory usage: 286.6 KB


In [140]:
#churn = outlier_detection(churn,'Churn')
#churn.info()

In [141]:
churn_X = churn.drop(['Churn'],axis=1)
churn_y = churn[['Churn']]

In [142]:
churn_y.head()

Unnamed: 0,Churn
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [143]:
churn_X = feature_engineering(churn_X)

Index([], dtype='object')
AccountWeeks : 1585.3243329733511
ContractRenewal : 0.08751820276537103
ContractRenewal
DataPlan : 0.20010479895874375
DataPlan
DataUsage : 1.619197982930606
CustServCalls : 1.7299974821964648
DayMins : 2965.8063885668494
DayCalls : 402.6472983932057
MonthlyCharge : 269.73356472820996
OverageFee : 6.427905736208184
RoamMins : 7.792029519783663


  x = x.apply(pd.to_numeric, errors='ignore')


In [144]:
churn_X = data_scale(churn_X)

  x = x.apply(pd.to_numeric, errors='ignore')


In [145]:
if iscat(churn_y):
    churn_y = y_encoding(churn_y)

In [146]:
for model_name,model in classification_models.items():
    grid = GridSearchCV(model['model'],model['para'],cv=3,n_jobs=-1)
    grid.fit(churn_X,churn_y.values.ravel())
    print(f'model : {model_name}')
    print(f'Best Score : {grid.best_score_}')
    print(f'Best parameter : {grid.best_params_}')

15 fits failed out of a total of 30.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Debjit\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Debjit\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\Debjit\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1218, in fit
    solver = _check_solver(

model : LogisticRegression
Best Score : 0.8589858985898591
Best parameter : {'C': 1, 'penalty': 'l2'}
model : RandomForestClassifier
Best Score : 0.9240924092409241
Best parameter : {'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': 1, 'n_estimators': 200}
model : KNN
Best Score : 0.8964896489648965
Best parameter : {'n_neighbors': 7}
model : SVC
Best Score : 0.9165916591659166
Best parameter : {'C': 5, 'gamma': 'scale', 'kernel': 'rbf'}
model : BernoulliNB
Best Score : 0.8550855085508551
Best parameter : {'alpha': 0.01}
model : GaussianNB
Best Score : 0.8682868286828683
Best parameter : {'var_smoothing': 1e-08}
