In [49]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [50]:
# Toy categorical data used by the encoding examples in the following cells.
basket = [
    'apple',
    'orange',
    'grape',
    'strawberry',
]

In [51]:
# Encode each fruit name in `basket` as an integer class id.
# LabelEncoder lives in sklearn.preprocessing and has to be imported in the
# notebook's import cell; without it this cell raises NameError on a fresh
# kernel. Ids follow the sorted order of the distinct names, which is why
# 'orange' maps to 2 and 'grape' to 1 in the printed result.
encoder = LabelEncoder()
labels = encoder.fit_transform(basket)
print(labels)

[0 2 1 3]


In [52]:
# Round-trip check: encode the fruit names to integer ids and decode them back.
# fit_transform already fits the encoder, so a separate fit() call beforehand
# would only repeat the same work.
encoder = LabelEncoder()
labels = encoder.fit_transform(basket)

# inverse_transform maps the integer ids back to the original strings.
original_labels = encoder.inverse_transform(labels)
original_labels

array(['apple', 'orange', 'grape', 'strawberry'], dtype='<U10')

In [53]:
# One-hot encode the fruit names.
# Both LabelEncoder and OneHotEncoder come from sklearn.preprocessing and must
# be imported in the notebook's import cell for this cell to run on a fresh
# kernel.
encoder = LabelEncoder()

# OneHotEncoder expects a 2-D input, so the 1-D integer ids are reshaped into a
# single column (one row per fruit).
labels = encoder.fit_transform(basket).reshape(-1, 1)

onehot_encoder = OneHotEncoder()
onehot_labels = onehot_encoder.fit_transform(labels)

# Convert the encoder's result to a dense array for display.
onehot_labels.toarray()

array([[1., 0., 0., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.]])

In [54]:
# Load the iris dataset: keep the class labels as `target` and wrap the
# feature matrix in a DataFrame with named columns.
iris = load_iris()
target = iris.target
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [55]:
# Replace missing feature values with the per-column mean.
imputer = SimpleImputer(strategy='mean')
imputer.fit(iris_df)
data_imputed = imputer.transform(iris_df)


In [56]:
# Standardise the imputed features and put the result back into a DataFrame
# carrying the original feature names.
scaler = StandardScaler()
data_scaled = pd.DataFrame(
    scaler.fit_transform(data_imputed),
    columns=iris.feature_names,
)


In [57]:
# Hold out 20% of the rows for evaluation.
#
# The split is done on the raw feature frame, and the imputer/scaler used for
# the model inputs are fitted on the training rows only. Splitting a frame that
# was already imputed and standardised on *all* rows would let test-set
# statistics leak into the features the models are evaluated on.
# Same random_state and row count as before, so the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    iris_df, target, test_size=0.2, random_state=42
)

# Dedicated transformers so the notebook-level `imputer` / `scaler` objects
# are not re-fitted as a side effect of running this cell.
train_imputer = SimpleImputer(strategy='mean')
train_scaler = StandardScaler()

# Learn the preprocessing parameters from the training rows ...
X_train = pd.DataFrame(
    train_scaler.fit_transform(train_imputer.fit_transform(X_train_raw)),
    columns=iris.feature_names,
    index=X_train_raw.index,
)
# ... and only apply them to the held-out rows.
X_test = pd.DataFrame(
    train_scaler.transform(train_imputer.transform(X_test_raw)),
    columns=iris.feature_names,
    index=X_test_raw.index,
)


In [58]:
# Baseline logistic regression: fit on the training split and report accuracy
# on the held-out split.
log_reg = LogisticRegression(random_state=42).fit(X_train, y_train)

y_pred_log_reg = log_reg.predict(X_test)
accuracy_log_reg = accuracy_score(y_true=y_test, y_pred=y_pred_log_reg)

print(f'Logistic Regression Accuracy: {accuracy_log_reg:.2f}')


Logistic Regression Accuracy: 1.00


In [59]:
# Baseline random forest: fit on the training split and report accuracy on
# the held-out split.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)

print(f'Random Forest Accuracy: {accuracy_rf:.2f}')



Random Forest Accuracy: 1.00


In [60]:
# Tune the logistic-regression regularisation strength C with an exhaustive
# 5-fold cross-validated grid search on the training split.
param_grid = {'C': [0.1, 1, 10]}
grid_search = GridSearchCV(
    estimator=LogisticRegression(random_state=42),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)

# Score the best estimator found by the search on the held-out split.
best_log_reg = grid_search.best_estimator_
y_pred_best_log_reg = best_log_reg.predict(X_test)
accuracy_best_log_reg = accuracy_score(y_true=y_test, y_pred=y_pred_best_log_reg)

print(f'best Logistic Regression Accuracy (Grid Search): {accuracy_best_log_reg:.2f}')


best Logistic Regression Accuracy (Grid Search): 1.00


In [62]:
# Tune the random forest with a randomised search: 10 candidates are drawn
# from the 3 x 4 combinations of tree count and maximum depth below.
param_dist = {
    'n_estimators': [10, 50, 100],
    'max_depth': [None, 10, 20, 30],
}
random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Score the best estimator found by the search on the held-out split.
best_rf = random_search.best_estimator_
y_pred_best_rf = best_rf.predict(X_test)
accuracy_best_rf = accuracy_score(y_true=y_test, y_pred=y_pred_best_rf)

print(f'best random Forest Accuracy (Random Search): {accuracy_best_rf:.2f}')


best random Forest Accuracy (Random Search): 1.00
