1

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


In [2]:
np.random.seed(42)
num_samples = 100

In [5]:
data = pd.DataFrame({
    'Feature_1': np.random.randn(num_samples) * 10,
    'Feature_2': np.random.rand(num_samples) * 100,
    'Feature_3': np.random.randint(1, 50, num_samples),
    'Target': np.random.choice([0, 1], num_samples)
})

In [6]:
print("\ndata")
print(data.head())


data
   Feature_1  Feature_2  Feature_3  Target
0   4.967142  41.741100         34       1
1  -1.382643  22.210781         21       0
2   6.476885  11.986537         30       1
3  15.230299  33.761517         33       1
4  -2.341534  94.290970         28       1


In [7]:
X = data.drop(columns=['Target'])
y = data['Target']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
print("\nsize:", X_train.shape)
print("test size:", X_test.shape)


size: (80, 3)
test size: (20, 3)


In [10]:
print("\nbase statistic:")
print(data.describe())


base statistic:
        Feature_1   Feature_2   Feature_3  Target
count  100.000000  100.000000  100.000000  100.00
mean    -1.038465   48.620619   27.550000    0.55
std      9.081684   28.814392   13.740286    0.50
min    -26.197451    0.506158    1.000000    0.00
25%     -6.009057   24.127969   18.750000    0.00
50%     -1.269563   50.738604   29.000000    1.00
75%      4.059521   69.467614   39.000000    1.00
max     18.522782   98.565045   49.000000    1.00


2

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [12]:
np.random.seed(42)
num_samples = 100

In [14]:
data = pd.DataFrame({
    'Feature_1': np.random.randn(num_samples) * 10,
    'Feature_2': np.random.rand(num_samples) * 100,
    'Feature_3': np.random.randint(1, 50, size=num_samples),
    'Target': np.random.choice([0, 1], size=num_samples)
})

In [15]:
print("\nPreview of the dataset:")
print(data.head())


Preview of the dataset:
   Feature_1  Feature_2  Feature_3  Target
0   4.967142  41.741100         34       1
1  -1.382643  22.210781         21       0
2   6.476885  11.986537         30       1
3  15.230299  33.761517         33       1
4  -2.341534  94.290970         28       1


In [17]:
X = data.drop(columns=['Target'])
y = data['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [22]:
y_pred = model.predict(X_test)

In [23]:
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Model Accuracy: 0.40

Classification Report:
              precision    recall  f1-score   support

           0       0.25      0.25      0.25         8
           1       0.50      0.50      0.50        12

    accuracy                           0.40        20
   macro avg       0.38      0.38      0.38        20
weighted avg       0.40      0.40      0.40        20



3

In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [25]:
np.random.seed(42)
num_samples = 100

In [26]:
data = pd.DataFrame({
    'Feature_1': np.random.randn(num_samples) * 10,
    'Feature_2': np.random.rand(num_samples) * 100,
    'Feature_3': np.random.randint(1, 50, size=num_samples),
    'Target': np.random.choice([0, 1], size=num_samples)
})


In [27]:
print("\nPreview of the dataset:")
print(data.head())


Preview of the dataset:
   Feature_1  Feature_2  Feature_3  Target
0   4.967142  41.741100         34       1
1  -1.382643  22.210781         21       0
2   6.476885  11.986537         30       1
3  15.230299  33.761517         33       1
4  -2.341534  94.290970         28       1


In [29]:
X = data.drop(columns=['Target'])
y = data['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()


In [30]:
param_grid = {
    'C': [0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear']
}

In [31]:
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

In [32]:
accuracy = accuracy_score(y_test, y_pred)
print(f"\nBest Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Best Model Accuracy: 0.35

Classification Report:
              precision    recall  f1-score   support

           0       0.22      0.25      0.24         8
           1       0.45      0.42      0.43        12

    accuracy                           0.35        20
   macro avg       0.34      0.33      0.34        20
weighted avg       0.36      0.35      0.35        20



In [33]:
print("\n✅ Best Hyperparameters:")
print(grid_search.best_params_)



✅ Best Hyperparameters:
{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}


4

In [34]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report


In [36]:
iris = datasets.load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [37]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [38]:
svm_clf = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
svm_clf.fit(X_train, y_train)


In [39]:
y_pred = svm_clf.predict(X_test)

In [40]:
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)


In [41]:
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:\n', report)


Accuracy: 1.00
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



5

In [7]:
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


In [8]:
data = datasets.load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
svm = SVC()
param_grid = {
    'C': [0.1, 1, 10, 100], 
    'kernel': ['linear', 'rbf', 'poly'], 
    'gamma': ['scale', 'auto']
}


In [11]:
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)
print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}


In [6]:
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

Test Accuracy: 1.0


6

In [14]:
!pip install xgboost


Collecting xgboost
  Downloading xgboost-2.1.4-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.4-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.1/124.9 MB 648.1 kB/s eta 0:03:13
   ---------------------------------------- 0.1/124.9 MB 819.2 kB/s eta 0:02:33
   ---------------------------------------- 0.2/124.9 MB 1.4 MB/s eta 0:01:32
   ---------------------------------------- 0.5/124.9 MB 2.1 MB/s eta 0:01:00
   ---------------------------------------- 0.6/124.9 MB 2.3 MB/s eta 0:00:55
   ---------------------------------------- 0.8/124.9 MB 2.9 MB/s eta 0:00:43
   ---------------------------------------- 0.9/124.9 MB 2.6 MB/s eta 0:00:49
   ---------------------------------------- 0.9/124.9 MB 2.6 MB/s eta 0:00:49
    --------------------------------------- 1.9/124.9 MB 4.6 MB/s eta 0:00:27
    


[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: C:\Users\ronyi\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [15]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
model = XGBClassifier(
    n_estimators=100,  # Number of boosting rounds
    learning_rate=0.1,  # Step size shrinkage
    max_depth=7,  # Depth of trees
    subsample=0.8,  # Fraction of training samples
    colsample_bytree=0.8,  # Fraction of features used per tree
    gamma=0,  # Minimum loss reduction to make a split
    objective="binary:logistic",  # For binary classification
    eval_metric="logloss",  # Evaluation metric
    use_label_encoder=False
)
model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [26]:
y_pred = model.predict(X_test)

In [25]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 1.0000


7

In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score
import numpy as np


In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [28]:
param_grid = {
    'n_estimators': [100, 200, 300],
    'lerning_rate': [0.1, 0.01, 0.001],
    'max_depth': [3, 5, 7],
    'min_child_weight': [1, 5, 10],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'gamma': [0, 0.1, 0.2],
}

In [31]:
xgb_model = XGBClassifier(objective="binary:logistic", eval_metric="logloss", use_label_encoder=False)
grid_search = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    verbose=1,
    n_jobs=-1
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 2187 candidates, totalling 10935 fits


Parameters: { "lerning_rate", "use_label_encoder" } are not used.



In [32]:
print(f"Best Parameters: {grid_search.best_params_}")
best_model = grid_search.best_estimator_


Best Parameters: {'colsample_bytree': 0.6, 'gamma': 0, 'lerning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 300, 'subsample': 0.6}


In [33]:
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Best Model: {accuracy:.4f}")


Accuracy of Best Model: 1.0000
