# Understanding: building a DataFrame column by column

In [86]:
import numpy as np
import pandas as pd

# Small 2x4 example matrix used for the demonstration.
matrs = np.array([[1, 2, 3, 4],
                  [5, 6, 7, 8]])

# One label per matrix column.
column_names = ['C', 'S', 'Q', 'N']

# Start from an empty frame and fill it one column at a time.
X = pd.DataFrame()

# Each row of the transpose is one column of the original matrix, so pairing
# the labels with the transposed rows assigns the columns in order.
for label, column_values in zip(column_names, matrs.T):
    X[label] = column_values
    print(X[label])        # the column as stored in the DataFrame (a Series)
    print('-------')
    print(column_values)   # the same values as the raw NumPy row of matrs.T
    print('-----------------------')

# Show the assembled DataFrame.
print(X)


0    1
1    5
Name: C, dtype: int64
-------
[1 5]
-----------------------
0    2
1    6
Name: S, dtype: int64
-------
[2 6]
-----------------------
0    3
1    7
Name: Q, dtype: int64
-------
[3 7]
-----------------------
0    4
1    8
Name: N, dtype: int64
-------
[4 8]
-----------------------
   C  S  Q  N
0  1  2  3  4
1  5  6  7  8


# AdaBoost Classifier

In [131]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

In [89]:
# Load the breast-cancer data as a (features, labels) pair.
# NOTE: this rebinds `X`, which the first cell used for the demo DataFrame.
X, y = load_breast_cancer(return_X_y=True)


In [90]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [124]:
# Decision tree that AdaBoost uses as its base estimator.
base_estimator = DecisionTreeClassifier(random_state=42)

# Boosted ensemble built on that tree, with a fixed seed for reproducibility.
adaboost = AdaBoostClassifier(
    estimator=base_estimator,
    algorithm='SAMME',
    random_state=42,
)

In [126]:
# Hyper-parameter grid for the search. The `estimator__` prefix routes
# `max_depth` to the base estimator wrapped by AdaBoost.
param_grid = dict(
    n_estimators=[10, 50, 100, 200],
    learning_rate=[0.01, 0.1, 1, 10],
    estimator__max_depth=[1, 2, 3, None],
)

In [128]:
# Exhaustive search over param_grid: 5-fold cross-validation, scored by
# accuracy, run in a single process with progress messages enabled.
grid_search = GridSearchCV(
    estimator=adaboost,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=1,
    verbose=1,
)

In [94]:
# Run the search on the training split only; the test split stays unseen.
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 64 candidates, totalling 320 fits


In [102]:
# Pull the winning configuration and its cross-validated score out of the
# fitted search, then report both.
best_params = grid_search.best_params_
best_cv_accuracy = grid_search.best_score_

print('Best parameters:', best_params)
print('Best Cross-Validation Accuracy:', best_cv_accuracy)

Best parameters: {'estimator__max_depth': 1, 'learning_rate': 1, 'n_estimators': 50}
Best Cross-Validation Accuracy: 0.9802197802197803


In [104]:
# Estimator selected by the grid search (GridSearchCV's `best_estimator_`).
best_model = grid_search.best_estimator_

In [112]:
# grid_search was built without overriding `refit`, so GridSearchCV has already
# retrained the winning configuration on all of X_train / y_train. Fitting
# best_model again would only repeat that work, so predict directly on the
# held-out test split.
y_pred = best_model.predict(X_test)

In [114]:
# Evaluate the held-out predictions: per-class precision/recall/F1 and the
# overall accuracy.
report = classification_report(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)

print("\nClassification Report:\n", report)
print("Test Accuracy:", test_accuracy)


Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

Test Accuracy: 0.9649122807017544


# AdaBoost Regressor

In [134]:
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error

In [150]:
# Fetch the California housing dataset with pandas containers (as_frame=True)
# and print the description text bundled with it.
data = fetch_california_housing(as_frame=True)
dataset_description = data.DESCR
print(dataset_description)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

:Number of Instances: 20640

:Number of Attributes: 8 numeric, predictive attributes and the target

:Attribute Information:
    - MedInc        median income in block group
    - HouseAge      median house age in block group
    - AveRooms      average number of rooms per household
    - AveBedrms     average number of bedrooms per household
    - Population    block group population
    - AveOccup      average number of household members
    - Latitude      block group latitude
    - Longitude     block group longitude

:Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived from the 1990 U.S. census, using one row per ce

In [138]:
# DataFrame view of the dataset (the `frame` attribute of the fetched object).
df = data.frame

In [140]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [None]:
# Feature matrix: every column except the regression target `MedHouseVal`.
# The original `df.drop('')` raised KeyError because '' is not a label in df.
X = df.drop(columns='MedHouseVal')