# **Classification Problem**

In [36]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import numpy as np    
import pandas as pd

In [37]:
# Synthetic classification dataset: 1000 samples with 20 features each,
# generated with a fixed seed so the data is the same on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    random_state=42,
)

In [38]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [39]:
from xgboost import XGBClassifier

In [40]:
# Baseline model: XGBoost classifier left at the library's default hyperparameters.
clf = XGBClassifier()

In [41]:
# Train the baseline classifier on the training split.
clf.fit(X_train, y_train)

In [42]:
# Predictions of the baseline classifier on the held-out test split.
y_pred = clf.predict(X_test)

In [43]:
from sklearn.metrics import classification_report,accuracy_score

In [44]:
# Share of test samples whose predicted label equals the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.895

In [45]:
# Per-class precision, recall, F1 and support for the baseline predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.85      0.95      0.89        93
           1       0.95      0.85      0.90       107

    accuracy                           0.90       200
   macro avg       0.90      0.90      0.89       200
weighted avg       0.90      0.90      0.90       200



## Hyperparameter Tuning

In [46]:
from sklearn.model_selection import GridSearchCV

In [47]:
# Candidate values per hyperparameter; the search tries every combination (3 x 3 x 3 = 27).
param_grid = {
    "learning_rate": [0.01, 0.1, 0.2],
    "max_depth": [3, 4, 5],
    "n_estimators": [50, 100, 200],
}

# Exhaustive 3-fold cross-validated search over the grid, scored by accuracy.
grid_clf = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    cv=3,
    scoring="accuracy",
)
grid_clf.fit(X_train, y_train)

In [48]:
# Hyperparameter combination selected by the grid search (scored by accuracy).
# NOTE(review): the values reported below sit on the edges of param_grid
# (lowest learning_rate, lowest max_depth, highest n_estimators) — consider
# widening the grid in those directions before treating them as the optimum.
grid_clf.best_params_

{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200}

In [52]:
# Estimator carrying the best hyperparameters found by the grid search.
best_clf = grid_clf.best_estimator_

In [55]:
# GridSearchCV was created with its default refit=True, so best_estimator_ has
# already been retrained on all of X_train / y_train with the best parameters.
# Fitting it again on the same data only repeats that training, so simply
# display the fitted model here.
best_clf

In [56]:
# Predictions of the tuned classifier on the held-out test split
# (replaces the baseline predictions stored under the same name).
y_pred = best_clf.predict(X_test)

In [57]:
# Accuracy of the tuned classifier on the test split.
# scikit-learn metrics take (y_true, y_pred) in that order; accuracy happens to
# be symmetric, but keeping the documented order matches the baseline cell and
# avoids wrong results if this line is adapted to an asymmetric metric.
accuracy_score(y_test, y_pred)

0.905

# **Regression Problem**

In [19]:
from sklearn.datasets import make_regression

# Synthetic regression dataset: 1000 samples with 20 features each, fixed seed.
# Note: this rebinds X and y, replacing the classification data.
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    random_state=42,
)

In [20]:
# Hold out 20% of the regression samples as a test set; seeded for a stable split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [21]:
from xgboost import XGBRegressor

In [22]:
# Baseline model: XGBoost regressor left at the library's default hyperparameters.
reg = XGBRegressor()

In [24]:
# Train the regressor on the training split.
reg.fit(X_train, y_train)

In [25]:
# Predicted target values for the held-out test split.
y_pred = reg.predict(X_test)

In [28]:
from sklearn.metrics import mean_squared_error,r2_score

In [27]:
# Mean squared error between the true and predicted test targets.
mean_squared_error(y_true=y_test, y_pred=y_pred)

np.float64(6528.914936401449)

In [30]:
# Root mean squared error: square root of the MSE, in the same units as the target.
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
print(f"RMSE: {rmse}")

RMSE: 80.80170132120641


In [29]:
# Coefficient of determination (R^2) of the predictions on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.8291767895101247