In [1]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

In [21]:
# Generate sample data
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

In [23]:
print(X)

[[ 1.57587581  0.89186302 -0.72677414 ...  0.99455824 -0.29969606
  -0.76176037]
 [ 0.53545875  1.26666125 -1.77987511 ... -0.55527309  0.74131982
  -0.98752296]
 [-0.68627914  0.15712792  0.14583647 ...  0.19791667 -0.40436199
  -0.60747229]
 ...
 [ 0.42796263 -0.83359828  1.63925652 ...  0.19188955 -0.5552861
  -0.16055961]
 [-0.27372925 -1.38059871  0.14959796 ... -0.06963345 -0.68363428
   0.68916446]
 [ 0.5376299   0.37915312 -0.2799933  ... -2.39030437  0.91347373
  -0.41222075]]


In [25]:
print(y)

[-6.29556571e+01  1.08721847e+02 -1.15541354e+02  3.64131648e+01
  1.30839469e+02 -2.32704094e+01 -7.83456593e+01  2.10862963e+02
  3.27271995e+01 -7.27438293e+01  6.24736179e+01  1.79523181e+01
 -5.44888341e+00  9.63867421e+01  1.92925713e+01 -2.40039526e+02
  1.52130675e+02  1.58105957e+02 -1.69008596e+01  6.31341639e+01
  2.04941141e+01 -1.18586946e+02 -3.56174833e+01 -4.17994447e+01
  1.61756769e+02  1.54949653e+02 -1.48762751e+02 -8.15427407e+01
 -7.14296188e+01 -2.34454272e+01 -9.87111979e+01 -1.60329917e+02
  1.00161943e+02 -1.21219603e+02  8.32990308e+00  1.06355461e+02
 -1.58098861e+02 -5.02392356e+01  1.24894705e+02 -1.14965187e+01
  4.21526401e+01  1.10041943e+02 -2.13806945e+02  1.69435317e+02
  1.95328435e+02 -2.51304669e+02  1.66546496e+02  8.73710227e+01
  7.63459987e+01  1.90879193e+02  3.14961317e+02 -2.62593616e+02
  1.31988301e+02 -2.76115351e+01  1.16430812e+01 -7.55574003e+01
  2.12818265e+02  1.91690144e+02  7.03259880e+01 -1.01398682e+02
 -6.94385430e+01  6.41303

In [27]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [29]:
X_train

array([[-1.02123282, -0.55519953,  0.24380071, ..., -2.19880596,
        -0.50205422,  0.44001445],
       [ 1.00730436, -1.75550559, -0.86442671, ..., -0.62483462,
         0.73215123, -1.10128649],
       [ 0.19091652, -0.47579544,  0.1469353 , ..., -1.35055596,
        -1.68466758, -0.07587145],
       ...,
       [ 2.57199532, -1.24224653,  0.60873559, ..., -0.06787138,
         0.73967547,  0.43163163],
       [ 0.03408347, -0.37912774,  0.23378591, ..., -1.01475673,
        -1.22394027, -0.64927755],
       [-0.32307574,  1.42395385,  0.97520054, ...,  0.13931963,
        -1.50080202,  1.11091114]])

In [31]:
y_train

array([-2.34454272e+01,  7.16747503e+00, -8.64913817e+01, -5.99956768e+00,
       -1.93333233e+02, -1.30192029e+02, -3.73631135e+01, -1.39561642e+02,
       -5.93866885e+00,  1.24337600e+02, -1.48613070e+02,  8.88611074e+01,
       -1.27933067e+01,  1.59573135e+02, -1.31279025e+02, -4.55911807e+00,
        2.76173481e+02,  1.74094884e+02,  3.15745561e+02,  9.67834284e+00,
        1.21592007e+02,  1.76903044e+02,  4.34485501e+01,  1.90879193e+02,
        1.95144397e+02, -1.15541354e+02,  5.71892839e+00,  2.59207361e+01,
        2.11593555e+02, -1.45188546e+02,  2.62356651e+02,  2.91143405e+02,
        9.03307346e+01,  9.66165699e+01, -1.21219603e+02, -1.60329917e+02,
       -4.77937388e+01, -1.50432508e+02, -2.51226019e+02,  1.10461771e+02,
        3.64131648e+01,  2.41811081e+02,  1.57206007e+02, -3.78387694e+01,
        1.38181951e+02,  7.14951932e+01, -2.14689504e+01,  1.59674235e+02,
       -1.22560537e+02,  1.05896037e+02,  1.02494212e+02,  1.84474735e+02,
       -5.24437874e+00, -

In [33]:
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)

In [35]:
print("""
Explanation
Data Generation: We generate a synthetic dataset with a linear trend and some noise.
Data Splitting: The dataset is split into training and testing sets.
Model Training: We create and train a GradientBoostingRegressor model with 100 estimators, a learning rate of 0.1, and a maximum depth of 3.
Prediction: The model makes predictions on the test data.
Evaluation: The model's performance is evaluated using Mean Squared Error (MSE) and R-squared value.
Visualization: The actual vs. predicted values are plotted to visualize the model's performance.
Parameters of GradientBoostingRegressor
n_estimators: The number of boosting stages to be run (i.e., the number of trees).
learning_rate: Determines the contribution of each tree. Smaller values lead to more robust models.
max_depth: The maximum depth of the individual regression estimators.
random_state: Controls the random number generator for reproducibility.
Summary
Gradient Boosting Regression: An ensemble method that builds sequential decision trees to minimize prediction errors.
Sequential Training: Each new tree corrects the residuals of the previous trees.
Evaluation Metrics: Use MSE and R-squared to evaluate model performance.
Parameters: Adjust n_estimators, learning_rate, and max_depth to optimize the model.
Gradient Boosting Regression is a powerful technique for predictive modeling, especially when dealing with complex and non-linear relationships.
""")


Explanation
Data Generation: We generate a synthetic dataset with a linear trend and some noise.
Data Splitting: The dataset is split into training and testing sets.
Model Training: We create and train a GradientBoostingRegressor model with 100 estimators, a learning rate of 0.1, and a maximum depth of 3.
Prediction: The model makes predictions on the test data.
Evaluation: The model's performance is evaluated using Mean Squared Error (MSE) and R-squared value.
Visualization: The actual vs. predicted values are plotted to visualize the model's performance.
Parameters of GradientBoostingRegressor
n_estimators: The number of boosting stages to be run (i.e., the number of trees).
learning_rate: Determines the contribution of each tree. Smaller values lead to more robust models.
max_depth: The maximum depth of the individual regression estimators.
random_state: Controls the random number generator for reproducibility.
Summary
Gradient Boosting Regression: An ensemble method that builds se

In [37]:
# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 1239.3668657180506


In [43]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [51]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Generate sample data
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_clusters_per_class=1, random_state=42)

print(X)
print(y)


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f'Accuracy Score: {accuracy}')
print('Confusion Matrix:')
print(conf_matrix)

[[ 3.72811627  1.6982774   0.79283748 ... -0.34931051  0.73747667
  -4.88798756]
 [ 2.9825839  -0.77780799 -1.74268767 ... -3.1687581   1.69523164
  -0.94897593]
 [ 2.71200952  1.43911868  6.94368474 ... -4.18283324  0.13584056
   1.21526962]
 ...
 [ 1.04595906  0.68346134  1.46819254 ...  2.70438267  0.56890408
  -4.11073977]
 [ 0.7284482   2.29070411  0.41736198 ...  2.27570155  3.88918335
  -0.71470104]
 [ 0.67789235  6.08064068 -2.9015902  ... -0.47047576 -1.48819308
  -5.77484811]]
[0 0 0 1 1 1 1 0 1 1 1 1 0 1 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 1 0 0 1 1 1 1 0
 1 1 0 1 0 1 0 0 1 0 0 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0
 1 1 0 0 1 0 1 1 1 0 0 0 1 0 0 0 1 0 0 1 1 1 1 0 0 1 0 1 1 0 1 0 1 1 1 1 1
 0 0 0 1 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 1 1 0 1 0 0
 0 1 0 0 1 1 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 1 1 1 0 1
 1 0 1 0 0 0 1 1 0 1 0 1 1 0 1 1 0 1 0 1 1 1 0 1 0 0 1 0 1 0 0 0 0 1 0 1 0
 1 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 1 1 1 0 0 1

In [53]:
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 100, criterion="entropy")  
classifier.fit(X_train, y_train)  

In [57]:
y_pred1= classifier.predict(X_test)  

In [65]:
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score
cm= confusion_matrix(y_test, y_pred1)
accuracy = accuracy_score(y_test, y_pred1)
print(f'Accuracy Score: {accuracy}')

Accuracy Score: 0.98


In [61]:
print(cm)

[[ 92   1]
 [  3 104]]


In [69]:
import pandas as pd
data_set= pd.read_csv(r"C:\Users\keert\machine learning\User_Data (1).csv")  

In [71]:
data_set

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [98]:
x=data_set[['Age','EstimatedSalary']]
y=data_set[['Purchased']]
# print(x)
# print(y)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
# print(X_train)
# print(y_train)
model = GradientBoostingClassifier(n_estimators=10, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f'Accuracy Score: {accuracy}')
print('Confusion Matrix:')
print(conf_matrix)


Accuracy Score: 0.9125
Confusion Matrix:
[[47  5]
 [ 2 26]]


  y = column_or_1d(y, warn=True)
