In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris bunch and pull out the feature matrix and the class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build a depth-unlimited Gini tree and train it in one step
# (fit() returns the fitted estimator itself)
clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=None,
    random_state=42,
).fit(X_train, y_train)

# Predict class labels for the held-out rows
y_pred = clf.predict(X_test)

# Report the share of held-out rows that were classified correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.2f}')


Accuracy: 1.00


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# Read the Pima Indians Diabetes CSV; the file is read with header=None,
# so the column names are supplied explicitly here
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
column_names = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]
data = pd.read_csv(url, header=None, names=column_names)

# NOTE(review): zeros in Glucose/BloodPressure/SkinThickness/Insulin/BMI are
# presumably missing-value placeholders in this dataset - confirm, and consider
# imputing them before drawing conclusions from the scores below.

# Everything except 'Outcome' is a feature; 'Outcome' is the label to predict
X = data.drop(columns='Outcome')
y = data['Outcome']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build a depth-unlimited Gini tree and train it in one step
# (fit() returns the fitted estimator itself)
clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=None,
    random_state=42,
).fit(X_train, y_train)

# Predict outcomes for the held-out rows
y_pred = clf.predict(X_test)

# Overall accuracy first, then the per-class precision/recall/F1 table
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.2f}')
report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)


Accuracy: 0.70

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.71      0.76       151
           1       0.56      0.69      0.61        80

    accuracy                           0.70       231
   macro avg       0.68      0.70      0.69       231
weighted avg       0.72      0.70      0.71       231



In [5]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the California Housing bunch and pull out features and target
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build a depth-unlimited squared-error regression tree and train it in one step
# (fit() returns the fitted estimator itself)
regressor = DecisionTreeRegressor(
    criterion='squared_error',
    max_depth=None,
    random_state=42,
).fit(X_train, y_train)

# Predict targets for the held-out rows
y_pred = regressor.predict(X_test)

# Error metrics on the held-out rows: MSE, its square root, and R^2
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse:.2f}')
print(f'Root Mean Squared Error: {rmse:.2f}')
print(f'R^2 Score: {r2:.2f}')


Mean Squared Error: 0.53
Root Mean Squared Error: 0.73
R^2 Score: 0.60


In [12]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

def _regression_scores(y_true, y_hat):
    """Return (MSE, RMSE, R^2) for predictions `y_hat` against targets `y_true`."""
    mse_value = mean_squared_error(y_true, y_hat)
    return mse_value, np.sqrt(mse_value), r2_score(y_true, y_hat)


# Fetch the California Housing bunch and pull out features and target
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Bagging: 100 decision-tree regressors combined by BaggingRegressor
# (fit() returns the fitted estimator itself)
bagging = BaggingRegressor(
    DecisionTreeRegressor(random_state=42),
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
y_pred_bagging = bagging.predict(X_test)

# Random Forest: 100 trees, all other settings left at their defaults.
# NOTE(review): max_features is not set here; per the scikit-learn docs the
# regressor's default considers every feature at each split, which makes this
# equivalent to bagged trees - presumably why the two score lines printed
# below match. Confirm, and pass a smaller max_features to see a difference.
random_forest = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
y_pred_rf = random_forest.predict(X_test)

# Score both ensembles on the held-out rows
mse_bagging, rmse_bagging, r2_bagging = _regression_scores(y_test, y_pred_bagging)
mse_rf, rmse_rf, r2_rf = _regression_scores(y_test, y_pred_rf)

print(f'Bagging: Mean Squared Error: {mse_bagging:.2f}, Root Mean Squared Error: {rmse_bagging:.2f}, R^2 Score: {r2_bagging:.2f}')
print(f'Random Forest: Mean Squared Error: {mse_rf:.2f}, Root Mean Squared Error: {rmse_rf:.2f}, R^2 Score: {r2_rf:.2f}')


Bagging: Mean Squared Error: 0.26, Root Mean Squared Error: 0.51, R^2 Score: 0.80
Random Forest: Mean Squared Error: 0.26, Root Mean Squared Error: 0.51, R^2 Score: 0.80


In [13]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the California Housing bunch and pull out features and target
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Gradient Boosting hyperparameters, gathered in one place so they are easy to tune
gbm_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'random_state': 42,
}

# Build and train the boosted ensemble (fit() returns the fitted estimator itself)
gbm = GradientBoostingRegressor(**gbm_params).fit(X_train, y_train)

# Predict targets for the held-out rows
y_pred_gbm = gbm.predict(X_test)

# Error metrics on the held-out rows: MSE, its square root, and R^2
mse_gbm = mean_squared_error(y_true=y_test, y_pred=y_pred_gbm)
rmse_gbm = np.sqrt(mse_gbm)
r2_gbm = r2_score(y_true=y_test, y_pred=y_pred_gbm)

print(f'Gradient Boosting: Mean Squared Error: {mse_gbm:.2f}, Root Mean Squared Error: {rmse_gbm:.2f}, R^2 Score: {r2_gbm:.2f}')


Gradient Boosting: Mean Squared Error: 0.29, Root Mean Squared Error: 0.54, R^2 Score: 0.78


In [14]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Fetch the California Housing bunch and pull out features and target
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardize the features (important for neural networks). The scaler is
# fitted on the training rows only and then applied to both splits, so no
# test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Two hidden layers (64 then 32 units), ReLU activations, Adam optimizer;
# fit() returns the fitted estimator itself
mlp = MLPRegressor(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    solver='adam',
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict targets for the held-out rows (scaled the same way as the training rows)
y_pred_mlp = mlp.predict(X_test_scaled)

# Error metrics on the held-out rows: MSE, its square root, and R^2
mse_mlp = mean_squared_error(y_true=y_test, y_pred=y_pred_mlp)
rmse_mlp = np.sqrt(mse_mlp)
r2_mlp = r2_score(y_true=y_test, y_pred=y_pred_mlp)

print(f'Multi-Layer Perceptron: Mean Squared Error: {mse_mlp:.2f}, Root Mean Squared Error: {rmse_mlp:.2f}, R^2 Score: {r2_mlp:.2f}')


Multi-Layer Perceptron: Mean Squared Error: 0.26, Root Mean Squared Error: 0.51, R^2 Score: 0.80


In [15]:
# Side-by-side look at the first five test rows for every fitted regressor.
# This cell relies on `bagging`, `random_forest`, `gbm`, `mlp`, `X_test`,
# `X_test_scaled` and `y_test` left in the kernel by the cells above.
n_preview = 5
X_test_sample = X_test[:n_preview]
X_test_scaled_sample = X_test_scaled[:n_preview]

# The tree-based models take the raw features; the MLP takes the
# standardized features it was trained on
y_pred_bagging_sample = bagging.predict(X_test_sample)
y_pred_rf_sample = random_forest.predict(X_test_sample)
y_pred_gbm_sample = gbm.predict(X_test_sample)
y_pred_mlp_sample = mlp.predict(X_test_scaled_sample)

# Print each model's predictions, then the true targets for comparison
print("Bagging predictions:", y_pred_bagging_sample)
print("Random Forest predictions:", y_pred_rf_sample)
print("Gradient Boosting predictions:", y_pred_gbm_sample)
print("Multi-Layer Perceptron predictions:", y_pred_mlp_sample)
print("Actual values:", y_test[:n_preview])


Bagging predictions: [0.47841   0.73213   4.8208461 2.61201   2.30565  ]
Random Forest predictions: [0.47809   0.74566   4.8298161 2.61482   2.3314   ]
Gradient Boosting predictions: [0.549768   0.97830362 4.0455572  2.61149449 2.26133023]
Multi-Layer Perceptron predictions: [0.46894938 1.05460817 4.80313622 2.61902577 2.30838916]
Actual values: [0.477   0.458   5.00001 2.186   2.78   ]
