In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

In [5]:

# Step 1: Load the dataset (make sure the CSV is in your working directory)
churn_data = pd.read_csv('Telco-Customer-Churn.csv')

# Step 2: Drop unnecessary column (customerID is not useful for prediction)
churn_data = churn_data.drop('customerID', axis=1)

# Step 3: Convert TotalCharges to numeric (it's stored as string; some values are blank)
churn_data['TotalCharges'] = pd.to_numeric(churn_data['TotalCharges'], errors='coerce')
# Fill NaN in TotalCharges (only 11 rows) with median
churn_data['TotalCharges'].fillna(churn_data['TotalCharges'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  churn_data['TotalCharges'].fillna(churn_data['TotalCharges'].median(), inplace=True)


In [6]:
# Step 4: Encode categorical variables
label_encoders = {}
for column in churn_data.columns:
    if churn_data[column].dtype == 'object':
        le = LabelEncoder()
        churn_data[column] = le.fit_transform(churn_data[column])
        label_encoders[column] = le

# Now all columns are numeric
X = churn_data.iloc[:, :-1]  # All features except last column
y = churn_data.iloc[:, -1]   # Last column is 'Churn' (0 = No, 1 = Yes)

# Step 5: Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Step 6: Train XGBoost classifier
xg_cl = xgb.XGBClassifier(
    objective='binary:logistic',
    n_estimators=100,  # Increased for better performance
    seed=123,
    use_label_encoder=False,  # Suppress warning
    eval_metric='logloss'
)

xg_cl.fit(X_train, y_train)

# Step 7: Predict and evaluate
preds = xg_cl.predict(X_test)
accuracy = float(np.sum(preds == y_test)) / y_test.shape[0]
print("Accuracy: %f" % (accuracy))

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.784244


In [None]:
# Import necessary modules
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import numpy as np

# Load the breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Split the data: 80% training, 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Instantiate the DecisionTreeClassifier with max_depth=4
dt_clf_4 = DecisionTreeClassifier(max_depth=4, random_state=123)

# Fit the classifier to the training data
dt_clf_4.fit(X_train, y_train)

# Predict labels for the test set
y_pred_4 = dt_clf_4.predict(X_test)

# Compute accuracy
accuracy = float(np.sum(y_pred_4 == y_test)) / y_test.shape[0]
print(f"Accuracy: {accuracy:.4f}")



In [None]:
# Import necessary libraries
import xgboost as xgb
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Create the DMatrix
churn_dmatrix = xgb.DMatrix(data=X, label=y)

# Create the parameter dictionary
params = {"objective": "binary:logistic", "max_depth": 3}

# Perform 3-fold cross-validation
cv_results = xgb.cv(dtrain=churn_dmatrix, params=params, nfold=3, num_boost_round=5, 
                    metrics="error", as_pandas=True, seed=123)

# Print cv_results
print(cv_results)

# Print the accuracy
print(f"Accuracy: {((1 - cv_results['test-error-mean']).iloc[-1]):.4f}")

In [None]:
# Import necessary libraries
import xgboost as xgb
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Create the DMatrix
churn_dmatrix = xgb.DMatrix(data=X, label=y)

# Create the parameter dictionary
params = {"objective": "binary:logistic", "max_depth": 3}

# Perform 3-fold cross-validation with AUC metric
cv_results = xgb.cv(dtrain=churn_dmatrix, params=params, nfold=3, num_boost_round=5, 
                    metrics="auc", as_pandas=True, seed=123)

# Print cv_results
print(cv_results)

# Print the AUC
print(f"AUC: {(cv_results['test-auc-mean']).iloc[-1]:.4f}")




In [None]:
# Import necessary libraries
import xgboost as xgb
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Generate synthetic regression dataset
X, y = make_regression(n_samples=1000, n_features=4, noise=0.1, random_state=123)

# Split the data: 80% training, 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Create DMatrix for XGBoost
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Define parameters
params = {"objective": "reg:squarederror", "max_depth": 3, "eta": 0.1}

# Train the model
xgb_model = xgb.train(params, dtrain, num_boost_round=10)

# Predict on test set
y_pred = xgb_model.predict(dtest)

# Compute RMSE and MAE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")