# Full Autonomous Pipeline (End-to-End GPU)

## Step 1: Upload Your Data

In [None]:
from google.colab import files
import cudf
import os

# Open Colab's upload widget. The result is a dict keyed by uploaded file name.
uploaded = files.upload()

# A cancelled/empty upload yields an empty dict; fail with a clear message
# instead of an opaque IndexError on the lookup below.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a CSV file.")

# Only the first uploaded file is used; any additional files are ignored.
filename = next(iter(uploaded))

# Parse the CSV straight into a GPU DataFrame.
# NOTE(review): this reads by name from the working directory, which relies on
# Colab having saved the upload there — confirm for the target runtime.
df = cudf.read_csv(filename)
print(f"\n✅ Loaded {len(df):,} rows")

## Step 2: Automated EDA

In [None]:
print("🔍 Analyzing data...")

# Statistics: summarize the frame, then convert the (small) result to pandas
# so the notebook can render it as a table.
summary = df.describe().to_pandas()
print("\n📊 Numerical Summary:")
# `display` is not imported in this file; presumably the notebook-provided
# rich-output helper.
display(summary)

# Missing values: per-column null counts, then their grand total.
missing = df.isnull().sum()
print(f"\n⚠️ Total Missing Values: {missing.sum()}")

## Step 3: Preparing & Training (XGBoost GPU)

In [None]:
import xgboost as xgb
from cuml.model_selection import train_test_split
from cuml.metrics import accuracy_score, r2_score

# Convention used by this pipeline: the last column is the prediction target.
target = df.columns[-1]

# Rows without a label cannot be used for supervised training (XGBoost rejects
# NaN labels), so drop them before building features. `df` itself is untouched.
labeled = df.dropna(subset=[target])
X = cudf.get_dummies(labeled.drop(columns=[target]))
y = labeled[target]

# Task detection: a non-numeric target (strings, categories) is always
# classification; a numeric target is treated as classification only when it
# has fewer than 10 distinct values.
is_numeric_target = y.dtype.kind in "biuf"
is_clf = (not is_numeric_target) or y.nunique() < 10

if is_clf:
    # XGBClassifier expects integer labels 0..n_classes-1. Encode via category
    # codes and keep the original labels so predictions can be mapped back.
    y_as_category = y.astype("category")
    class_labels = y_as_category.cat.categories
    y_model = y_as_category.cat.codes.astype("int32")
else:
    y_model = y

# Fixed seed so the reported score is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_model, test_size=0.2, random_state=42
)

print(f"🤖 Training {'Classification' if is_clf else 'Regression'} model on GPU...")

# XGBoost 2.0 replaced tree_method='gpu_hist' with tree_method='hist' plus
# device='cuda'; keep the legacy spelling only for older installs.
if int(xgb.__version__.split(".")[0]) >= 2:
    gpu_params = {"tree_method": "hist", "device": "cuda"}
else:
    gpu_params = {"tree_method": "gpu_hist"}

estimator_cls = xgb.XGBClassifier if is_clf else xgb.XGBRegressor
model = estimator_cls(**gpu_params)

model.fit(X_train, y_train)
# For classification these are encoded class ids (indices into class_labels).
preds = model.predict(X_test)

# Accuracy for classification, R^2 for regression.
score = accuracy_score(y_test, preds) if is_clf else r2_score(y_test, preds)
print(f"\n🏆 Best Model Score: {score:.4f}")