## Step 1: Upload Your Data
Run this cell to upload your CSV file. If you select more than one file, only the first is used by the following steps.

In [None]:
from google.colab import files
import os

# Open Colab's upload dialog. The keys of the returned mapping are used below
# as the uploaded file names.
uploaded = files.upload()

# An empty mapping (e.g. the dialog was dismissed without choosing a file)
# used to surface as an opaque IndexError from `[0]`; fail with an actionable
# message instead.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose a CSV file.")

# The rest of the notebook works on a single file: the first one uploaded.
filename = next(iter(uploaded))
if len(uploaded) > 1:
    print(f"Note: {len(uploaded)} files were uploaded; only the first one is used.")
print(f"\n✅ Uploaded: {filename}")

## Step 2: Initialize GPU DataFrame
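We use **cuDF** to load the CSV directly into GPU memory. If cuDF cannot read the file, the cell falls back to reading it with pandas and then converts the result to a cuDF DataFrame.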

In [None]:
import cudf
import pandas as pd

# Prefer cuDF's own CSV reader; if it fails for any reason, read the file with
# pandas and convert the result to a cuDF DataFrame instead.
try:
    df = cudf.read_csv(filename)
except Exception as e:  # deliberately broad: any reader failure triggers the fallback
    print(f"❌ Error loading with cuDF: {e}")
    print("Falling back to pandas...")
    df_pd = pd.read_csv(filename)
    df = cudf.from_pandas(df_pd)
    print("✅ Converted from pandas to cuDF")

# Report the row count and a preview on BOTH paths. Previously the fallback
# path finished without showing what had been loaded.
print(f"✅ Loaded {len(df):,} rows into GPU memory")
print(df.head())

## Step 3: Run EDA Agent
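This cell runs a simplified, self-contained version of the EDA Agent logic for cloud execution: dataset shape, missing-value counts, summary statistics for numeric columns, and IQR-based outlier counts.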

In [None]:
import time

def _to_host_dict(obj):
    """Return ``obj.to_dict()``, going through ``to_pandas()`` first when available.

    Results that expose ``to_pandas()`` (the cuDF path this notebook uses) are
    converted with it before building the dict; results without that method
    (e.g. plain pandas objects) are converted directly.
    """
    to_pandas = getattr(obj, "to_pandas", None)
    host_obj = to_pandas() if callable(to_pandas) else obj
    return host_obj.to_dict()


def run_eda_agent(df):
    """Compute a basic exploratory summary of a dataframe.

    Parameters
    ----------
    df : DataFrame
        A cuDF DataFrame, or any dataframe offering the same ``shape``,
        ``isnull``, ``select_dtypes``, ``describe`` and ``quantile`` API
        (a pandas DataFrame works too).

    Returns
    -------
    tuple
        ``(results, duration)`` where ``results`` is a dict with the keys

        * ``'shape'``    - ``df.shape``
        * ``'missing'``  - ``{column: null count}`` for every column
        * ``'stats'``    - ``describe()`` of the numeric columns as a nested
          dict, or ``{}`` when there are no numeric columns
        * ``'outliers'`` - ``{column: count}`` for numeric columns having at
          least one value outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``

        and ``duration`` is the elapsed time of the analysis in seconds.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # if the system clock is adjusted mid-run.
    start_time = time.perf_counter()
    results = {}

    # Basic Info
    results['shape'] = df.shape
    results['missing'] = _to_host_dict(df.isnull().sum())

    # Statistics
    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) > 0:
        results['stats'] = _to_host_dict(df[numeric_cols].describe())
    else:
        # describe() on an empty column selection raises; report no stats.
        results['stats'] = {}

    # Outliers (IQR Method)
    outliers = {}
    for col in numeric_cols:
        series = df[col]
        q1 = series.quantile(0.25)
        q3 = series.quantile(0.75)
        iqr = q3 - q1
        lower = q1 - 1.5 * iqr
        upper = q3 + 1.5 * iqr
        count = ((series < lower) | (series > upper)).sum()
        # Only columns that actually contain outliers are reported.
        if count > 0:
            outliers[col] = int(count)
    results['outliers'] = outliers

    duration = time.perf_counter() - start_time
    return results, duration

# Run the analysis once on the loaded frame. `results` is read by the display
# cell that follows; `duration` is the elapsed time in seconds.
results, duration = run_eda_agent(df)
# The status marker was mis-encoded (mojibake); restored to the intended emoji.
print(f"✅ EDA Completed in {duration:.2f}s")

## Step 4: Display Results

In [None]:
import plotly.express as px
import pandas as pd

# Headline dimensions recorded by the EDA step.
print("📊 Dataset Overview:")
print(f"- Rows: {results['shape'][0]:,}")
print(f"- Columns: {results['shape'][1]}")

# Show only the columns that actually contain missing values.
print("\n⚠️ Missing Values:")
missing_df = pd.Series(results['missing']).reset_index()
missing_df.columns = ['Column', 'Counts']
missing_only = missing_df[missing_df['Counts'] > 0]
if missing_only.empty:
    # Avoid printing a raw "Empty DataFrame" repr when the data is complete.
    print("- None")
else:
    print(missing_only)

# `results['outliers']` only holds columns with at least one outlier, so an
# empty dict means nothing was flagged; say so instead of printing nothing.
print("\n🚨 Outliers Detected:")
if results['outliers']:
    for col, count in results['outliers'].items():
        print(f"- {col}: {count} outliers")
else:
    print("- None")