# Beginner-Level Regression Model

Using the Indian Trader Loss Dataset

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score


In [None]:

# Read the raw dataset from the Kaggle input directory.
df = pd.read_csv(
    '/kaggle/input/indian-trader-loss-dataset/Indian_Trader_Loss_Dataset.csv'
)
# Remove the Trader_ID column so it is not used as a model feature
# (presumably a row identifier -- confirm against the dataset description).
df = df.drop(columns="Trader_ID")


In [None]:

# Target: the column the regressor will learn to predict.
y = df["Loss_Percentage"]
# Features: every remaining column of df.
X = df.drop(columns="Loss_Percentage")


In [None]:

# Columns with a natural ordering, each mapped to its levels from lowest to
# highest; the position in the list becomes the encoded integer value.
ordinal_features = {
    "Age_Group": ["<20", "20-30", "31-40", "41-50", "51+"],
    "Holding_Period": [
        "<1 Month",
        "1-3 Months",
        "3-6 Months",
        "6-12 Months",
        ">1 Year",
    ],
}

# Dict iteration order is insertion order, so the column list and the
# category lists below stay aligned with each other.
ordinal_cols = list(ordinal_features)
ordinal_encoder = OrdinalEncoder(categories=list(ordinal_features.values()))

# Replace the string labels in X with their ordinal codes, in place.
X[ordinal_cols] = ordinal_encoder.fit_transform(X[ordinal_cols])


In [None]:

# Nominal features: every object-dtype column of X, explicitly excluding the
# ordinal columns.
categorical_cols = [
    name
    for name in X.select_dtypes(include="object").columns
    if name not in ordinal_cols
]

# One-hot encode the nominal columns; all other columns are forwarded to the
# regressor unchanged via remainder="passthrough".
preprocessor = ColumnTransformer(
    [
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
    ],
    remainder="passthrough",
)

# Chain preprocessing and the model so both are fitted and applied together.
pipeline = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("regressor", GradientBoostingRegressor(random_state=42)),
    ]
)


In [None]:

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the preprocessing + regressor pipeline on the training portion only.
pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out rows.
y_pred = pipeline.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("R²:", r2_score(y_test, y_pred))
