# Name: Kongkiat Patwary
# ID: 211-35-720
# Lab Report

In [2]:
# Step 1: Install required packages
!pip install xgboost scikit-learn pandas



In [3]:
# Step 2: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, mean_squared_error
from xgboost import XGBClassifier, XGBRegressor

In [4]:
# Step 3: Load the Iris dataset (Classification)
iris_df = pd.read_csv('/content/Iris.csv')

In [5]:
print("Iris Columns:", iris_df.columns)

Iris Columns: Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')


In [6]:
if 'Id' in iris_df.columns:
    iris_df = iris_df.drop('Id', axis=1)

In [7]:
# Define features and target
X_cls = iris_df.drop('Species', axis=1)
y_cls = LabelEncoder().fit_transform(iris_df['Species'])

In [8]:
# Train-test split
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(X_cls, y_cls, test_size=0.2, random_state=42)

In [9]:
# Define hyperparameter grid
param_grid_cls = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5],
    'subsample': [0.7, 1.0],
    'colsample_bytree': [0.7, 1.0],
    'learning_rate': [0.1]
}

In [10]:
# Train and tune classification model
xgb_cls = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
grid_cls = GridSearchCV(xgb_cls, param_grid_cls, cv=3, scoring='accuracy', n_jobs=-1)
grid_cls.fit(X_train_cls, y_train_cls)

Parameters: { "use_label_encoder" } are not used.



In [11]:
# Predict and evaluate
best_cls = grid_cls.best_estimator_
y_pred_cls = best_cls.predict(X_test_cls)

print("\n🌸 Iris Classification Results:")
print("Best Parameters:", grid_cls.best_params_)
print("Accuracy:", accuracy_score(y_test_cls, y_pred_cls))


🌸 Iris Classification Results:
Best Parameters: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50, 'subsample': 0.7}
Accuracy: 1.0


In [12]:
# Step 4: Load the Student Performance dataset (Regression)
student_df = pd.read_csv('/content/Student_Performance.csv')

# Preview columns
print("Student Columns:", student_df.columns)

Student Columns: Index(['Hours Studied', 'Previous Scores', 'Extracurricular Activities',
       'Sleep Hours', 'Sample Question Papers Practiced', 'Performance Index'],
      dtype='object')


In [15]:
target_col = None
for col in student_df.columns:
    if 'g3' in col.lower() or 'grade' in col.lower() or 'score' in col.lower() or 'target' in col.lower():
        target_col = col
        break

if not target_col:
    raise ValueError("❌ Target column like 'G3', 'Grade', or 'Score' not found. Please check the file.")

print(f"🎯 Target column detected: {target_col}")


🎯 Target column detected: Previous Scores


In [17]:
# Step 6: Prepare features and labels
X_reg = student_df.drop(target_col, axis=1)
y_reg = student_df[target_col]

# Encode categorical variables
X_reg = pd.get_dummies(X_reg, drop_first=True)

# Train-test split
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)



In [18]:
# Step 7: Define hyperparameter grid
param_grid_reg = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5],
    'subsample': [0.7, 1.0],
    'colsample_bytree': [0.7, 1.0],
    'learning_rate': [0.1]
}


In [19]:
# Step 8: Train using GridSearchCV
xgb_reg = XGBRegressor(objective='reg:squarederror', random_state=42)
grid_reg = GridSearchCV(xgb_reg, param_grid_reg, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_reg.fit(X_train_reg, y_train_reg)

In [20]:
# Step 9: Evaluate
best_reg = grid_reg.best_estimator_
y_pred_reg = best_reg.predict(X_test_reg)
rmse = np.sqrt(mean_squared_error(y_test_reg, y_pred_reg))

print("\n✅ Regression Results:")
print("Best Parameters:", grid_reg.best_params_)
print("Test RMSE:", rmse)


✅ Regression Results:
Best Parameters: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100, 'subsample': 0.7}
Test RMSE: 1.9869876393095456
