In [None]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [None]:
# CatBoost regression walkthrough on the Boston Housing CSV:
# load -> split -> fit -> score with MAE -> print feature importances.
# The `catboost` package must already be installed (see the install cell above).

# --- Imports ---
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from catboost import CatBoostRegressor

# --- Load the data ---
data_url = 'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv'
df = pd.read_csv(data_url)

# Quick look at the first rows (header line, then the frame, each on its own line)
print("First few rows of the dataset:", df.head(), sep="\n")

# --- Features / target ---
# 'medv' (median value of owner-occupied homes) is the regression target;
# every other column is used as a feature.
y = df['medv']
X = df.drop(columns=['medv'])

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model ---
# verbose=100 logs training progress every 100 iterations
catboost_model = CatBoostRegressor(
    iterations=1000,
    learning_rate=0.1,
    depth=6,
    verbose=100,
)
catboost_model.fit(X_train, y_train)

# --- Evaluate on the held-out rows ---
y_pred = catboost_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"\nMean Absolute Error of the CatBoost model: {mae:.2f}")

# --- Feature importances (printed as a table, not plotted) ---
feature_importances = catboost_model.get_feature_importance(prettified=True)
print("Name : Ayan Mastim, UIN : 211P031")
print("\nFeature Importance:", feature_importances, sep="\n")

First few rows of the dataset:
      crim    zn  indus  chas    nox     rm   age     dis  rad  tax  ptratio  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296     15.3   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242     17.8   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242     17.8   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222     18.7   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222     18.7   

        b  lstat  medv  
0  396.90   4.98  24.0  
1  396.90   9.14  21.6  
2  392.83   4.03  34.7  
3  394.63   2.94  33.4  
4  396.90   5.33  36.2  
0:	learn: 8.7281973	total: 4.14ms	remaining: 4.14s
100:	learn: 1.8117247	total: 769ms	remaining: 6.85s
200:	learn: 1.0137452	total: 1.55s	remaining: 6.16s
300:	learn: 0.6594916	total: 2.5s	remaining: 5.8s
400:	learn: 0.4489127	total: 3.74s	remaining: 5.58s
500:	learn: 0.3171942	total: 4.14s	remaining: 4.12s
600:	learn: 0.2275363	total: 

In [None]:
!pip install pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.5/44.5 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-4.1.0


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from pytorch_tabnet.tab_model import TabNetClassifier

# TabNet classification walkthrough on the UCI Adult Income data:
# load -> drop rows with missing values -> label-encode -> train/valid/test split
# -> fit with early stopping on the validation split -> report on the test split.

# Load the Adult Income dataset (the raw file has no header row, so names are supplied)
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = [
  "age", "workclass", "fnlwgt", "education", "education-num",
  "marital-status", "occupation", "relationship", "race", "sex",
  "capital-gain", "capital-loss", "hours-per-week", "native-country", "income"
]
# skipinitialspace=True strips the blank that follows each comma, so the missing-value
# marker reaches NA detection as '?' (not ' ?'). Using ' ?' here would never match
# and the dropna() below would silently keep every row.
df = pd.read_csv(data_url, header=None, names=column_names, na_values='?', skipinitialspace=True)
print("\nFirst few rows of the dataset (by Ayan Mastim 211P031):")
print(df.head())

# Preprocess the data: discard rows that contain any missing value
df = df.dropna().reset_index(drop=True)

# Encode categorical variables (every object-dtype column, including the 'income'
# target) as integer codes; the fitted encoders are kept so codes can be mapped back.
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
  le = LabelEncoder()
  df[column] = le.fit_transform(df[column])
  label_encoders[column] = le

# Define features and target variable (TabNet is fed plain NumPy arrays)
X = df.drop('income', axis=1).values
y = df['income'].values

# Split the data into training and testing sets
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Carve a validation set out of the training portion. Early stopping selects the
# best epoch on this split, so the test set stays untouched until the final report.
X_train, X_valid, y_train, y_valid = train_test_split(
  X_train_full, y_train_full, test_size=0.2, random_state=42
)

# Step 5: Train the TabNet model
tabnet_model = TabNetClassifier()
# patience=10: stop once the validation metric has not improved for 10 epochs
tabnet_model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], max_epochs=100, patience=10)

# Step 6: Make predictions on the held-out test set
y_pred = tabnet_model.predict(X_test)

# Step 7: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy of TabNet model: {accuracy * 100:.2f}%")
print("Classification Report By Ayan Mastim 211P031:")
print(classification_report(y_test, y_pred))


First few rows of the dataset (by Ayan Mastim 211P031):
   age         workclass  fnlwgt  education  education-num  \
0   39         State-gov   77516  Bachelors             13   
1   50  Self-emp-not-inc   83311  Bachelors             13   
2   38           Private  215646    HS-grad              9   
3   53           Private  234721       11th              7   
4   28           Private  338409  Bachelors             13   

       marital-status         occupation   relationship   race     sex  \
0       Never-married       Adm-clerical  Not-in-family  White    Male   
1  Married-civ-spouse    Exec-managerial        Husband  White    Male   
2            Divorced  Handlers-cleaners  Not-in-family  White    Male   
3  Married-civ-spouse  Handlers-cleaners        Husband  Black    Male   
4  Married-civ-spouse     Prof-specialty           Wife  Black  Female   

   capital-gain  capital-loss  hours-per-week native-country income  
0          2174             0              40  United-S



epoch 0  | loss: 0.50326 | val_0_auc: 0.52672 |  0:00:01s
epoch 1  | loss: 0.39072 | val_0_auc: 0.58154 |  0:00:02s
epoch 2  | loss: 0.3729  | val_0_auc: 0.72405 |  0:00:04s
epoch 3  | loss: 0.35846 | val_0_auc: 0.75822 |  0:00:05s
epoch 4  | loss: 0.34875 | val_0_auc: 0.86099 |  0:00:07s
epoch 5  | loss: 0.34248 | val_0_auc: 0.87831 |  0:00:09s
epoch 6  | loss: 0.3396  | val_0_auc: 0.88809 |  0:00:10s
epoch 7  | loss: 0.33441 | val_0_auc: 0.89585 |  0:00:12s
epoch 8  | loss: 0.33174 | val_0_auc: 0.89978 |  0:00:13s
epoch 9  | loss: 0.33593 | val_0_auc: 0.89903 |  0:00:15s
epoch 10 | loss: 0.33311 | val_0_auc: 0.90043 |  0:00:16s
epoch 11 | loss: 0.33266 | val_0_auc: 0.89865 |  0:00:17s
epoch 12 | loss: 0.33368 | val_0_auc: 0.90167 |  0:00:19s
epoch 13 | loss: 0.32843 | val_0_auc: 0.90354 |  0:00:21s
epoch 14 | loss: 0.32792 | val_0_auc: 0.90086 |  0:00:23s
epoch 15 | loss: 0.32791 | val_0_auc: 0.90445 |  0:00:24s
epoch 16 | loss: 0.32833 | val_0_auc: 0.90535 |  0:00:25s
epoch 17 | los




Accuracy of TabNet model: 85.55%
Classification Report By Ayan Mastim 211P031:
              precision    recall  f1-score   support

           0       0.88      0.94      0.91      4942
           1       0.75      0.60      0.67      1571

    accuracy                           0.86      6513
   macro avg       0.82      0.77      0.79      6513
weighted avg       0.85      0.86      0.85      6513

