In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report



data = {
    'Math': [85, 70, 90, 60, 75, 88, 45, 95],
    'Science': [88, 65, 95, 55, 70, 85, 50, 92],
    'English': [78, 80, 88, 58, 76, 90, 52, 85],
    'Pass': [1, 0, 1, 0, 1, 1, 0, 1]  
}

df = pd.DataFrame(data)
print(df.head())



   Math  Science  English  Pass
0    85       88       78     1
1    70       65       80     0
2    90       95       88     1
3    60       55       58     0
4    75       70       76     1


 2. Feature Engineering

In [2]:

# Create new features
df['Total_Score'] = df['Math'] + df['Science'] + df['English']
df['Average_Score'] = df['Total_Score'] / 3


#  Prepare X and y


# Features and target
X = df[['Math', 'Science', 'English', 'Total_Score', 'Average_Score']]
y = df['Pass']

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)



3. Model & GridSearchCV

In [None]:

# Define model
model = DecisionTreeClassifier(random_state=42)

# Define hyperparameter grid (without None)
param_grid = {
    'max_depth': [2, 3, 4, 5],
    'min_samples_split': [2, 3, 5],
    'criterion': ['gini', 'entropy']
}

# Grid Search
grid_search = GridSearchCV(estimator=model, param_grid=param_grid,cv=2, scoring='f1', n_jobs=-1)

grid_search.fit(X_train, y_train)

# Get best model
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)


Best Parameters: {'criterion': 'gini', 'max_depth': 2, 'min_samples_split': 2}



4. Evaluate Model

In [None]:

# Predict
y_pred = best_model.predict(X_test)

# Evaluation Report
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred, labels=[0, 1], zero_division=0))



Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



Task 2.

In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# 2. Load dataset
df = pd.read_csv("fraud_detection.csv")

# 3. Preview the data
print("Initial rows:\n", df.head())



Initial rows:
    Transaction ID  Amount    Type  Is Fraud
0            1001    1500  credit         0
1            1002     250   debit         0
2            1003    7600  credit         1
3            1004     120   debit         0
4            1005    9500  credit         1


1. Load & Preprocess the Dataset


In [2]:
#  Check for missing values
print("\nMissing values:\n", df.isnull().sum())

#  Drop missing rows if any
df.dropna(inplace=True)



Missing values:
 Transaction ID    0
Amount            0
Type              0
Is Fraud          0
dtype: int64


In [3]:
#  Encode 'Type' using Label Encoding 
le = LabelEncoder()
df['Type_encoded'] = le.fit_transform(df['Type'])

In [4]:
#  Drop 'Transaction ID' (not useful for prediction)
df.drop('Transaction ID', axis=1, inplace=True)


In [5]:
#  Final Data Check
print("\nData after preprocessing:\n", df.head())
print("\nDataset shape:", df.shape)



Data after preprocessing:
    Amount    Type  Is Fraud  Type_encoded
0    1500  credit         0             0
1     250   debit         0             1
2    7600  credit         1             0
3     120   debit         0             1
4    9500  credit         1             0

Dataset shape: (8, 4)


 2. Feature Engineering


In [8]:

# Feature 1: Log transform
df['Amount_log'] = np.log1p(df['Amount'])

# Feature 2: Binary large amount flag
df['Is_Large_Amount'] = df['Amount'].apply(lambda x: 1 if x > 5000 else 0)

# Feature 3: Amount category (Low/Medium/High)
def categorize_amount(x):
    if x < 1000:
        return 'Low'
    elif x < 5000:
        return 'Medium'
    else:
        return 'High'

df['Amount_Category'] = df['Amount'].apply(categorize_amount)
df['Amount_Category_encoded'] = LabelEncoder().fit_transform(df['Amount_Category'])

# Safely drop unnecessary columns
df.drop(['Transaction ID', 'Type', 'Amount_Category'], axis=1, inplace=True, errors='ignore')

# Final Data Preview
print(df.head())


   Amount  Is Fraud  Amount_log  Is_Large_Amount  Amount_Category_encoded
0    1500         0    7.313887                0                        2
1     250         0    5.525453                0                        1
2    7600         1    8.936035                1                        0
3     120         0    4.795791                0                        1
4    9500         1    9.159152                1                        0


3. Train a Decision Tree Classifier

In [9]:

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

#  Define Features and Target
X = df.drop('Is Fraud', axis=1)  # Features
y = df['Is Fraud']               # Target

#  Split the data (75% train, 25% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

#  Train the Decision Tree model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

#  Predict on test data
y_pred = model.predict(X_test)


4. Evaluate Model Performance

In [11]:
from sklearn.metrics import classification_report, confusion_matrix

# Print classification report
print("Classification Report:\n")
print(classification_report(y_test, y_pred, labels=[0, 1], zero_division=0))

# Print confusion matrix
print("\nConfusion Matrix:\n")
print(confusion_matrix(y_test, y_pred, labels=[0, 1]))


Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       0.00      0.00      0.00         0

    accuracy                           1.00         2
   macro avg       0.50      0.50      0.50         2
weighted avg       1.00      1.00      1.00         2


Confusion Matrix:

[[2 0]
 [0 0]]
