<a href="https://colab.research.google.com/github/Manchal09/mainflow-tasks/blob/main/MainflowTask8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Section 1: Feature Engineering & Model Tuning

In [9]:
# DATASET CREATION
# Builds the small toy dataset used by the feature-engineering section and
# writes it to feature_engineering.csv in the working directory.
import pandas as pd

# Column order here is the column order of the resulting CSV.
feature_columns = [
    "feature1",
    "feature2",
    "feature3",
    "existing_feature1",
    "existing_feature2",
    "target",
]

# One tuple per sample, values listed in the same order as feature_columns.
feature_rows = [
    (10, 20, 30, 15, 25, 1),
    (5, 15, 25, 10, 20, 0),
    (8, 18, 28, 13, 23, 1),
    (12, 22, 32, 17, 27, 0),
    (7, 17, 27, 12, 22, 1),
    (9, 19, 29, 14, 24, 0),
    (15, 25, 35, 20, 30, 1),
    (6, 16, 26, 11, 21, 0),
]

# Assemble the frame and persist it without the pandas index column.
df = pd.DataFrame(feature_rows, columns=feature_columns)
df.to_csv("feature_engineering.csv", index=False)

print("Dataset saved as feature_engineering.csv")

Dataset saved as feature_engineering.csv


In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

# Load dataset
df = pd.read_csv("feature_engineering.csv")

# Feature Engineering: row-wise sum and mean of the three raw features
df["sum_feature"] = df["feature1"] + df["feature2"] + df["feature3"]
df["avg_feature"] = df[["feature1", "feature2", "feature3"]].mean(axis=1)

# Splitting data
X = df.drop(columns=["target"])  # Features
y = df["target"]  # Target variable

# stratify=y keeps the class proportions of `target` equal in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Hyperparameter grid explored by the search (3 x 3 x 3 = 27 combinations)
param_grid = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5, 10]
}

rf = RandomForestClassifier(random_state=42)

# The splitter and the search are built only after `rf` and `param_grid`
# exist, so this cell runs top to bottom on a fresh kernel. The shuffled,
# seeded stratified splitter is passed as `cv` so it is actually the
# cross-validation scheme used by the search.
strat_kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=strat_kfold, scoring="accuracy", n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best model evaluation on the held-out split.
# NOTE(review): test_size=0.2 leaves very few held-out rows when the CSV is
# small, so this accuracy is coarse and should not be over-interpreted.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Best Model Accuracy: {accuracy:.4f}")
print("Best Parameters:", grid_search.best_params_)

Best Model Accuracy: 0.0000
Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}


## Section 2: Fraud Detection with Decision Trees

In [10]:
# DATASET CREATION
# Builds the small toy transactions table used by the fraud-detection section
# and writes it to fraud_detection.csv in the working directory.
import pandas as pd

# Column order here is the column order of the resulting CSV.
transaction_columns = ["Transaction ID", "Amount", "Type", "Is Fraud"]

# One tuple per transaction, values listed in the same order as
# transaction_columns.
transaction_rows = [
    (1, 500, "credit", 0),
    (2, 2000, "debit", 1),
    (3, 150, "credit", 0),
    (4, 3000, "debit", 1),
    (5, 1000, "credit", 0),
    (6, 2500, "debit", 1),
    (7, 50, "credit", 0),
    (8, 4000, "debit", 1),
]

# Assemble the frame and persist it without the pandas index column.
df = pd.DataFrame(transaction_rows, columns=transaction_columns)
df.to_csv("fraud_detection.csv", index=False)

print("Dataset saved as fraud_detection.csv")

Dataset saved as fraud_detection.csv


In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score

# Load dataset
df = pd.read_csv("fraud_detection.csv")

# Convert the categorical "Type" column to integer codes
label_encoder = LabelEncoder()
df["Type"] = label_encoder.fit_transform(df["Type"])

# Handle missing values (if any) by filling with each column's mean.
# Assigning the result (rather than inplace=True) keeps the cell re-runnable.
df = df.fillna(df.mean(numeric_only=True))

# Feature Engineering: amount expressed as a fraction of the largest amount.
# NOTE(review): the maximum is taken over the whole table, including rows that
# later land in the test split.
df["Transaction_Risk"] = df["Amount"] / df["Amount"].max()

# Define features and target variable.
# "Transaction ID" is a row identifier rather than a property of the
# transaction, so it is excluded from the model inputs.
X = df.drop(columns=["Is Fraud", "Transaction ID"])
y = df["Is Fraud"]

# Splitting dataset; stratify=y keeps both classes represented in the test
# split, which the precision/recall/F1 metrics below need to be meaningful.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train Decision Tree Classifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluate model. zero_division=0 reports 0 instead of emitting a warning
# when a metric's denominator is empty (e.g. no positive predictions).
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)

print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}")

Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000
