<a href="https://colab.research.google.com/github/Sameersah/mid-term-catch-assignment/blob/main/b)_Fraud_Detection_With_Feast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fraud Detection With Feast

In [None]:
### Step 1: Install Required Libraries
# Install the necessary libraries for Feast and machine learning
!pip install feast pandas scikit-learn numpy matplotlib

### Step 2: Import Required Libraries
# Import libraries for data handling, model building, and feature management
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from feast import FeatureStore, FeatureService, Entity, FeatureView, ValueType
from feast.types import Float32

### Step 3: Define the Dataset
# Create a synthetic fraud detection dataset
def create_fraud_data(n_rows=10000, seed=42):
    """Build a synthetic transaction table with a binary fraud label.

    Args:
        n_rows: Number of transactions (rows) to generate. Defaults to
            10000.
        seed: Seed passed to ``np.random.seed`` before any value is drawn.
            With the defaults the table is identical on every call.

    Returns:
        A ``pandas.DataFrame`` with ``n_rows`` rows and the columns

        * ``user_id`` - integers drawn uniformly from 1..999,
        * ``transaction_amount`` - floats drawn uniformly from [10, 1000),
        * ``transaction_count`` - integers drawn uniformly from 1..49,
        * ``is_fraud`` - 0 or 1, drawn with probabilities 0.95 / 0.05.

    Note:
        This seeds NumPy's *global* random state, so ``np.random`` calls made
        after this function are affected by ``seed`` as well.
    """
    np.random.seed(seed)
    # The columns are drawn in this exact order; reordering them would change
    # the generated values for a given seed.
    data = pd.DataFrame({
        'user_id': np.random.randint(1, 1000, n_rows),
        'transaction_amount': np.random.uniform(10, 1000, n_rows),
        'transaction_count': np.random.randint(1, 50, n_rows),
        'is_fraud': np.random.choice([0, 1], size=n_rows, p=[0.95, 0.05])
    })
    return data

data = create_fraud_data()

# Feast joins features to entity rows point-in-time, so every row needs an
# event timestamp. Each synthetic transaction gets its own timestamp, spaced
# 100 ms apart and ending "now" (about 17 minutes for 10,000 rows):
#   * distinct, so the join for a row resolves to that row's own feature values;
#   * recent, so every row lies inside the feature view's 1-hour TTL and is
#     picked up by the first incremental materialization.
# A whole-second base plus 100 ms steps has no sub-millisecond component, so
# nothing is lost if the Parquet writer stores coarser-than-nanosecond times.
newest_event = pd.Timestamp.now(tz="UTC").floor("s")
data["event_timestamp"] = newest_event - pd.to_timedelta(
    np.arange(len(data) - 1, -1, -1) * 100, unit="ms"
)

# Split the dataset into training and testing
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

### Step 4: Set Up Feast Feature Store
# Define Feast Feature Store configurations
import os
from datetime import datetime, timedelta, timezone
from pathlib import Path

from feast import Field, FileSource
from feast.types import Int64

# Guard the chdir so that re-running this cell does not create
# feature_repo/feature_repo.
if Path.cwd().name != "feature_repo":
    os.makedirs("feature_repo", exist_ok=True)
    os.chdir("feature_repo")

# Write feature store configuration
with open("feature_store.yaml", "w") as f:
    f.write("""
project: fraud_detection
registry: ./registry.db
provider: local
online_store:
    path: ./online_store.db
""")

# Persist the feature columns as the offline (batch) source the feature view
# reads from. The path is made absolute so it does not depend on how a given
# Feast version resolves relative source paths.
source_path = (Path("data") / "user_transactions.parquet").resolve()
source_path.parent.mkdir(parents=True, exist_ok=True)
data[
    ["user_id", "transaction_amount", "transaction_count", "event_timestamp"]
].to_parquet(source_path, index=False)
source_uri = source_path.as_posix()

# Define entity, data source and feature view
user_entity = Entity(name="user_id", value_type=ValueType.INT64, description="User ID")

user_source = FileSource(
    name="user_transactions",
    path=source_uri,
    timestamp_field="event_timestamp",
)

user_fv = FeatureView(
    name="user_features",
    entities=[user_entity],  # Entity objects, not entity-name strings
    ttl=timedelta(hours=1),
    schema=[
        Field(name="transaction_amount", dtype=Float32),
        # transaction_count holds integers, so declare it as an integer type.
        Field(name="transaction_count", dtype=Int64),
    ],
    source=user_source,
    online=True,
)

# Write Python module for Feast definitions.
# This mirrors the objects defined above so the repo can also be applied with
# the `feast apply` CLI; the module carries its own imports and data source.
with open("feature_repo.py", "w") as f:
    f.write(f"""
from datetime import timedelta

from feast import Entity, FeatureView, Field, FileSource, ValueType
from feast.types import Float32, Int64

user = Entity(
    name="user_id",
    value_type=ValueType.INT64,
    description="User ID",
)

user_transactions = FileSource(
    name="user_transactions",
    path={source_uri!r},
    timestamp_field="event_timestamp",
)

user_features = FeatureView(
    name="user_features",
    entities=[user],
    ttl=timedelta(hours=1),
    schema=[
        Field(name="transaction_amount", dtype=Float32),
        Field(name="transaction_count", dtype=Int64),
    ],
    source=user_transactions,
    online=True,
)
""")

# Initialize Feast
store = FeatureStore(repo_path=".")

### Step 5: Ingest Data into Feature Store
# Register the definitions, then materialize features into the online store.
store.apply([user_entity, user_fv])
# Use a timezone-aware UTC end date: Feast interprets naive datetimes as UTC,
# so a naive local timestamp would shift the window on non-UTC machines.
store.materialize_incremental(end_date=datetime.now(timezone.utc))

### Step 6: Train the Model with Features
feature_refs = [
    "user_features:transaction_amount",
    "user_features:transaction_count",
]
# The entity dataframe must carry the join key and an event timestamp; the
# label rides along as an extra column so it stays aligned with the joined
# feature rows in the result.
entity_columns = ["user_id", "event_timestamp", "is_fraud"]
# Model inputs: everything retrieved except the label and the timestamp
# (user_id was part of the inputs in the original notebook and is kept).
model_columns = ["user_id", "transaction_amount", "transaction_count"]

# Retrieve features for training
train_features = store.get_historical_features(
    entity_df=train_data[entity_columns],
    features=feature_refs,
).to_df()

X_train = train_features[model_columns]
y_train = train_features["is_fraud"]

# Train a Random Forest model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

### Step 7: Evaluate the Model
# Retrieve features for testing
test_features = store.get_historical_features(
    entity_df=test_data[entity_columns],
    features=feature_refs,
).to_df()

X_test = test_features[model_columns]
y_test = test_features["is_fraud"]

# Evaluate the model
predictions = model.predict(X_test)
print(classification_report(y_test, predictions))

### Step 8: Serve Features in Production
# Example of retrieving online features for real-time prediction
online_features = store.get_online_features(
    features=feature_refs,
    entity_rows=[{"user_id": 123}],
).to_dict()

print("Real-time features:", online_features)
