In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score, classification_report, confusion_matrix
import joblib

### Task 1 - Credit Card Fraud Prediction

In [67]:
import joblib
import pandas as pd
from ipynb.fs.full.PS4_task1_training import extract_time_feats, feats, remove_irr
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler

def load_fraud_model(model_path):
    """
    Deserialize the previously trained fraud model from disk.

    Params :
    model_path (str) : Location of the joblib-serialized model file

    Returns :
    model : The object stored in the file, exactly as joblib.load returns it
            (presumably the random forest from the training notebook - confirm)
    """
    # joblib.load unpickles the file, so only point this at trusted artifacts.
    return joblib.load(model_path)

def load_fraud_test_data(test_data_path):
    """
    Read the raw data and run it through the feature functions imported
    from the training notebook.

    Params :
    test_data_path (str) : Path to the CSV file holding the data to evaluate

    Returns :
    credit_data (df) : Result of applying extract_time_feats, feats and
                       remove_irr (in that order) to the loaded frame
    """
    frame = pd.read_csv(test_data_path)  # input is assumed to be a CSV file

    # Feed the frame through each imported step, one after another.
    for step in (extract_time_feats, feats, remove_irr):
        frame = step(frame)

    return frame

def preprocess_data(test_data, scaler=None):
    """
    Split the frame into features and labels and standardize the features.

    Params :
    test_data (df) : Dataframe holding the feature columns plus 'is_fraud'
    scaler (object, optional) : Already-fitted scaler exposing ``transform``
        (e.g. the StandardScaler fitted while training). When given, it is
        applied as-is so the evaluation data is scaled with the training
        statistics. When omitted, a new StandardScaler is fitted on
        ``test_data`` itself, which is the original behaviour.

    Returns :
    X_scaled (array) : Standardized features, as returned by the scaler
                       (a NumPy array with scikit-learn's default output)
    y (Series) : The 'is_fraud' column of ``test_data``
    """
    X = test_data.drop(columns=['is_fraud'])
    y = test_data['is_fraud']

    if scaler is not None:
        # Reuse the training-time statistics; never re-fit on evaluation data.
        return scaler.transform(X), y

    # Backward-compatible fallback. Fitting here derives mean/std from the
    # evaluation data, which generally differs from what the model saw during
    # training - prefer passing the persisted training scaler via ``scaler``.
    X_scaled = StandardScaler().fit_transform(X)
    return X_scaled, y

def evaluate_model(model, X_test, y_test):
    """
    Score a fitted classifier on the given data and print the results.

    Params :
    model (sklearn model) : Trained model for fraud detection (needs ``predict``)
    X_test (array-like) : Feature matrix handed to ``model.predict``
    y_test (array) : Ground-truth fraud labels

    Prints :
    Classification report
    Accuracy of the model
    F1 of the fraud class (f1_score with its default settings)
    Support-weighted average F1 across the classes
    """
    predictions = model.predict(X_test)

    # Per-class breakdown first, then the headline numbers.
    print(classification_report(y_test, predictions))

    headline = (
        ("ACCURACY: ", accuracy_score(y_test, predictions)),
        ("F1-Fraud : ", f1_score(y_test, predictions)),
        ("F1-Avg : ", f1_score(y_test, predictions, average="weighted")),
    )
    for label, value in headline:
        print(label, value)


## Task 1 Data Path

In [68]:
# CSV file that the Task 1 model is evaluated on (read by load_fraud_test_data).
# NOTE(review): the file name suggests this is the training split, which would make
# the reported metrics in-sample - confirm, and point this at held-out data if so.
test_data_path = 'cct_train.csv'

In [69]:
# Load the persisted Task 1 model from its joblib file.
model_path = 'task_1_model.pkl'  

fraud_model = load_fraud_model(model_path)
    
# Build the engineered feature frame, then split it into scaled features and labels.
test_data = load_fraud_test_data(test_data_path)
X_test, y_test = preprocess_data(test_data)

# Prints the classification report, accuracy and the two F1 scores.
evaluate_model(fraud_model, X_test, y_test)

Remaining columns: Index(['amt', 'is_fraud', 'hour', 'day_of_month', 'month', 'year', 'quarter',
       'age', 'category_cat', 'profile_cat', 'is_in_us', 'suspicious_cat',
       'merchant_freq_encoded'],
      dtype='object')
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    697228
           1       1.00      0.99      0.99      3579

    accuracy                           1.00    700807
   macro avg       1.00      0.99      1.00    700807
weighted avg       1.00      1.00      1.00    700807

ACCURACY:  0.9999243729015264
F1-Fraud :  0.9925404644616467
F1-Avg :  0.9999240922671615


### Task 2 - Human Activity Recognition Prediction

In [70]:
# Data used to evaluate the Task 2 (human activity recognition) model.
# NOTE(review): the file name suggests this is the training split - confirm that
# scoring on it is intended, since that would make the metrics in-sample.
test_df_2 = pd.read_csv('har_train.csv')

In [71]:
# Every column except the last is treated as a feature; the last column is the label.
X = test_df_2.iloc[:, :-1]
y = test_df_2.iloc[:, -1]

In [72]:
# Restore the label encoder saved for Task 2.
# joblib.load unpickles the file: only load trusted artifacts, and note that
# scikit-learn objects are not guaranteed to load correctly across versions.
loaded_label_encoder = joblib.load('label_encoder_task2.pkl')

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [73]:
# Map the raw labels to the encoder's integer codes so they can be compared with
# the model's predictions (transform fails on labels the encoder was not fitted on).
y_test_encoded = loaded_label_encoder.transform(y)

In [74]:
import seaborn as sns
import matplotlib.pyplot as plt
def evaluate(model, X_test, y_test):
    """
    Score a fitted multi-class model and print a summary.

    Params :
    model : Fitted estimator or pipeline exposing ``predict``
    X_test : Feature matrix handed to ``model.predict``
    y_test : Ground-truth labels, in the same encoding the model predicts

    Prints :
    Accuracy, macro-averaged F1 and the classification report

    Returns :
    accuracy (float) : Fraction of correctly predicted samples
    macro_f1 (float) : Unweighted mean of the per-class F1 scores
    """
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    macro_f1 = f1_score(y_test, y_pred, average='macro')
    # The confusion matrix used to be computed here but was never used or
    # returned, so that dead computation has been dropped.
    print("Accuracy:", accuracy)
    print("Macro Averaged F1 Score:", macro_f1)
    print("Classification Report: ")
    print(classification_report(y_test, y_pred))
    return accuracy, macro_f1

In [75]:
# Restore the fitted Task 2 pipeline (joblib.load unpickles - trusted files only)
# and score it on the features against the integer-encoded labels.
loaded_pipeline = joblib.load("pipeline_task2.pkl")
accuracy, macro_f1 = evaluate(loaded_pipeline, X, y_test_encoded)

Accuracy: 1.0
Macro Averaged F1 Score: 1.0
Classification Report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       274
           1       1.00      1.00      1.00       387
           2       1.00      1.00      1.00        12
           3       1.00      1.00      1.00       117
           4       1.00      1.00      1.00        17
           5       1.00      1.00      1.00       696
           6       1.00      1.00      1.00       122
           7       1.00      1.00      1.00       579
           8       1.00      1.00      1.00        59
           9       1.00      1.00      1.00        84
          10       1.00      1.00      1.00        94

    accuracy                           1.00      2441
   macro avg       1.00      1.00      1.00      2441
weighted avg       1.00      1.00      1.00      2441



In [76]:
# Re-print the two headline Task 2 metrics returned by evaluate().
print("Accuracy: ", accuracy)
print("Macro Averaged F1-score: ", macro_f1)

Accuracy:  1.0
Macro Averaged F1-score:  1.0
