# In [4]:
import tensorflow as tf
import pandas as pd
import requests
import json
import base64
import numpy as np

# Read the dataset and keep only the feature columns: the 'Diagnosis' column
# is removed when the file contains it and the frame is left as-is otherwise.
df = pd.read_csv("data/The_Cancer_data_1500_V2.csv")
df = df.drop(columns=["Diagnosis"], errors="ignore")

# Ensure all values are Python native types
def convert_types(features):
    """Coerce the values of a feature dict to native Python ``int``/``float``.

    NumPy integer scalars become ``int`` and NumPy floating scalars become
    ``float``; every other value is left untouched. Afterwards the known
    integer-valued columns are forced to ``int``, because ``prepare_json``
    chooses between an int64 and a float feature from the Python type of
    each value.

    Integer columns that are absent from ``features`` are skipped, so the
    function also works on partial feature dicts.

    Args:
        features: Dict mapping feature name to a scalar value. It is
            modified in place.

    Returns:
        The same ``features`` dict, after conversion.
    """
    # Columns sent to the model as integers.
    # NOTE(review): presumably needed because a row taken from
    # DataFrame.iterrows() is upcast to float when the frame mixes int and
    # float columns -- confirm against the dataset.
    integer_columns = (
        "Age",
        "Gender",
        "Smoking",
        "GeneticRisk",
        "CancerHistory",
    )

    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for key, value in features.items():
        if isinstance(value, np.integer):
            features[key] = int(value)
        elif isinstance(value, np.floating):
            features[key] = float(value)

    for key in integer_columns:
        if key in features:
            features[key] = int(features[key])

    return features

def string_feature(value):
    """Wrap a string in a ``tf.train.Feature`` holding a one-item bytes list.

    The string is encoded as UTF-8 before being stored.
    """
    encoded = bytes(value, "utf-8")
    payload = tf.train.BytesList(value=[encoded])
    return tf.train.Feature(bytes_list=payload)
    
def float_feature(value):
    """Wrap a number in a ``tf.train.Feature`` holding a one-item float list."""
    payload = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=payload)
    
def int_feature(value):
    """Wrap an integer in a ``tf.train.Feature`` holding a one-item int64 list."""
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)

def prepare_json(inputs: dict):
    """Build the JSON request body for one example.

    Each entry of ``inputs`` is turned into a ``tf.train.Feature`` chosen by
    the Python type of its value (``float``, then ``int``, then ``str``).
    Values of any other type are reported on stdout and left out of the
    example. The resulting ``tf.train.Example`` is serialized, base64-encoded
    and placed in a single-instance request using the ``serving_default``
    signature.

    Args:
        inputs: Dict mapping feature name to a scalar value.

    Returns:
        The request body as a JSON string.
    """
    # Checked in this order; the first matching type wins.
    builders = (
        (float, float_feature),
        (int, int_feature),
        (str, string_feature),
    )

    feature_spec = {}
    for key, value in inputs.items():
        for python_type, build in builders:
            if isinstance(value, python_type):
                feature_spec[key] = build(value)
                break
        else:
            print(f"Unsupported data type for {key}: {type(value)}")

    features = tf.train.Features(feature=feature_spec)
    serialized = tf.train.Example(features=features).SerializeToString()
    encoded = base64.b64encode(serialized).decode()

    payload = {
        "signature_name": "serving_default",
        "instances": [{"examples": {"b64": encoded}}],
    }
    return json.dumps(payload)

def make_predictions(
    inputs,
    *,
    endpoint="https://male-kaleena-naba231-54e6cd2a.koyeb.app/v1/models/cancer-failure-model:predict",
    timeout=30,
):
    """Send one example to the prediction endpoint and return its score.

    Args:
        inputs: Dict mapping feature name to a native Python scalar; it is
            serialized with ``prepare_json``.
        endpoint: URL the request body is POSTed to. Defaults to the deployed
            model used by this notebook.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        ``response["predictions"][0][0]`` on success, or ``None`` when the
        request fails, the body is not JSON, or the body does not have the
        expected shape. Every failure is reported on stdout.
    """
    json_data = prepare_json(inputs)

    # Network failures and unparsable bodies are reported the same way as
    # the other errors of this function instead of aborting the caller.
    try:
        response = requests.post(endpoint, data=json_data, timeout=timeout)
        response_data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error processing prediction response: {e}")
        return None

    # The HTTP status is deliberately not checked first, so that whatever
    # JSON body the server returned is printed below for diagnosis.
    if not isinstance(response_data, dict) or "predictions" not in response_data:
        print("Unexpected response format:")
        print(response_data)
        return None

    try:
        return response_data["predictions"][0][0]
    except (IndexError, KeyError, TypeError) as e:
        print(f"Error processing prediction response: {e}")
        return None

def evaluate_model(df, threshold=0.6):
    """Compute the binary accuracy of the served model over ``df``.

    Every row of ``df`` is converted with ``convert_types`` and sent to the
    model through ``make_predictions``. Labels are read again from the CSV
    file and looked up by the row's index label, so ``df`` must keep the
    index of that file.

    Rows for which no prediction could be obtained are excluded from both
    the numerator and the denominator.

    Args:
        df: Feature DataFrame, without the 'Diagnosis' column.
        threshold: Scores greater than or equal to this value are counted
            as the positive class (1); lower scores as the negative class (0).

    Returns:
        The fraction of scored rows whose predicted label matches the real
        one, or ``nan`` when no row could be scored.
    """
    # Load real labels
    real_df = pd.read_csv('data/The_Cancer_data_1500_V2.csv')
    real_labels = real_df['Diagnosis']

    correct_predictions = 0
    total_predictions = 0

    for index, row in df.iterrows():
        features = convert_types(row.to_dict())

        prediction = make_predictions(features)
        if prediction is None:
            # Failed requests are already reported by make_predictions.
            continue

        predicted_label = 1 if prediction >= threshold else 0
        if predicted_label == real_labels.loc[index]:
            correct_predictions += 1
        total_predictions += 1

    # Accuracy is undefined without any scored row; avoid ZeroDivisionError.
    if total_predictions == 0:
        print("No predictions were obtained; binary accuracy is undefined.")
        return float("nan")

    return correct_predictions / total_predictions

# Example usage: score every row of `df` against the deployed model and
# print the accuracy as a percentage. evaluate_model sends one request per
# row.
binary_accuracy = evaluate_model(df)
print(f"Binary accuracy: {binary_accuracy * 100:.2f}%")

# Output:
# Binary accuracy: 90.13%
