# Deepfake Detection System - Analysis Notebook

This notebook analyzes the performance, costs, and accuracy of the Deepfake Detection System. The detection results and latency figures used below are simulated.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Global plotting defaults: seaborn's whitegrid theme first, then the default
# figure size (set afterwards so it is applied on top of the theme).
sns.set_theme(style='whitegrid')
plt.rc('figure', figsize=(10, 6))

## 1. Detection Accuracy Analysis

In [None]:
# Simulated detection results: ten real clips followed by ten fake clips.
# Each entry pairs the true label with the predicted label and the
# confidence value listed for that prediction.
true_labels = ['Real'] * 10 + ['Fake'] * 10
predicted_labels = (
    ['Real'] * 4 + ['Fake'] + ['Real'] * 5      # predictions for the real clips
    + ['Fake'] * 6 + ['Real'] + ['Fake'] * 3    # predictions for the fake clips
)
confidence_scores = [
    0.92, 0.88, 0.95, 0.91, 0.52, 0.89, 0.94, 0.87, 0.93, 0.90,   # real clips
    0.98, 0.96, 0.94, 0.97, 0.91, 0.95, 0.48, 0.93, 0.89, 0.92,   # fake clips
]

detection_data = {
    'Video Type': true_labels,
    'Predicted': predicted_labels,
    'Confidence': confidence_scores,
}

# 'Correct' is True wherever the prediction matches the true label.
df_detection = pd.DataFrame(detection_data).assign(
    Correct=lambda frame: frame['Video Type'].eq(frame['Predicted'])
)

# Share of correct predictions, expressed as a percentage.
accuracy = df_detection['Correct'].mean() * 100
print(f'Overall Accuracy: {accuracy:.1f}%')
print(df_detection.head(10))

In [None]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# One shared label order for both the matrix and the display, so rows and
# columns read 'Real' first, then 'Fake'.
class_labels = ['Real', 'Fake']

cm = confusion_matrix(
    df_detection['Video Type'],   # true labels
    df_detection['Predicted'],    # predicted labels
    labels=class_labels,
)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)

# Draw onto an explicitly created axes and title that same axes.
fig, ax = plt.subplots(figsize=(8, 6))
disp.plot(ax=ax, cmap='Blues')
ax.set_title('Detection Confusion Matrix')
plt.show()

## 2. Latency Analysis

In [None]:
# Simulated latency data: three pipeline stages plus a hand-entered 'Total'
# row (3.4 s, matching the sum of the three stage values).
latency_data = {
    'Component': ['Frame Extraction', 'Gemini API Call', 'Response Parsing', 'Total'],
    'Time (seconds)': [0.8, 2.5, 0.1, 3.4]
}
df_latency = pd.DataFrame(latency_data)

# One colour per bar, in the same order as the rows above.
colors = ['#3498db', '#e74c3c', '#2ecc71', '#9b59b6']

latency_fig, latency_ax = plt.subplots(figsize=(10, 5))
latency_ax.barh(df_latency['Component'], df_latency['Time (seconds)'], color=colors)
latency_ax.set_xlabel('Time (seconds)')
latency_ax.set_title('Detection Pipeline Latency Breakdown')

# Annotate every bar with its duration, placed slightly past the bar's end.
for bar_index, seconds in enumerate(df_latency['Time (seconds)']):
    latency_ax.text(seconds + 0.05, bar_index, f'{seconds}s', va='center')

latency_fig.tight_layout()
plt.show()

## 3. Cost Analysis

In [None]:
# Cost breakdown: per-video cost of each category, in dollars.
cost_data = {
    'Category': ['Input Tokens', 'Output Tokens', 'Compute'],
    'Cost per Video ($)': [0.000263, 0.00006, 0.0001]
}
df_cost = pd.DataFrame(cost_data)

# Pie chart of each category's share; every wedge is pulled out slightly.
cost_fig, cost_ax = plt.subplots(figsize=(8, 8))
cost_ax.pie(
    df_cost['Cost per Video ($)'],
    labels=df_cost['Category'],
    autopct='%1.1f%%',
    colors=['#3498db', '#e74c3c', '#2ecc71'],
    explode=(0.05, 0.05, 0.05),
)
cost_ax.set_title('Cost Breakdown per Video Analysis')
plt.show()

# Sum of all categories gives the total cost of analysing one video.
total_cost = df_cost['Cost per Video ($)'].sum()
print(f'Total cost per video: ${total_cost:.6f}')

## 4. Confidence Distribution

In [None]:
# Confidence value at which the dashed reference line is drawn.
CONFIDENCE_THRESHOLD = 0.7

# Histogram (with KDE overlay) of confidence scores, split by true video type.
conf_fig, conf_ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df_detection, x='Confidence', hue='Video Type', kde=True, bins=10,
             ax=conf_ax)
conf_ax.set_title('Confidence Score Distribution by Video Type')
conf_ax.set_xlabel('Confidence Score')
conf_ax.set_ylabel('Count')
threshold_line = conf_ax.axvline(x=CONFIDENCE_THRESHOLD, color='red', linestyle='--',
                                 label=f'Threshold ({CONFIDENCE_THRESHOLD})')

# seaborn has already drawn a legend for the hue levels, but its handles are
# not labelled artists on the axes. A bare legend() call would therefore
# rebuild the legend from the threshold line alone and drop the 'Video Type'
# entries. Merge the existing entries with the threshold line instead.
hue_legend = conf_ax.get_legend()
if hue_legend is not None:
    # `legend_handles` is the matplotlib >= 3.7 name; `legendHandles` the older one.
    hue_handles = list(
        getattr(hue_legend, 'legend_handles', None)
        or getattr(hue_legend, 'legendHandles', [])
    )
    hue_labels = [text.get_text() for text in hue_legend.get_texts()]
    legend_title = hue_legend.get_title().get_text()
else:
    hue_handles, hue_labels, legend_title = [], [], None

conf_ax.legend(
    handles=hue_handles + [threshold_line],
    labels=hue_labels + [threshold_line.get_label()],
    title=legend_title,
)
plt.show()