## Import Packages

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Import Model Predictions

In [None]:
# Locations of the saved score tables, relative to the working directory.
binary_scores_csv = "PredictionScores_BinaryClass.csv"
multiclass_scores_csv = "PredictionScores_MultiClass.csv"
model_pairs_csv = "ModelPairs.csv"

# Binary-class and multi-class prediction scores.
df_predictions = pd.read_csv(binary_scores_csv)
df_multiclass_predictions = pd.read_csv(multiclass_scores_csv)

# low_memory=False has pandas parse the whole file at once instead of in
# chunks, which avoids mixed-dtype inference within a column.
df_pairs = pd.read_csv(model_pairs_csv, low_memory=False)

### Select the Predictions We Care About

In [None]:
# Project groups used to select "Test Project" values for the heat maps below.
#
# NOTE(review): later cells also reference `least_projects`, which is not
# assigned in any cell visible in this notebook. Those cells raise NameError on
# a fresh kernel until it is defined -- TODO add it here next to the other lists.

# Spelled "SeaMonkey" (capital M) to agree with `train_projects`; `isin` is
# case-sensitive, so "Seamonkey" would not match that spelling.
most_projects = [
    "Core",
    "Firefox",
    "Thunderbird",
    "Bugzilla",
    "SeaMonkey",
    "DevTools",
    "MailNews Core",
    "Toolkit",
    "Testing",
    "Infrastructure & Operations",
    "NSS",
]

train_projects = [
    "Core",
    "MailNews Core",
    "SeaMonkey",
    "Bugzilla",
    "Firefox",
    "Other Applications",
    "NSS",
    "Calendar",
    "Thunderbird",
    "Toolkit",
    "NSPR",
    "Testing",
    "Firefox Build System",
    "Webtools",
]

# "Developer Documentation" was listed twice; the duplicate entry had no effect
# on `isin` filtering and has been removed.
middle_projects = [
    "developer.mozilla.org",
    "Conduit",
    "Developer Documentation",
    "Participation Infrastructure",
    "Firefox for iOS",
    "NSPR",
    "mozilla.org",
    "Mozilla Foundation Communications",
    "Data Science",
    "Localization Infrastructure and Tools",
]

# Tick positions from 0 to 1 in steps of 0.2.
x_ticks = [0, 0.2, 0.4, 0.6, 0.8, 1]

In [None]:
# Remove the "Unnamed: 0" column from the binary-class table.
# errors="ignore" keeps this cell re-runnable: once the column is gone, a
# second execution is a no-op instead of raising KeyError.
df_predictions = df_predictions.drop(columns="Unnamed: 0", errors="ignore")

# Display Heat Map (Binary Class Predictions)

### Test Most

In [None]:
# Keep rows whose test project is in `most_projects`, drop rows that are exact
# duplicates across all columns (first occurrence kept), then reshape into a
# Train Project x Test Project matrix of prediction scores.
# pivot() is called with keywords because pandas 2.0 made its arguments
# keyword-only; the positional form raises TypeError there.
df = (
    df_predictions[df_predictions["Test Project"].isin(most_projects)]
    .drop_duplicates()
    .pivot(index="Train Project", columns="Test Project", values="Prediction Score")
)
ax = sns.heatmap(df, cmap="YlGnBu")
ax.set_title("Most Prediction Scores (BinaryClass)")
plt.show()

### Test Least

In [None]:
# NOTE(review): `least_projects` is not assigned in any cell visible in this
# notebook; on a fresh kernel this cell raises NameError until it is defined.
#
# Keep rows whose test project is in `least_projects`, drop rows that are exact
# duplicates across all columns (first occurrence kept), then reshape into a
# Train Project x Test Project matrix of prediction scores.
# pivot() is called with keywords because pandas 2.0 made its arguments
# keyword-only; the positional form raises TypeError there.
df = (
    df_predictions[df_predictions["Test Project"].isin(least_projects)]
    .drop_duplicates()
    .pivot(index="Train Project", columns="Test Project", values="Prediction Score")
)
ax = sns.heatmap(df, cmap="YlGnBu")
ax.set_title("Least Prediction Scores (BinaryClass)")
plt.show()

### Test Middle

In [None]:
# Keep rows whose test project is in `middle_projects`, drop rows that are
# exact duplicates across all columns (first occurrence kept), then reshape
# into a Train Project x Test Project matrix of prediction scores.
# pivot() is called with keywords because pandas 2.0 made its arguments
# keyword-only; the positional form raises TypeError there.
df = (
    df_predictions[df_predictions["Test Project"].isin(middle_projects)]
    .drop_duplicates()
    .pivot(index="Train Project", columns="Test Project", values="Prediction Score")
)
ax = sns.heatmap(df, cmap="YlGnBu")
ax.set_title("Median Prediction Scores (BinaryClass)")
plt.show()

### Everything

In [None]:
# Full binary-class score matrix: drop exact duplicate rows (first occurrence
# kept) and reshape to Train Project x Test Project.
# pivot() is called with keywords because pandas 2.0 made its arguments
# keyword-only; the positional form raises TypeError there.
df = df_predictions.drop_duplicates().pivot(
    index="Train Project", columns="Test Project", values="Prediction Score"
)
# Larger canvas than the default, since this plot covers every project pair.
fig, ax = plt.subplots(figsize=(25, 15))
ax = sns.heatmap(df, cmap="YlGnBu", ax=ax)
ax.set_title("Prediction Scores (BinaryClass)")
plt.show()

# Display Heat Map (Multi Class Predictions)

### Test most

In [None]:
# Keep multi-class rows whose test project is in `most_projects`, drop rows
# that are exact duplicates across all columns (first occurrence kept), then
# reshape into a Train Project x Test Project matrix of prediction scores.
# pivot() is called with keywords because pandas 2.0 made its arguments
# keyword-only; the positional form raises TypeError there.
df = (
    df_multiclass_predictions[
        df_multiclass_predictions["Test Project"].isin(most_projects)
    ]
    .drop_duplicates()
    .pivot(index="Train Project", columns="Test Project", values="Prediction Score")
)
ax = sns.heatmap(df, cmap="YlGnBu")
# Title names the subset (as the binary-class plots do) so the three
# multi-class subset figures can be told apart.
ax.set_title("Most Prediction Scores (MultiClass)")
plt.show()

### Test least

In [None]:
# NOTE(review): `least_projects` is not assigned in any cell visible in this
# notebook; on a fresh kernel this cell raises NameError until it is defined.
#
# Keep multi-class rows whose test project is in `least_projects`, drop rows
# that are exact duplicates across all columns (first occurrence kept), then
# reshape into a Train Project x Test Project matrix of prediction scores.
# pivot() is called with keywords because pandas 2.0 made its arguments
# keyword-only; the positional form raises TypeError there.
df = (
    df_multiclass_predictions[
        df_multiclass_predictions["Test Project"].isin(least_projects)
    ]
    .drop_duplicates()
    .pivot(index="Train Project", columns="Test Project", values="Prediction Score")
)
ax = sns.heatmap(df, cmap="YlGnBu")
# Title names the subset (as the binary-class plots do) so the three
# multi-class subset figures can be told apart.
ax.set_title("Least Prediction Scores (MultiClass)")
plt.show()

### Test middle

In [None]:
# Keep multi-class rows whose test project is in `middle_projects`, drop rows
# that are exact duplicates across all columns (first occurrence kept), then
# reshape into a Train Project x Test Project matrix of prediction scores.
# pivot() is called with keywords because pandas 2.0 made its arguments
# keyword-only; the positional form raises TypeError there.
df = (
    df_multiclass_predictions[
        df_multiclass_predictions["Test Project"].isin(middle_projects)
    ]
    .drop_duplicates()
    .pivot(index="Train Project", columns="Test Project", values="Prediction Score")
)
ax = sns.heatmap(df, cmap="YlGnBu")
# Title names the subset (matching the binary-class "Median" plot) so the
# three multi-class subset figures can be told apart.
ax.set_title("Median Prediction Scores (MultiClass)")
plt.show()

### Everything

In [None]:
# Full multi-class score matrix, reshaped to Train Project x Test Project.
# Exact duplicate rows are dropped first (as in the binary-class "Everything"
# cell) because pivot() raises ValueError when an index/column pair repeats.
# pivot() is called with keywords because pandas 2.0 made its arguments
# keyword-only; the positional form raises TypeError there.
df = df_multiclass_predictions.drop_duplicates().pivot(
    index="Train Project", columns="Test Project", values="Prediction Score"
)
# Larger canvas than the default, since this plot covers every project pair.
fig, ax = plt.subplots(figsize=(25, 15))
ax = sns.heatmap(df, cmap="YlGnBu", ax=ax)
ax.set_title("Prediction Scores (Multi Class)")
plt.show()