-
Notifications
You must be signed in to change notification settings - Fork 558
/
correlations.py
19 lines (16 loc) · 870 Bytes
/
correlations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import pandas as pd
import sys
def corr(first_file, second_file):
    """Print and return correlation scores between two prediction CSV files.

    Both files are read with their first CSV column as the row index. The
    first remaining column of ``first_file`` names the column to compare,
    and the column with that same name is taken from ``second_file``.
    pandas aligns the two Series on their index before correlating, so rows
    are paired by index label rather than by position in the file.

    Args:
        first_file: Path (or anything ``pandas.read_csv`` accepts) of the
            first CSV file.
        second_file: Path of the second CSV file.

    Returns:
        A dict mapping the pandas method name (``"pearson"``, ``"kendall"``,
        ``"spearman"``) to the corresponding correlation score.

    Raises:
        IndexError: If ``first_file`` has no column besides the index.
        KeyError: If ``second_file`` has no column with the same name as
            the measured column of ``first_file``.
    """
    first_df = pd.read_csv(first_file, index_col=0)
    second_df = pd.read_csv(second_file, index_col=0)

    # index_col=0 consumed the first CSV column (assumed to be
    # `prediction_id`), so columns[0] is the file's second column
    # (assumed to be `prediction`).
    prediction = first_df.columns[0]

    print("Finding correlation between: {} and {}".format(first_file, second_file))
    print("Column to be measured: {}".format(prediction))

    # Select each column once instead of once per correlation method.
    first_values = first_df[prediction]
    second_values = second_df[prediction]

    scores = {}
    for label, method in (
        ("Pearson", "pearson"),
        ("Kendall", "kendall"),
        ("Spearman", "spearman"),
    ):
        score = first_values.corr(second_values, method=method)
        scores[method] = score
        print("{}'s correlation score: {}".format(label, score))
    return scores


# Only touch sys.argv when executed as a script, so that importing this
# module (e.g. to reuse corr) has no side effects.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("usage: {} <first_csv> <second_csv>".format(sys.argv[0]))
    first_file = sys.argv[1]
    second_file = sys.argv[2]
    corr(first_file, second_file)