In [1]:
import json
import os
import random
from urllib.request import urlopen

import numpy as np
import pandas as pd
import requests
# from github import Github

# Output folder for every JSON snapshot written by this notebook.
path = 'mockdata/'
# open(..., 'w') does not create missing parent folders, so make sure the
# folder exists before any cell tries to write into it.
os.makedirs(path, exist_ok=True)

# =============================
#              CA-BACKEND DATA
# =============================

## 1. Proposals data from feedback-challenge-tool-backend

In [2]:
# Download the proposals list published by feedback-challenge-tool-backend.
url = "https://raw.githubusercontent.com/Project-Catalyst/feedback-challenge-tool-backend/master/data/f8/proposals.json"
# Use the response as a context manager so the HTTP connection is closed
# as soon as the payload has been read.
with urlopen(url) as response:
    proposals = json.loads(response.read())

## 2. Structured CA data (template data)

In [3]:
# Keep only the proposal id and its challenge (`category`), start every
# proposal with zero assessments, and order the rows by proposal id.
df = (
    pd.json_normalize(proposals)
    .loc[:, ['id', 'category']]
    .assign(assessments_count=0)
    .sort_values('id', axis='index')
    .reset_index(drop=True)
    .rename(columns={'id': 'proposal_id'})
)

# Collapse the rows of each challenge into a list of
# {proposal_id, assessments_count} records.
proposals_per_challenge = (
    df.groupby('category')[['proposal_id', 'assessments_count']]
    .apply(lambda group: group.to_dict(orient='records'))
)
# The un-named groupby result becomes column 0 after reset_index().
df_ca_snapshot = (
    proposals_per_challenge
    .reset_index()
    .rename(columns={'category': 'challenge_id', 0: 'proposals'})
)

# json final format: one record per challenge, saved as the template file
ca_snapshot_template = df_ca_snapshot.to_dict(orient='records')
with open(path + 'ca_snapshot_template.json', 'w') as f:
    json.dump(ca_snapshot_template, f, indent=2)

In [4]:
# Display the template built above to check its nested challenge -> proposals layout.
ca_snapshot_template

[{'challenge_id': 26433,
  'proposals': [{'proposal_id': 396607, 'assessments_count': 0},
   {'proposal_id': 396949, 'assessments_count': 0},
   {'proposal_id': 397164, 'assessments_count': 0},
   {'proposal_id': 397419, 'assessments_count': 0},
   {'proposal_id': 397531, 'assessments_count': 0},
   {'proposal_id': 397630, 'assessments_count': 0},
   {'proposal_id': 397636, 'assessments_count': 0},
   {'proposal_id': 398184, 'assessments_count': 0},
   {'proposal_id': 398206, 'assessments_count': 0},
   {'proposal_id': 398664, 'assessments_count': 0},
   {'proposal_id': 398834, 'assessments_count': 0},
   {'proposal_id': 399055, 'assessments_count': 0},
   {'proposal_id': 399160, 'assessments_count': 0},
   {'proposal_id': 399803, 'assessments_count': 0},
   {'proposal_id': 399816, 'assessments_count': 0},
   {'proposal_id': 399961, 'assessments_count': 0},
   {'proposal_id': 400273, 'assessments_count': 0},
   {'proposal_id': 400305, 'assessments_count': 0},
   {'proposal_id': 400555,

## 3. Updating CA data (from ca-backend api request)

In [5]:
# read < template data > (generated in 2.)
with open(path+'ca_snapshot_template.json', 'r') as f:
    ca_snapshot_template = json.load(f)

# Unpack the nested template (challenge -> list of proposals) into one flat
# row per proposal. Building the frame once from a list of records replaces
# the nested concat-of-concats and keeps the column order
# challenge_id, proposal_id, assessments_count.
df_ca_snapshot = pd.DataFrame(
    [
        {'challenge_id': challenge['challenge_id'], **proposal}
        for challenge in ca_snapshot_template
        for proposal in challenge['proposals']
    ],
    columns=['challenge_id', 'proposal_id', 'assessments_count'],
)
df_ca_snapshot = df_ca_snapshot.set_index('proposal_id').sort_index()

# Read the data saved from the IdeaScale API request on ca-tool-backend.
url = "https://raw.githubusercontent.com/Project-Catalyst/ca-tool-backend/master/proposals.json"
# The context manager closes the HTTP connection once the payload is read.
with urlopen(url) as response:
    ca_proposals_count = json.loads(response.read())

# Backend data as a frame indexed (and sorted) by proposal_id.
df_cabackend = (
    pd.json_normalize(ca_proposals_count)
    .rename(columns={'id': 'proposal_id'})
    .set_index('proposal_id')
    .sort_index()
)

# Update the template counts with the backend counts, aligned on proposal_id.
# A proposal that the backend does not report keeps its template count
# instead of becoming NaN, which would turn the whole column into floats and
# make json.dump emit the non-standard token `NaN`.
backend_counts = df_cabackend['assessments_count'].reindex(df_ca_snapshot.index)
df_ca_snapshot['assessments_count'] = (
    backend_counts
    .fillna(df_ca_snapshot['assessments_count'])
    .astype('int64')
)
df_ca_snapshot = df_ca_snapshot.reset_index()

# Pack the updated rows back into the nested json layout: one record per
# challenge holding its list of {proposal_id, assessments_count} records.
# The un-named groupby result becomes column 0 after reset_index().
df_ch = (
    df_ca_snapshot
    .groupby('challenge_id')[['proposal_id', 'assessments_count']]
    .apply(lambda x: x.to_dict(orient='records'))
    .reset_index()
    .rename(columns={0: 'proposals'})
)

ca_snapshot = df_ch.to_dict(orient='records')

with open(path+'ca_snapshot_outbackends.json', 'w') as f:
    json.dump(ca_snapshot, f, indent=2)

In [6]:
# Show the counts fetched from ca-tool-backend, indexed by proposal_id.
df_cabackend

Unnamed: 0_level_0,assessments_count
proposal_id,Unnamed: 1_level_1
396522,138
396524,54
396541,38
396542,66
396544,27
...,...
405479,13
405482,26
405486,31
405488,15


In [7]:
# Show the updated snapshot that was written to ca_snapshot_outbackends.json.
ca_snapshot

[{'challenge_id': 26433,
  'proposals': [{'proposal_id': 396607, 'assessments_count': 14},
   {'proposal_id': 396949, 'assessments_count': 9},
   {'proposal_id': 397164, 'assessments_count': 7},
   {'proposal_id': 397419, 'assessments_count': 29},
   {'proposal_id': 397531, 'assessments_count': 15},
   {'proposal_id': 397630, 'assessments_count': 6},
   {'proposal_id': 397636, 'assessments_count': 11},
   {'proposal_id': 398184, 'assessments_count': 9},
   {'proposal_id': 398206, 'assessments_count': 17},
   {'proposal_id': 398664, 'assessments_count': 7},
   {'proposal_id': 398834, 'assessments_count': 9},
   {'proposal_id': 399055, 'assessments_count': 19},
   {'proposal_id': 399160, 'assessments_count': 14},
   {'proposal_id': 399803, 'assessments_count': 9},
   {'proposal_id': 399816, 'assessments_count': 6},
   {'proposal_id': 399961, 'assessments_count': 26},
   {'proposal_id': 400273, 'assessments_count': 10},
   {'proposal_id': 400305, 'assessments_count': 7},
   {'proposal_id'

# ============================
#           vCA-BACKEND DATA
# ============================

## 1. vCA-backend ongoing data (api request)

In [8]:
# vca-backend api
url = "https://vca-backend.herokuapp.com/"
# The context manager closes the HTTP connection once the payload is read.
with urlopen(url) as response:
    vca_assess_count = json.loads(response.read())

# Build a one-column frame of review counts indexed by integer assessment_id.
# The ids start out as the index labels of the Series built from the decoded
# payload and are cast with astype(int) (presumably they arrive as string
# JSON keys — TODO confirm against the API).
df_vcabackend = (
    pd.Series(vca_assess_count, name='vca_reviews_count')
    .rename_axis('assessment_id')
    .reset_index()
    .astype({'assessment_id': int})
    .set_index('assessment_id')
)

In [9]:
# Show the per-assessment vCA review counts, indexed by assessment_id.
df_vcabackend

Unnamed: 0_level_0,vca_reviews_count
assessment_id,Unnamed: 1_level_1
1,28
2,21
3,19
4,18
5,19
...,...
11669,8
11670,8
11671,22
11672,27


## 2. Reading (latest) ca-snapshot (generated by ca-backend)

In [10]:
# Reload the CA snapshot file (ca_snapshot_outbackends.json) from disk.
with open(path + 'ca_snapshot_outbackends.json', 'r') as f:
    ca_snapshot = json.load(f)

# One row per challenge; the `proposals` column still holds the packed list
# of {proposal_id, assessments_count} records.
df_ch = pd.json_normalize(ca_snapshot)

# For every challenge, expand its proposal records into a frame and put a
# challenge_id column of matching length in front of it.
per_challenge_frames = [
    pd.concat(
        [
            pd.Series([challenge_id] * len(challenge_proposals), name='challenge_id'),
            pd.json_normalize(challenge_proposals),
        ],
        axis='columns',
    )
    for challenge_id, challenge_proposals in zip(df_ch['challenge_id'], df_ch['proposals'])
]

# Stack all challenges: proposal_id (index), challenge_id, assessments_count.
df_cabackend = (
    pd.concat(per_challenge_frames, axis='index')
    .set_index('proposal_id')
    .sort_index()
)

In [11]:
# Show the flattened CA snapshot: one row per proposal_id with its challenge_id and assessments_count.
df_cabackend

Unnamed: 0_level_0,challenge_id,assessments_count
proposal_id,Unnamed: 1_level_1,Unnamed: 2_level_1
396522,26434,138
396524,26438,54
396541,26434,38
396542,26444,66
396544,26452,27
...,...,...
405479,26440,13
405482,26436,26
405486,26438,31
405488,26455,15


## 3. vCA-Tool: read the proposal_id / assessment_id table

In [12]:
# Load the assessments table published in the vca-tool repository.
ASSESSMENTS_PATH = 'https://raw.githubusercontent.com/Project-Catalyst/vca-tool/master/src/assets/data/assessments.csv'
assessments = pd.read_csv(ASSESSMENTS_PATH)

# Only the link between an assessment and its proposal is needed here.
df_assess = assessments[['id', 'proposal_id']].rename(columns={'id': 'assessment_id'})

# proposal_id (index) -> list of the assessment ids written for that proposal
df_ass_by_proposal = (
    df_assess
    .groupby('proposal_id')['assessment_id']
    .apply(list)
    .to_frame()
)

In [13]:
# Show the assessment_id / proposal_id pairs taken from assessments.csv.
df_assess

Unnamed: 0,assessment_id,proposal_id
0,1,396560
1,2,398119
2,3,398309
3,4,400975
4,5,401098
...,...,...
10730,11669,398384
10731,11670,400110
10732,11671,396524
10733,11672,396541


In [14]:
# Show the list of assessment ids grouped under each proposal_id.
df_ass_by_proposal

Unnamed: 0_level_0,assessment_id
proposal_id,Unnamed: 1_level_1
396522,"[59, 334, 357, 578, 625, 843, 1096, 1138, 1335..."
396524,"[167, 579, 1097, 1429, 1894, 1903, 2401, 2470,..."
396541,"[70, 580, 621, 1336, 1430, 2294, 2471, 2493, 2..."
396542,"[71, 384, 837, 1357, 1431, 2134, 2397, 2446, 2..."
396544,"[234, 248, 581, 666, 1432, 3004, 3053, 3468, 3..."
...,...
405479,"[8, 64, 347, 355, 466, 653, 1306, 1786, 5566, ..."
405482,"[279, 356, 1739, 1892, 3263, 4247, 4347, 4394,..."
405486,"[173, 348, 480, 624, 847, 1089, 1122, 1688, 25..."
405488,"[612, 1137, 2811, 5162, 6845, 7229, 7728, 8532..."


## 4. Create vCA-snapshot
#### Copy df_cabackend into df_vca_snapshot and add column < assessments : json packed df_vcabackend {assessment_id, vca_reviews_count} >

In [15]:
def get_assessments(proposal_id):
    """Return the vCA review counts of every assessment of one proposal.

    Looks up the proposal's assessment ids in ``df_ass_by_proposal`` and
    returns the matching ``df_vcabackend`` rows as a list of
    ``{'assessment_id': ..., 'vca_reviews_count': ...}`` records, sorted by
    assessment id.

    A proposal that has no entry in ``df_ass_by_proposal`` yields an empty
    list instead of raising ``KeyError``, so a single proposal without
    assessments does not abort the whole snapshot. An assessment id that is
    listed for the proposal but unknown to ``df_vcabackend`` still raises
    ``KeyError``.
    """
    if proposal_id not in df_ass_by_proposal.index:
        return []
    # The cell holds the python list of assessment ids of this proposal.
    assessment_ids = df_ass_by_proposal.at[proposal_id, 'assessment_id']
    return (
        df_vcabackend.loc[assessment_ids]
        .sort_index()
        .reset_index()
        .to_dict(orient='records')
    )


# Work on a copy so df_cabackend itself stays unchanged.
df_vca_snapshot = df_cabackend.copy()
# Wrap the per-proposal record lists in a Series sharing the proposal_id
# index so every list lands in the row of its own proposal.
df_vca_snapshot['assessments'] = pd.Series(
    [get_assessments(proposal_id) for proposal_id in df_cabackend.index],
    index=df_cabackend.index,
)
df_vca_snapshot = df_vca_snapshot.reset_index()

In [16]:
# NOTE: differences between assessments_count and len(assessments) come from the vCA Tool link-key table

In [17]:
# Show the snapshot with the per-proposal 'assessments' records attached.
df_vca_snapshot

Unnamed: 0,proposal_id,challenge_id,assessments_count,assessments
0,396522,26434,138,"[{'assessment_id': 59, 'vca_reviews_count': 21..."
1,396524,26438,54,"[{'assessment_id': 167, 'vca_reviews_count': 2..."
2,396541,26434,38,"[{'assessment_id': 70, 'vca_reviews_count': 29..."
3,396542,26444,66,"[{'assessment_id': 71, 'vca_reviews_count': 30..."
4,396544,26452,27,"[{'assessment_id': 234, 'vca_reviews_count': 2..."
...,...,...,...,...
1148,405479,26440,13,"[{'assessment_id': 8, 'vca_reviews_count': 14}..."
1149,405482,26436,26,"[{'assessment_id': 279, 'vca_reviews_count': 1..."
1150,405486,26438,31,"[{'assessment_id': 173, 'vca_reviews_count': 1..."
1151,405488,26455,15,"[{'assessment_id': 612, 'vca_reviews_count': 1..."


In [18]:
# Columns that make up one proposal record inside a challenge.
proposal_columns = ['proposal_id', 'assessments_count', 'assessments']

# Collapse the proposals of each challenge into a list of records.
proposals_by_challenge = (
    df_vca_snapshot
    .groupby('challenge_id')[proposal_columns]
    .apply(lambda group: group.to_dict(orient='records'))
)

# The un-named groupby result becomes column 0 after reset_index().
df_vca_ch = proposals_by_challenge.reset_index().rename(columns={0: 'proposals'})

# Final nested structure: challenge -> proposals -> assessments.
vca_data_updated = df_vca_ch.to_dict(orient='records')

In [19]:
# Show the nested challenge -> proposals -> assessments structure that gets saved below.
vca_data_updated

[{'challenge_id': 26433,
  'proposals': [{'proposal_id': 396607,
    'assessments_count': 14,
    'assessments': [{'assessment_id': 3279, 'vca_reviews_count': 7},
     {'assessment_id': 3676, 'vca_reviews_count': 4},
     {'assessment_id': 3728, 'vca_reviews_count': 8},
     {'assessment_id': 3932, 'vca_reviews_count': 4},
     {'assessment_id': 4497, 'vca_reviews_count': 5},
     {'assessment_id': 4802, 'vca_reviews_count': 4},
     {'assessment_id': 4866, 'vca_reviews_count': 5},
     {'assessment_id': 5190, 'vca_reviews_count': 4},
     {'assessment_id': 6897, 'vca_reviews_count': 3},
     {'assessment_id': 8588, 'vca_reviews_count': 6},
     {'assessment_id': 9265, 'vca_reviews_count': 6},
     {'assessment_id': 10898, 'vca_reviews_count': 4},
     {'assessment_id': 11589, 'vca_reviews_count': 4}]},
   {'proposal_id': 396949,
    'assessments_count': 9,
    'assessments': [{'assessment_id': 2670, 'vca_reviews_count': 5},
     {'assessment_id': 3678, 'vca_reviews_count': 4},
     {'

In [20]:
# Persist the vCA snapshot next to the other generated files.
vca_snapshot_file = path + 'vca_snapshot_outbackends.json'
with open(vca_snapshot_file, 'w') as f:
    json.dump(vca_data_updated, f, indent=2)