In [2]:
import datetime
import pandas as pd
import ast
import numpy as np
from scipy.spatial import distance
from tslearn.metrics import dtw,lcss

In [12]:
def advancedfilters(cube,minPopulation,continentCheck,startDate,endDate):
    """Keep the columns that pass the continent/population filter and the rows in a date range.

    The frame is read as follows: the first column supplies the dates, the
    last row supplies the value compared against ``continentCheck`` and the
    second-to-last row supplies the value compared against ``minPopulation``.

    The input frame is never modified; all work happens on copies, so the
    function can be called repeatedly on the same loaded frame.

    Parameters
    ----------
    cube : pandas.DataFrame
        Raw frame laid out as described above.
    minPopulation : number
        Columns whose population value is less than or equal to this are dropped.
    continentCheck : container
        Columns whose last-row value is not in this container are dropped.
    startDate, endDate
        Bounds of the label slice applied to the datetime index.

    Returns
    -------
    pandas.DataFrame
        Independent frame indexed by ``Date`` with the two metadata rows removed.
    """
    # fillna without inplace returns a new frame, leaving the caller's data alone.
    filled = cube.fillna(0)
    continent_row = filled.iloc[-1]
    population_row = filled.iloc[-2]

    dropped = []
    # Position 0 is the date column, so the filter starts at position 1.
    for pos in range(1, len(filled.columns)):
        # `or` short-circuits: the population cell is only parsed when the
        # continent matched, exactly as before.
        if (continent_row.iloc[pos] not in continentCheck
                or int(float(population_row.iloc[pos])) <= minPopulation):
            dropped.append(filled.columns[pos])

    # Remove rejected columns, then the two trailing metadata rows.
    filtered = filled.drop(columns=dropped).iloc[:-2]
    filtered = filtered.set_index(filtered.columns[0])
    filtered.index = pd.to_datetime(filtered.index)
    # .copy() detaches the result from the slice so later writes to it do not
    # raise SettingWithCopyWarning.
    filtered = filtered.loc[startDate:endDate].copy()
    filtered.index.names = ['Date']
    return filtered

def slicer(cube,indicator):
    """Replace every cell with the value stored under ``indicator`` in that cell.

    Each cell is parsed with ``ast.literal_eval`` and indexed with
    ``indicator``. Cells that cannot be parsed, or whose parsed value has no
    such entry, are left exactly as they were.

    Parameters
    ----------
    cube : pandas.DataFrame
        Frame whose cells are (mostly) string representations of dicts.
    indicator : hashable
        Key to look up in each parsed cell.

    Returns
    -------
    pandas.DataFrame
        The same ``cube`` object, with its columns replaced in place.
    """
    def extract(cell):
        try:
            return ast.literal_eval(cell)[indicator]
        except (ValueError, SyntaxError, TypeError, KeyError, IndexError):
            # Best effort: not a literal, not subscriptable, or key missing.
            return cell

    # Whole-column assignment replaces the element-wise chained assignment
    # (`cube[i].iloc[j] = ...`), which writes to a temporary object and is a
    # no-op under pandas Copy-on-Write.
    for column in cube.columns:
        cube[column] = cube[column].map(extract)
    return cube

def time_series(a,period):
    """Turn every column into a column of trailing windows of length ``period``.

    For each row position ``k >= period - 1`` the result cell holds the list
    of the ``period`` consecutive values of that column ending at row ``k``;
    the cell is labelled with the index value of row ``k``.

    Parameters
    ----------
    a : pandas.DataFrame
        Input frame; rows are taken in their existing order.
    period : int
        Window length in rows; must be at least 1.

    Returns
    -------
    pandas.DataFrame
        Frame with the same columns as ``a`` and ``max(len(a) - period + 1, 0)``
        rows, whose cells are Python lists.

    Raises
    ------
    ValueError
        If ``period`` is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be at least 1, got %r" % (period,))

    # Labels of the rows that close a complete window.
    window_index = a.index[period - 1:]

    windowed_columns = []
    for pos, label in enumerate(a.columns):
        # Positional access avoids integer lookups on a date-labelled Series.
        values = a.iloc[:, pos].tolist()
        windows = [values[end - period:end] for end in range(period, len(values) + 1)]
        windowed_columns.append(
            pd.Series(windows, index=window_index, name=label, dtype=object)
        )

    if not windowed_columns:
        # No columns to window: return an empty frame instead of failing.
        return pd.DataFrame(index=window_index)
    # Concatenate once, after all columns are built.
    return pd.concat(windowed_columns, axis=1)


def distfunc(target,comp,method):
    """Return the distance between two sequences under the named method.

    Parameters
    ----------
    target, comp : array-like
        Sequences to compare; both are converted with ``np.array``.
    method : str
        One of ``'euclidean'``, ``'manhattan'``, ``'chebyshev'``, ``'dtw'``
        or ``'lcs'``.

    Returns
    -------
    float
        The SciPy distance for the first three methods, the value of
        ``dtw(target, comp)`` for ``'dtw'``, and ``1 - lcss(target, comp)``
        for ``'lcs'``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    target = np.array(target)
    comp = np.array(comp)
    if method == 'euclidean':
        return distance.euclidean(target,comp)
    if method == 'manhattan':
        # scipy.spatial.distance exposes the Manhattan (L1) metric as
        # `cityblock`; there is no `distance.manhattan`.
        return distance.cityblock(target,comp)
    if method == 'chebyshev':
        return distance.chebyshev(target,comp)
    if method == 'dtw':
        return dtw(target,comp)
    if method == "lcs":
        return 1-lcss(target,comp)
    # Fail loudly instead of returning None, which would only surface later
    # as an obscure sorting error.
    raise ValueError("unknown distance method: %r" % (method,))
    
def ranker(cube,target_country,target_date,method,top_n):
    """Rank every cell of the other columns by distance to the target cell.

    The target is ``cube[target_country][target_date]``. Every cell of every
    column other than ``target_country`` is compared to it with ``distfunc``
    and the ``top_n`` closest cells are returned.

    Parameters
    ----------
    cube : pandas.DataFrame
        Frame whose index values can be formatted with ``strftime``.
    target_country : str
        Column holding the target cell; that column is excluded from the candidates.
    target_date
        Index label of the target cell.
    method : str
        Distance name forwarded to ``distfunc``.
    top_n : int
        Maximum number of identifiers to return.

    Returns
    -------
    list of str
        Identifiers of the form ``"<column> <YYYY-MM-DD>"``, closest first.
        Ties keep the column/row iteration order.
    """
    # The target cell does not change between candidates: look it up once.
    target_window = cube[target_country][target_date]

    identifier = []
    comp_values = []
    for country in cube.columns:
        if country == target_country:
            continue
        candidate_column = cube[country]
        for day in cube.index:
            identifier.append(country + ' ' + datetime.datetime.strftime(day,"%Y-%m-%d"))
            comp_values.append(distfunc(target_window,candidate_column[day],method))

    # A stable sort makes the order of equal distances deterministic.
    closest = np.argsort(comp_values, kind="stable")[:top_n].tolist()
    return [identifier[pos] for pos in closest]

def firstRunOutput(cube,targetCountry,firstDate,lastDate,indicator,method,numberOfResults,minPopulation,startDate,endDate,continentCheck):
    """Run the full pipeline and return the closest windows as date-to-value mappings.

    Steps: filter the cube (``advancedfilters``), extract ``indicator`` from
    every cell (``slicer``), build trailing windows of
    ``(lastDate - firstDate).days`` rows (``time_series``) and rank them
    against the target country's window ending on ``lastDate`` (``ranker``).

    Parameters
    ----------
    cube : pandas.DataFrame
        Raw frame as loaded; a copy is processed, the argument is not modified.
    targetCountry : str
        Column to find matches for.
    firstDate, lastDate : datetime.date
        Their day difference is the window length; ``lastDate`` is the
        target window's end label.
    indicator : str
        Key extracted from each cell.
    method : str
        Distance name forwarded to ``ranker``.
    numberOfResults : int
        Number of matches to return.
    minPopulation, startDate, endDate, continentCheck
        Forwarded unchanged to ``advancedfilters``.

    Returns
    -------
    dict
        Maps each ``"<country> <YYYY-MM-DD>"`` identifier to a dict from the
        window's dates to its values.
    """
    window_days = (lastDate-firstDate).days

    # Hand the filter its own copy: it may modify the frame it receives, and
    # the caller's frame must stay usable for further runs.
    cube_filtered = advancedfilters(cube.copy(),minPopulation,continentCheck,startDate,endDate)
    sliced = slicer(cube_filtered,indicator)
    sliced = time_series(sliced,window_days)

    target_end = datetime.datetime(lastDate.year,lastDate.month,lastDate.day)
    result = ranker(sliced,targetCountry,target_end,method,numberOfResults)

    master_dict = dict()
    for identifier in result:
        # Country names may contain spaces ("United Kingdom"); the date is
        # always the last space-separated token, so split from the right.
        country, end_text = identifier.rsplit(' ', 1)
        vec = sliced[country][end_text]
        window_start = datetime.datetime.strptime(end_text,"%Y-%m-%d") - datetime.timedelta(days=window_days-1)
        date = pd.date_range(end=end_text,start=window_start)
        master_dict[identifier] = dict(zip(date,vec))
    return master_dict

In [13]:
# --- Sample input, shaped like a request from the front end ---

# Target window and comparison settings.
targetCountry = "Germany"
firstDate = datetime.date(2021, 3, 5)
lastDate = datetime.date(2021, 3, 26)
indicator = "biweekly_cases_per_million"
method = "dtw"

# Result size and candidate filters.
numberOfResults = 10
minPopulation = 500000
startDate = datetime.date(2021, 1, 1)
endDate = datetime.date(2021, 8, 1)
continentCheck = ["Europe"]


In [3]:
# Load the raw cube from disk.
# NOTE(review): parse_dates=True only targets the index and no index_col is
# given here — confirm whether it has any effect on this file.
cube = pd.read_csv("cube.csv", parse_dates=True)

# Peek at a single raw cell (second row of the "United Kingdom" column).
cube["United Kingdom"].iloc[1]

"{'total_cases': 2662703.0, 'new_cases': 55157.0, 'total_deaths': 75137.0, 'new_deaths': 455.0, 'total_cases_per_million': 39223.0, 'new_cases_per_million': 812.0, 'total_deaths_per_million': 1107.0, 'new_deaths_per_million': 7.0, 'icu_patients': 2420.0, 'icu_patients_per_million': 36.0, 'hosp_patients': 29033.0, 'hosp_patients_per_million': 428.0, 'weekly_icu_admissions': 0.0, 'weekly_icu_admissions_per_million': 0.0, 'weekly_hosp_admissions': 23064.0, 'weekly_hosp_admissions_per_million': 340.0, 'new_tests': 416962.0, 'total_tests': 46247813.0, 'total_tests_per_thousand': 681.0, 'new_tests_per_thousand': 6.0, 'positive_rate': 0.0, 'tests_per_case': 8.0, 'tests_units': 'tests performed', 'total_vaccinations': 1380430.0, 'people_vaccinated': 1380430.0, 'people_fully_vaccinated': 0.0, 'new_vaccinations': 0.0, 'total_vaccinations_per_hundred': 2.0, 'people_vaccinated_per_hundred': 2.0, 'people_fully_vaccinated_per_hundred': 0.0, 'stringency_index': 80.0, 'weekly_cases': 367471.0, 'biweek

In [21]:
# Run the pipeline on the sample input defined above.
out = firstRunOutput(
    cube=cube,
    targetCountry=targetCountry,
    firstDate=firstDate,
    lastDate=lastDate,
    indicator=indicator,
    method=method,
    numberOfResults=numberOfResults,
    minPopulation=minPopulation,
    startDate=startDate,
    endDate=endDate,
    continentCheck=continentCheck,
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [22]:
# Display the mapping returned by firstRunOutput.
out

{'Bulgaria 2021-02-28': {Timestamp('2021-02-08 00:00:00', freq='D'): 1332.672,
  Timestamp('2021-02-09 00:00:00', freq='D'): 1388.0810000000001,
  Timestamp('2021-02-10 00:00:00', freq='D'): 1433.415,
  Timestamp('2021-02-11 00:00:00', freq='D'): 1506.8120000000001,
  Timestamp('2021-02-12 00:00:00', freq='D'): 1545.8139999999999,
  Timestamp('2021-02-13 00:00:00', freq='D'): 1568.409,
  Timestamp('2021-02-14 00:00:00', freq='D'): 1573.158,
  Timestamp('2021-02-15 00:00:00', freq='D'): 1607.122,
  Timestamp('2021-02-16 00:00:00', freq='D'): 1662.0980000000002,
  Timestamp('2021-02-17 00:00:00', freq='D'): 1737.943,
  Timestamp('2021-02-18 00:00:00', freq='D'): 1767.1580000000001,
  Timestamp('2021-02-19 00:00:00', freq='D'): 1846.7440000000001,
  Timestamp('2021-02-20 00:00:00', freq='D'): 1854.66,
  Timestamp('2021-02-21 00:00:00', freq='D'): 1861.1360000000002,
  Timestamp('2021-02-22 00:00:00', freq='D'): 1977.708,
  Timestamp('2021-02-23 00:00:00', freq='D'): 2062.331,
  Timestamp(

In [4]:
# Display the frame loaded from cube.csv.
cube

Unnamed: 0.1,Unnamed: 0,Afghanistan,Africa,Albania,Algeria,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,...,Uzbekistan,Vanuatu,Vatican,Venezuela,Vietnam,Wallis and Futuna,World,Yemen,Zambia,Zimbabwe
0,2021-01-02 00:00:00,"{'total_cases': 51526.0, 'new_cases': 0.0, 'to...","{'total_cases': 2811086.0, 'new_cases': 22919....","{'total_cases': 58991.0, 'new_cases': 675.0, '...","{'total_cases': 100159.0, 'new_cases': 262.0, ...","{'total_cases': 8166.0, 'new_cases': 49.0, 'to...","{'total_cases': 17608.0, 'new_cases': 40.0, 't...",,"{'total_cases': 159.0, 'new_cases': 0.0, 'tota...","{'total_cases': 1634834.0, 'new_cases': 5240.0...",...,"{'total_cases': 77182.0, 'new_cases': 0.0, 'to...","{'total_cases': 1.0, 'new_cases': 0.0, 'total_...","{'total_cases': 27.0, 'new_cases': 0.0, 'total...","{'total_cases': 114083.0, 'new_cases': 199.0, ...","{'total_cases': 1482.0, 'new_cases': 8.0, 'tot...",,"{'total_cases': 84728455.0, 'new_cases': 58980...","{'total_cases': 2101.0, 'new_cases': 0.0, 'tot...","{'total_cases': 21230.0, 'new_cases': 233.0, '...","{'total_cases': 14491.0, 'new_cases': 407.0, '..."
1,2021-01-03 00:00:00,"{'total_cases': 51526.0, 'new_cases': 0.0, 'to...","{'total_cases': 2833076.0, 'new_cases': 21990....","{'total_cases': 59438.0, 'new_cases': 447.0, '...","{'total_cases': 100408.0, 'new_cases': 249.0, ...","{'total_cases': 8192.0, 'new_cases': 26.0, 'to...","{'total_cases': 17642.0, 'new_cases': 34.0, 't...",,"{'total_cases': 160.0, 'new_cases': 1.0, 'tota...","{'total_cases': 1640718.0, 'new_cases': 5884.0...",...,"{'total_cases': 77258.0, 'new_cases': 76.0, 't...","{'total_cases': 1.0, 'new_cases': 0.0, 'total_...","{'total_cases': 27.0, 'new_cases': 0.0, 'total...","{'total_cases': 114230.0, 'new_cases': 147.0, ...","{'total_cases': 1494.0, 'new_cases': 12.0, 'to...",,"{'total_cases': 85255860.0, 'new_cases': 52740...","{'total_cases': 2101.0, 'new_cases': 0.0, 'tot...","{'total_cases': 21582.0, 'new_cases': 352.0, '...","{'total_cases': 15265.0, 'new_cases': 774.0, '..."
2,2021-01-04 00:00:00,"{'total_cases': 53011.0, 'new_cases': 1485.0, ...","{'total_cases': 2854868.0, 'new_cases': 21792....","{'total_cases': 59623.0, 'new_cases': 185.0, '...","{'total_cases': 100645.0, 'new_cases': 237.0, ...","{'total_cases': 8249.0, 'new_cases': 57.0, 'to...","{'total_cases': 17684.0, 'new_cases': 42.0, 't...",,"{'total_cases': 160.0, 'new_cases': 0.0, 'tota...","{'total_cases': 1648940.0, 'new_cases': 8222.0...",...,"{'total_cases': 77258.0, 'new_cases': 0.0, 'to...","{'total_cases': 1.0, 'new_cases': 0.0, 'total_...","{'total_cases': 27.0, 'new_cases': 0.0, 'total...","{'total_cases': 114407.0, 'new_cases': 177.0, ...","{'total_cases': 1497.0, 'new_cases': 3.0, 'tot...",,"{'total_cases': 85809710.0, 'new_cases': 55385...","{'total_cases': 2101.0, 'new_cases': 0.0, 'tot...","{'total_cases': 21993.0, 'new_cases': 411.0, '...","{'total_cases': 15829.0, 'new_cases': 564.0, '..."
3,2021-01-05 00:00:00,"{'total_cases': 53105.0, 'new_cases': 94.0, 't...","{'total_cases': 2881715.0, 'new_cases': 26847....","{'total_cases': 60283.0, 'new_cases': 660.0, '...","{'total_cases': 100873.0, 'new_cases': 228.0, ...","{'total_cases': 8308.0, 'new_cases': 59.0, 'to...","{'total_cases': 17756.0, 'new_cases': 72.0, 't...",,"{'total_cases': 160.0, 'new_cases': 0.0, 'tota...","{'total_cases': 1662730.0, 'new_cases': 13790....",...,"{'total_cases': 77295.0, 'new_cases': 37.0, 't...","{'total_cases': 1.0, 'new_cases': 0.0, 'total_...","{'total_cases': 27.0, 'new_cases': 0.0, 'total...","{'total_cases': 114662.0, 'new_cases': 255.0, ...","{'total_cases': 1504.0, 'new_cases': 7.0, 'tot...",,"{'total_cases': 86549841.0, 'new_cases': 74013...","{'total_cases': 2101.0, 'new_cases': 0.0, 'tot...","{'total_cases': 22645.0, 'new_cases': 652.0, '...","{'total_cases': 17194.0, 'new_cases': 1365.0, ..."
4,2021-01-06 00:00:00,"{'total_cases': 53105.0, 'new_cases': 0.0, 'to...","{'total_cases': 2917507.0, 'new_cases': 35792....","{'total_cases': 61008.0, 'new_cases': 725.0, '...","{'total_cases': 101120.0, 'new_cases': 247.0, ...","{'total_cases': 8348.0, 'new_cases': 40.0, 'to...","{'total_cases': 17864.0, 'new_cases': 108.0, '...",,"{'total_cases': 163.0, 'new_cases': 3.0, 'tota...","{'total_cases': 1676171.0, 'new_cases': 13441....",...,"{'total_cases': 77350.0, 'new_cases': 55.0, 't...","{'total_cases': 1.0, 'new_cases': 0.0, 'total_...","{'total_cases': 27.0, 'new_cases': 0.0, 'total...","{'total_cases': 114908.0, 'new_cases': 246.0, ...","{'total_cases': 1505.0, 'new_cases': 1.0, 'tot...",,"{'total_cases': 87333632.0, 'new_cases': 78379...","{'total_cases': 2102.0, 'new_cases': 1.0, 'tot...","{'total_cases': 23495.0, 'new_cases': 850.0, '...","{'total_cases': 17804.0, 'new_cases': 610.0, '..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,2021-08-09 00:00:00,"{'total_cases': 150778.0, 'new_cases': 968.0, ...","{'total_cases': 7048926.0, 'new_cases': 33595....","{'total_cases': 133981.0, 'new_cases': 69.0, '...","{'total_cases': 182368.0, 'new_cases': 992.0, ...","{'total_cases': 14836.0, 'new_cases': 0.0, 'to...","{'total_cases': 43747.0, 'new_cases': 85.0, 't...",,"{'total_cases': 1348.0, 'new_cases': 10.0, 'to...","{'total_cases': 5029075.0, 'new_cases': 10180....",...,"{'total_cases': 137491.0, 'new_cases': 856.0, ...","{'total_cases': 4.0, 'new_cases': 0.0, 'total_...","{'total_cases': 27.0, 'new_cases': 0.0, 'total...","{'total_cases': 312931.0, 'new_cases': 816.0, ...","{'total_cases': 224894.0, 'new_cases': 9334.0,...","{'total_cases': 0.0, 'new_cases': 0.0, 'total_...","{'total_cases': 203384856.0, 'new_cases': 6566...","{'total_cases': 7187.0, 'new_cases': 22.0, 'to...","{'total_cases': 200201.0, 'new_cases': 152.0, ...","{'total_cases': 116853.0, 'new_cases': 526.0, ..."
220,2021-08-10 00:00:00,"{'total_cases': 151013.0, 'new_cases': 235.0, ...","{'total_cases': 7083510.0, 'new_cases': 34584....","{'total_cases': 134201.0, 'new_cases': 220.0, ...","{'total_cases': 183347.0, 'new_cases': 979.0, ...","{'total_cases': 14873.0, 'new_cases': 37.0, 't...","{'total_cases': 43890.0, 'new_cases': 143.0, '...",,"{'total_cases': 1348.0, 'new_cases': 0.0, 'tot...","{'total_cases': 5041487.0, 'new_cases': 12412....",...,"{'total_cases': 138382.0, 'new_cases': 891.0, ...","{'total_cases': 4.0, 'new_cases': 0.0, 'total_...","{'total_cases': 27.0, 'new_cases': 0.0, 'total...","{'total_cases': 314480.0, 'new_cases': 1549.0,...","{'total_cases': 232937.0, 'new_cases': 8043.0,...",,"{'total_cases': 204022307.0, 'new_cases': 6374...","{'total_cases': 7198.0, 'new_cases': 11.0, 'to...","{'total_cases': 200830.0, 'new_cases': 629.0, ...","{'total_cases': 117258.0, 'new_cases': 405.0, ..."
221,2021-08-11 00:00:00,"{'total_cases': 151291.0, 'new_cases': 278.0, ...","{'total_cases': 7117990.0, 'new_cases': 34480....","{'total_cases': 134487.0, 'new_cases': 286.0, ...","{'total_cases': 184191.0, 'new_cases': 844.0, ...","{'total_cases': 14891.0, 'new_cases': 18.0, 't...","{'total_cases': 43998.0, 'new_cases': 108.0, '...",,"{'total_cases': 1372.0, 'new_cases': 24.0, 'to...","{'total_cases': 5052884.0, 'new_cases': 11397....",...,"{'total_cases': 139285.0, 'new_cases': 903.0, ...","{'total_cases': 4.0, 'new_cases': 0.0, 'total_...","{'total_cases': 27.0, 'new_cases': 0.0, 'total...","{'total_cases': 314480.0, 'new_cases': 0.0, 't...","{'total_cases': 241543.0, 'new_cases': 8606.0,...",,"{'total_cases': 204729264.0, 'new_cases': 7069...","{'total_cases': 7213.0, 'new_cases': 15.0, 'to...","{'total_cases': 201342.0, 'new_cases': 512.0, ...","{'total_cases': 117954.0, 'new_cases': 696.0, ..."
222,Population,38928341.0,1340598113.0,2877800.0,43851043.0,77265.0,32866268.0,15002.0,97928.0,45195777.0,...,33469199.0,307150.0,809.0,28435943.0,97338583.0,11246.0,7794798729.0,29825968.0,18383956.0,14862927.0
