<a href="https://colab.research.google.com/github/Sreetam/tamarindncilantro/blob/main/Cilantro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# !pip3 install wwo-hist
# !pip3 install geopy

In [2]:
import numpy as np
import pandas as pd
import math
from geopy.geocoders import Nominatim
from wwo_hist import retrieve_hist_data
import geopy.distance
from numpy.lib.type_check import nan_to_num
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Load the per-city table and recode its text answers as numbers:
# "Ab" -> 0.5, "yes" -> 1, "no" -> 0. Any other value is kept as read.
answer_codes = { "Ab" : 0.5, "yes" : 1, "no" : 0}
df = pd.read_csv("city.csv")
for column in df.columns:
  df[column] = df[column].replace(answer_codes)
# Nominatim geocoder client, identified to the service as "GetLoc"
loc = Nominatim(user_agent="GetLoc")

In [6]:
# regions.csv without its saved index column, as {column name: {row label: value}}
regions = (
    pd.read_csv('regions.csv')
    .drop(columns=['Unnamed: 0'])
    .to_dict()
)
# Columns kept from every per-city weather file
dat = ['date_time', 'humidity', 'tempC']
location_list = df['city']
# One weather frame per entry of df['city'], in the same order
hist_weather_data = [
    pd.read_csv('./weather_data/' + city_name + '.csv')[dat]
    for city_name in location_list
]

In [7]:
def getDat(atr, date):
  """Collect one weather reading per city.

  Reads column `atr` at row label `date` from every frame in the global
  `hist_weather_data` and returns the readings as floats, keyed by the
  matching entry of df['city'] (frame number i pairs with df['city'][i]).
  """
  return {
    df['city'][idx]: float(frame[atr][date])
    for idx, frame in enumerate(hist_weather_data)
  }
# Higher weight means more probability of transmission
def calc_weight(x, y):
  """Return the transmission weight between regions `x` and `y`.

  The weight is 0 when the geodesic distance d (in km) between the two
  regions is exactly 0, and 1 / (1 + exp(-1/d)) otherwise, so it shrinks
  towards 0.5 as the regions get farther apart.

  `x` and `y` are keys of the global `regions` mapping.
  """
  def coords(name):
    entry = regions[name]
    # regions is built with DataFrame.to_dict(), so each entry is a
    # {row label: value} dict. geopy turns an unknown object into a Point
    # by iterating it, and iterating a dict yields its keys (0, 1, ...),
    # not the stored coordinates -- every region would collapse onto the
    # same point. Hand geopy the values instead.
    # NOTE(review): assumes the rows are ordered latitude, longitude -- confirm
    # against regions.csv.
    if isinstance(entry, dict):
      return tuple(entry.values())
    return entry

  d = geopy.distance.geodesic(coords(x), coords(y)).km
  if d == 0:
    return 0
  return 1/(1 + np.exp(-1/d))

# Calculate the decay as per the formula
def find_decay():
    """Return the decay as a 1x2 row vector with both entries 0.6666667.

    NOTE(review): the formula behind the constant is not shown in this
    notebook -- confirm its source.
    """
    decay_value = 0.6666667
    return np.array([[decay_value] * 2])

In [8]:
# The transmission graph contains info of the vector score, ie, weights
# between two nodes L1 and L2

class transmission_graph:
  """Weighted graph whose nodes are the keys of the global `regions` mapping.

  Attributes:
    n: number of nodes, len(regions).
    city: list of the node names.
    adj_matrix: nested dict; adj_matrix[a][b] holds calc_weight(a, b)
      unless overwritten through change_weight.
    tempC, humidity: per-node readings for the most recently scored day.
    susceptibility: per-node score for the most recently scored day.
    E: 2x2 list filled with -1; no method of this class reads it.
  """
  def __init__(self, sample=False):
    # `sample` is accepted but not used anywhere in this class.
    self.tempC = dict()
    self.humidity = dict()
    # n denotes the number of districts or nodes in the graph
    self.n = len(regions)
    self.city = list(regions.keys())
    # adj_matrix is the adjacency matrix
    # The ith row and jth column of the adjacency matrix
    # stores the weight of the edge between ith and jth node
    self.adj_matrix = dict()
    self.E = [[-1, -1], [-1, -1]]
    self.susceptibility = dict()
    self.initialize_matrix()

  # This function initialises the graph with nodes and stores weights between each node of the graph
  def initialize_matrix(self):
    """Fill adj_matrix for every ordered pair of regions, then score day 0."""
    for src in regions.keys():
      self.adj_matrix[src] = dict()
      for dst in regions.keys():
        self.adj_matrix[src][dst] = calc_weight(src, dst)
    # Also assign the susceptibility value of each node
    self.compute_susceptibility(0)

  def compute_susceptibility(self, day):
    """Score every region for `day`.

    Refreshes self.tempC and self.humidity through getDat, stores each
    region's score in self.susceptibility, and returns the same scores in a
    new dict keyed by region.
    """
    scores = dict()
    self.tempC = getDat('tempC', day)
    self.humidity = getDat('humidity', day)
    for node in regions.keys():
      # Score each node once and reuse the value for both containers.
      score = self.calc_susceptibility_score(node)[0][0]
      self.susceptibility[node] = score
      scores[node] = score
    return scores

  # Function to modify the edge weight between two regions
  def change_weight(self, r1, r2, wt):
    """Set the weight of the directed edge r1 -> r2 to `wt`."""
    self.adj_matrix[r1][r2] = wt

  def calc_susceptibility_score(self, node):
    """Return find_decay() matrix-multiplied by the column [tempC, humidity] of `node`.

    With the 1x2 decay vector the result is a 1x1 array; callers take [0][0].
    """
    Sn = np.array([[self.tempC[node], self.humidity[node]]])
    Sn = Sn.transpose()
    decay = find_decay()
    y = np.matmul(decay, Sn)
    return y

In [9]:
# Score every row of the weather history and export the table to risk_pred.csv
g = transmission_graph()
# The first city's history decides how many days are scored.
day_count = len(hist_weather_data[0]['date_time'])
susceptibility_df = {day: g.compute_susceptibility(day) for day in range(day_count)}
s_df = pd.DataFrame(susceptibility_df)
# Relabel the day columns with calendar dates running from df's second column
# name to its last. 'D' is the canonical daily alias; the lowercase 'd' is
# deprecated in newer pandas and yields the same range.
# NOTE(review): assumes those column names parse as dates and that the range
# has exactly day_count entries -- otherwise the assignment below raises.
date_labels = pd.date_range(df.columns[1], df.columns[-1], freq='D')
s_df.columns = [stamp.strftime('%m/%d/%Y') for stamp in date_labels]
s_df.index.name = "city"
s_df.to_csv("risk_pred.csv")