#### Imports

In [16]:
import sys
import os

import geopandas as gpd
import pandas as pd
import numpy as np
import datetime as dt

#from sklearn.linear_model import LinearRegression
import scipy.optimize as op
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

#### Constants

In [17]:
# --- General -----------------------------------------------------------------
# Project root: the parent of the current working directory.
# (Alternative kept for reference: BASE_PATH = os.getcwd())
BASE_PATH = os.path.dirname(os.getcwd())

# --- Label encodings ---------------------------------------------------------
# Integer code of each change type; the code is the label's position in the list.
CHANGE_TYPE_MAP = {
    label: code
    for code, label in enumerate(
        ['Demolition', 'Road', 'Residential', 'Commercial', 'Industrial', 'Mega Projects']
    )
}

# Numeric score of each change status; a missing status (None) stays None.
# NOTE(review): 'Land Cleared'/'Excavation' share score 1 and
# 'Materials Dumped'/'Prior Construction' share score 3, and the scale skips
# 2, 7 and 9 -- presumably a hand-tuned progress scale; confirm it is intended.
CHANGE_STATUS_MAP = {
    'Greenland': 0,
    'Land Cleared': 1,
    'Excavation': 1,
    'Materials Dumped': 3,
    'Prior Construction': 3,
    'Materials Introduced': 4,
    'Construction Started': 5,
    'Construction Midway': 6,
    'Construction Done': 8,
    'Operational': 10,
    None: None,
}

# --- Data --------------------------------------------------------------------
COLORS = ['red', 'green', 'blue']
METRICS = ['std', 'mean']

# --- Column groups -----------------------------------------------------------
COLUMNS_TO_DROP = ['index']
# One column per observation date (date0 .. date4); the status columns below
# are derived from these names so the three lists always stay aligned.
DATE_COLUMNS = [f'date{position}' for position in range(5)]
CHANGE_STATUS_COLUMNS = [f'change_status_{date_column}' for date_column in DATE_COLUMNS]
CHANGE_STATUS_VALUE_COLUMNS = [f'change_status_value_{date_column}' for date_column in DATE_COLUMNS]

#### Reading data

In [18]:
## Read data
# Load the training sample as a GeoDataFrame.
# `index_col` is a pandas.read_csv option, not a geopandas.read_file one: any
# extra keyword is forwarded to the I/O engine as a dataset open option, where
# an unknown name is at best ignored with a warning. It is therefore dropped.
train_df = gpd.read_file(f'{BASE_PATH}/data/1000samples.geojson')
# test_df = gpd.read_file(f'{BASE_PATH}/data/test.geojson')

#### Treatments

In [19]:
## Simple Treatments
# Parse the day-month-year date strings; unparseable values become NaT.
train_df[DATE_COLUMNS] = train_df[DATE_COLUMNS].apply(
    lambda column: pd.to_datetime(column, format='%d-%m-%Y', errors='coerce')
)

#train_df['change_type'].map(CHANGE_TYPE_MAP)

# Encode the status labels as numeric scores.
# `DataFrame.replace` with a mixed str/None mapping goes through pandas'
# deprecated silent-downcasting path (FutureWarning) and leaves unknown labels
# in place as strings. An explicit `map` to float avoids both: a missing
# status becomes NaN and unknown labels are reported immediately.
status_score_by_label = {
    label: score for label, score in CHANGE_STATUS_MAP.items() if label is not None
}
status_labels = train_df[CHANGE_STATUS_COLUMNS]
status_values = status_labels.apply(
    lambda column: column.map(status_score_by_label)
).astype(float)

# A label that was present but produced no score is not in CHANGE_STATUS_MAP.
unmapped = (status_labels.notna() & status_values.isna()).to_numpy()
if unmapped.any():
    unknown_labels = sorted({str(label) for label in status_labels.to_numpy()[unmapped]})
    raise ValueError(f'Change-status labels missing from CHANGE_STATUS_MAP: {unknown_labels}')

# Columns are assigned positionally (status column k -> value column k).
train_df[CHANGE_STATUS_VALUE_COLUMNS] = status_values


  train_df[CHANGE_STATUS_VALUE_COLUMNS] = train_df[CHANGE_STATUS_COLUMNS].replace(CHANGE_STATUS_MAP)


In [36]:
# Build, for every sample, the design matrix X (num_samples, num_dates, 2)
# with rows [1, scaled_date] and the target matrix Y (num_samples, num_dates)
# holding the numeric change-status scores.
num_samples = train_df.shape[0]
num_dates = len(DATE_COLUMNS)
coef = np.zeros((num_samples))
# Divisor turning nanosecond timestamps into the time unit used for the fit.
# NOTE(review): 1e9*60*90*24 ns is 1.5 days; presumably 60*60*24 (one day)
# was intended. It only rescales the fitted slope, so it is left unchanged.
time_ctt = 1e9*60*90*24
ones = np.ones((num_samples, num_dates, 1))

# np.array(...) makes a private, writable copy that is safe to edit in place.
Y = np.array(train_df[CHANGE_STATUS_VALUE_COLUMNS].astype(float))
Y_nan_mask = np.isnan(Y)

# Go through numpy datetime64[ns] so the integer view is always 64-bit
# nanoseconds, independent of the platform's default int width.
date_values = train_df[DATE_COLUMNS].to_numpy(dtype='datetime64[ns]')
date_nat_mask = np.isnat(date_values)

X = date_values.astype('int64')[:, :, np.newaxis] / time_ctt
X = np.dstack((ones, X))

# An observation is unusable when either its status or its date is missing.
# NaT casts to the minimum int64, so it must be masked as well or it would
# enter the fit as a huge negative time. Zeroing a row of X together with its
# Y entry removes that observation from the least-squares sums.
invalid_mask = Y_nan_mask | date_nat_mask
X[invalid_mask, :] = 0
Y[invalid_mask] = 0

In [42]:
# Per-sample ridge regression of status score on time:
#     (lambda*I + X_i^T X_i) w_i = X_i^T y_i
# The slope w_i[1] is stored as the sample's "civilizating_rate".
# The small ridge term keeps the system solvable even when every observation
# of a sample was masked out (X_i == 0), in which case the slope is 0.
eye = np.eye(2)*0.0001

# Batched normal equations for all samples at once, for any number of dates.
gram = np.einsum('nti,ntj->nij', X, X) + eye      # (num_samples, 2, 2)
moment = np.einsum('nti,nt->ni', X, Y)            # (num_samples, 2)

# Solve the linear systems directly instead of forming explicit inverses:
# the date column is large compared with the constant column, so the 2x2
# systems are poorly conditioned and `solve` is the more accurate choice.
weights = np.linalg.solve(gram, moment[:, :, np.newaxis])[:, :, 0]
coef = weights[:, 1]
train_df["civilizating_rate"] = coef


In [None]:
def computeCost(theta):
    """Sum of squared residuals of all per-sample linear fits.

    Reads the module-level ``X`` (num_samples, T, 2), ``Y`` (num_samples, T)
    and ``num_samples``; works for any number of time steps T.

    Parameters
    ----------
    theta : array-like of length ``num_samples * 2``
        Flat parameter vector; reshaped so that row ``i`` holds the two
        coefficients multiplying ``X[i, :, 0]`` and ``X[i, :, 1]``.

    Returns
    -------
    float
        ``sum_{i,t} (X[i, t] . theta_i - Y[i, t]) ** 2``.
    """
    # (num_samples, 1, 2) broadcasts against X over the time axis, so no
    # explicit expansion with a hard-coded number of time steps is needed.
    theta_per_sample = np.asarray(theta, dtype=float).reshape((num_samples, 1, 2))
    residuals = np.sum(X * theta_per_sample, axis=2) - Y
    return np.sum(np.power(residuals, 2))

def computeGrad(theta):
    """Gradient of the summed squared residuals with respect to ``theta``.

    Reads the module-level ``X`` (num_samples, T, 2), ``Y`` (num_samples, T)
    and ``num_samples``; works for any number of time steps T.

    Parameters
    ----------
    theta : array-like of length ``num_samples * 2``
        Flat parameter vector, two coefficients per sample.

    Returns
    -------
    numpy.ndarray of shape ``(num_samples * 2,)``
        ``2 * sum_t X[i, t, k] * (X[i, t] . theta_i - Y[i, t])``, flattened
        in the same order as ``theta``.
    """
    theta_per_sample = np.asarray(theta, dtype=float).reshape((num_samples, 1, 2))
    residuals = np.sum(X * theta_per_sample, axis=2) - Y
    # d/dtheta of r**2 is 2 * r * dr/dtheta: the factor 2 is required for the
    # gradient to match the squared-error cost.
    gradient = 2.0 * np.sum(X * residuals[:, :, np.newaxis], axis=1)
    # scipy.optimize.minimize expects a flat vector shaped like x0.
    return gradient.ravel()

# Fit all per-sample coefficient pairs jointly by minimising computeCost
# with the TNC solver, using computeGrad as the analytic gradient.
fit_result = op.minimize(fun=computeCost, x0=np.zeros((num_samples*2)),
                         method='TNC', jac=computeGrad)
if not fit_result.success:
    print(f'TNC did not converge: {fit_result.message}')

# `op.minimize` returns an OptimizeResult; the solution vector is in `.x`.
# `reshape` is not in-place, so its result has to be assigned.
W = fit_result.x.reshape((num_samples, 2))

print(W[0:5,:])
#train_df["civilizating_rate"] = coef



In [None]:
#print(train_df.head())