In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from geopy.geocoders import Nominatim
import math
import statistics
import pandas as pd
import matplotlib.pyplot as plt

### Data processing:

In [None]:
#Countries_test = {}
#for index,data in df.iterrows():
#    num_of_null = 0
#    avg = 0
#    count = 0
#    nums = []
#    for i in range(1985,2010):
#        value = data[str(i)]
#        if type(value) == str:
#            value = float(value.replace(",",""))
#        if  math.isnan(value):
#            num_of_null += 1
#        else:
#            avg += value
#            count += 1
#            nums.append(value)
    
#    Countries_test[data["Country"]] = {"avg": round(avg / count,2) if count != 0 else math.nan, 
#                                       "null%": round((num_of_null / (num_of_null + count)) * 100,2),
#                                       "mean": round(statistics.mean(nums),2)
#                                       }
#Countries_test

In [5]:
def _longest_run(values, first_year):
    """Return the longest run of consecutive non-missing values and its first year.

    Args:
        values: One value per consecutive year, starting at ``first_year``.
            Strings are parsed as floats after removing thousands separators
            (``","``); NaN/None cells count as missing.
        first_year: Calendar year that ``values[0]`` belongs to.

    Returns:
        ``(run, start_year)``. When two runs have the same length the later
        one wins. If every value is missing the result is
        ``([], first_year + len(values))``.
    """
    best_run, best_start = [], first_year
    run = []
    for year, value in enumerate(values, start=first_year):
        if isinstance(value, str):
            value = float(value.replace(",", ""))

        if pd.isna(value):
            # A gap closes the current run; ">=" keeps the "latest run wins" tie rule.
            if len(run) >= len(best_run):
                best_run, best_start = run, year - len(run)
            run = []
        else:
            run.append(value)

    # The end of the window closes the last run exactly like a gap would.
    end_year = first_year + len(values)
    if len(run) >= len(best_run):
        best_run, best_start = run, end_year - len(run)
    return best_run, best_start


def getCountries(path='data/History_2.xlsx', first_year=1985, last_year=2009):
    """Load the workbook and keep, per country, its longest gap-free series.

    Args:
        path: Excel file with a ``"Country"`` column and one column per year
            whose header is the year as a string (e.g. ``"1985"``).
        first_year: First year column to read (inclusive).
        last_year: Last year column to read (inclusive).

    Returns:
        dict mapping each country name to ``(values, start_year)`` where
        ``values`` is the longest run of consecutive non-missing yearly values
        and ``start_year`` is the year of its first element.
    """
    df = pd.read_excel(path)
    year_columns = [str(year) for year in range(first_year, last_year + 1)]

    Countries = {}
    for _, row in df.iterrows():
        yearly_values = [row[column] for column in year_columns]
        Countries[row["Country"]] = _longest_run(yearly_values, first_year)

    return Countries

# Module-level table {country: (longest gap-free series, first year of that series)};
# predict_end_year_from_cords looks countries up in this global.
Countries = getCountries()

In [None]:
def toTensor(country_dataset, seq_length=3):
    """Turn per-country series into sliding-window training samples.

    Each series is min-max scaled on its own, then cut into windows of
    ``seq_length`` consecutive values with the following value as target.
    Series with ``seq_length`` values or fewer yield no samples.

    Args:
        country_dataset: dict whose values are indexable with the series at
            position 0 (e.g. ``(values, start_year)``).
        seq_length: Number of past values in each input window.

    Returns:
        ``(x, y, scalers)`` where ``x`` is a float32 tensor of shape
        ``(n_samples, seq_length, 1)``, ``y`` is a float32 tensor of shape
        ``(n_samples,)`` and ``scalers[i]`` is the scaler fitted on the series
        sample ``i`` came from. With no usable series ``x`` is
        ``(0, seq_length, 1)`` rather than a tensor of the wrong rank.
    """
    windows, targets, scalers = [], [], []

    for entry in country_dataset.values():
        series = entry[0]
        if len(series) <= seq_length:
            continue

        # One scaler per country so every series is normalised to its own range.
        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(np.array(series).reshape(-1, 1)).flatten()

        for start in range(len(scaled) - seq_length):
            stop = start + seq_length
            windows.append(scaled[start:stop])
            targets.append(scaled[stop])
            scalers.append(scaler)

    # Explicit reshape (instead of unsqueeze) keeps the rank right when there are no samples.
    x = torch.tensor(np.array(windows), dtype=torch.float32).reshape(len(windows), seq_length, 1)
    y = torch.tensor(np.array(targets), dtype=torch.float32)
    return x, y, scalers

In [None]:
def getData():
    """Build the sample set and split it positionally into train and test parts.

    The first 67% of the samples (in the order toTensor produced them) become
    the training part, the remainder the test part; nothing is shuffled.

    Returns:
        ``(train_x, train_y, train_s, test_x, test_y, test_s)``.
    """
    samples, targets, sample_scalers = toTensor(getCountries())
    cut = int(len(samples) * 0.67)

    train_part = (samples[:cut], targets[:cut], sample_scalers[:cut])
    test_part = (samples[cut:], targets[cut:], sample_scalers[cut:])
    return (*train_part, *test_part)

In [7]:
def setDevice():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

# Shared device used by the training and prediction cells.
device = setDevice()

### Model:

In [None]:
# Based on the LSTM code provided/created in the labs
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer that emits one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_dim, num_layers=2):
        super(LSTMModel, self).__init__()
        # Kept so existing code reading `model.device` still works; forward()
        # does not rely on it because the module may be moved after construction.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.hidden_size = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Map ``x`` of shape ``(batch, seq_len, input_size)`` to ``(batch, 1)``."""
        # Create the initial states on the input's own device and dtype so the
        # model works wherever it (and its input) currently live.
        state_shape = [self.num_layers, x.size(0), self.hidden_size]
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        out, _ = self.lstm(x, (h0, c0))
        # Only the last time step feeds the regression head.
        out = self.fc(out[:, -1, :])
        return out

### Train the model:

In [None]:
# https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
# used for inspiration

def train_and_evaluate(num_epochs=2000, lr=0.0001, batch_size=8, hidden_dim=128, log_every=1):
    """Train an LSTMModel on the data from getData() and track both losses.

    Args:
        num_epochs: Number of passes over the training set.
        lr: Learning rate for the Adam optimizer.
        batch_size: Mini-batch size for both the training and test loaders.
        hidden_dim: Hidden size passed to LSTMModel.
        log_every: Print a progress line every this many epochs; a falsy value
            disables printing.

    Returns:
        ``(model, losses, val_losses)``. Each list holds one entry per epoch:
        the MSE summed over the batches of the training / test loader. Because
        the sums run over different numbers of batches, the two curves are not
        on the same scale.
    """
    train_x, train_y, _train_s, test_x, test_y, _test_s = getData()

    train_ds = TensorDataset(train_x.float(), train_y.float())
    test_ds = TensorDataset(test_x.float(), test_y.float())
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=batch_size)

    model = LSTMModel(1, hidden_dim).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    losses = []
    val_losses = []
    for epoch in range(num_epochs):  # used code provided in the lab
        model.train()
        total_loss = 0
        for xb, yb in train_loader:
            xb = xb.to(device)
            yb = yb.view(-1, 1).to(device)  # match the (batch, 1) model output
            pred = model(xb)
            loss = criterion(pred, yb)
            optimizer.zero_grad()
            loss.backward()
            # Clip to keep recurrent gradients from blowing up.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_loss += loss.item()
        losses.append(total_loss)

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for xb, yb in test_loader:
                xb = xb.to(device)
                yb = yb.view(-1, 1).to(device)
                pred = model(xb)
                loss = criterion(pred, yb)
                val_loss += loss.item()
        val_losses.append(val_loss)

        if log_every and (epoch + 1) % log_every == 0:
            print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}, Val_loss: {val_loss:.4f}")

    return model, losses, val_losses
# Runs the full training loop when this cell executes and keeps the trained
# model plus the per-epoch loss histories as notebook globals.
model, losses, val_losses = train_and_evaluate()


In [None]:
def export_model(model, path='model/model_price.pt'):
    """Compile ``model`` with TorchScript and save it to ``path``.

    Args:
        model: The ``nn.Module`` to script.
        path: Destination file; missing parent directories are created so the
            export does not fail on a fresh checkout.
    """
    import os  # local import: the notebook's import cell does not bring in os

    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    model_price = torch.jit.script(model)
    model_price.save(path)

# Save the trained model to 'model/model_price.pt', the file predict_end_year loads.
export_model(model)

In [None]:
#def visualize_losses():
#    model, losses,val_losses = train_and_evaluate()
#    plt.plot(losses, label="training loss")
#    plt.plot(val_losses, label="validation loss")
#    plt.legend()
#    plt.show()


### Predict the Price:

In [1]:
def toTensorPred(dataset, seq_length=3):
    """Scale a series and return its last ``seq_length`` values for prediction.

    Returns:
        ``False`` when ``dataset`` has fewer than ``seq_length`` values;
        otherwise ``(window, scaler)`` where ``window`` holds the last
        ``seq_length`` min-max scaled values and ``scaler`` is the fitted
        scaler needed to map predictions back to the original range.
    """
    if seq_length > len(dataset):
        return False

    scaler = MinMaxScaler()
    column = np.array(dataset).reshape(-1, 1)
    normalized = scaler.fit_transform(column).flatten()
    window = normalized[-seq_length:]

    return window, scaler



In [2]:
def inflation_value(i, rate=0.03):
    """Return the compound growth factor ``(1 + rate) ** i``.

    Args:
        i: Number of compounding periods (years).
        rate: Inflation rate per period; defaults to 3%.
    """
    return math.pow(1 + rate, i)

def predict_end_year(input_sequence, start_year, end_year, model=None):
    """Forecast one value per year after the series ends, up to ``end_year``.

    The model is applied autoregressively: each prediction is appended to the
    input window used for the next step.

    Args:
        input_sequence: Observed yearly values; the first belongs to ``start_year``.
        start_year: Year of ``input_sequence[0]``.
        end_year: Last year to forecast (inclusive).
        model: Optional callable/model to use. When omitted the TorchScript
            model is loaded from 'model/model_price.pt' onto ``device``.

    Returns:
        List of forecasts in the original value range, one per year from the
        year after the last observation through ``end_year``; forecast number
        ``k`` (starting at 0) is multiplied by ``inflation_value(k)``. Empty
        when ``end_year`` is not after the last observed year.

    Raises:
        ValueError: If ``input_sequence`` is too short to form an input window.
    """
    prepared = toTensorPred(input_sequence)
    if prepared is False:
        # toTensorPred signals "too short" with False; fail with a clear message
        # instead of an unpacking error.
        raise ValueError(
            f"input_sequence is too short to build a prediction window "
            f"(got {len(input_sequence)} values)"
        )
    inp_seq, sc = prepared

    if model is None:
        # map_location keeps the weights on the same device the inputs are sent to.
        model = torch.jit.load("model/model_price.pt", map_location=device)
        model.eval()

    last_observed_year = start_year + len(input_sequence) - 1
    values = []

    with torch.no_grad():
        for i in range(end_year - last_observed_year):
            # (seq,) -> (1, seq, 1): add the batch and feature dimensions.
            inp = torch.tensor(inp_seq, dtype=torch.float32).unsqueeze(0).unsqueeze(-1).to(device)
            pred_value = model(inp).item()
            inf = inflation_value(i)
            values.append(sc.inverse_transform([[pred_value]])[0][0] * inf)
            # Slide the window: drop the oldest value, append the new prediction.
            inp_seq = np.append(inp_seq[1:], pred_value)

    return values


In [None]:
#print(Countries["Denmark"][1])
#print("prediction with accounted inflation: " + str(test[-1]))
#print("goals: 21,421")
#plt.plot(Countries["Denmark"][0], label='Original')
#plt.plot(range(len(Countries["Denmark"][0]), len(Countries["Denmark"][0]) + len(test)), test, label='Forecast', color='red')
#plt.show()

In [8]:
# https://www.geeksforgeeks.org/get-the-city-state-and-country-names-from-latitude-and-longitude-using-python/
# used for getting the country from a location

def predict_end_year_from_cords(lat, long, end_year):
    """Forecast the value for ``end_year`` at the country containing a coordinate.

    The coordinate is reverse-geocoded with Nominatim. If the resulting country
    has a usable series in ``Countries`` its own forecast is returned;
    otherwise (no geocoding result, unknown country, or too little data for
    that country) the mean of the forecasts of all usable countries is returned.

    Args:
        lat: Latitude in decimal degrees.
        long: Longitude in decimal degrees.
        end_year: Year to forecast.

    Returns:
        The forecast for ``end_year`` rounded to two decimals.

    Raises:
        ValueError: If ``end_year`` is not after the located country's last
            observed year, or if no country yields a forecast at all.
    """
    # Matches toTensorPred's default window length: shorter series cannot be forecast.
    min_points = 3

    geolocator = Nominatim(user_agent="my_geopy_app")
    location = geolocator.reverse(str(lat)+","+str(long), language='en')

    # reverse() gives None when nothing is found (e.g. open sea), and an
    # address is not guaranteed to carry a country.
    country = None
    if location is not None:
        country = location.raw.get('address', {}).get('country')

    def forecast_for(name):
        """Return the forecast list for ``name`` or None when its series is too short."""
        series, first_year = Countries[name][0], Countries[name][1]
        if len(series) < min_points:
            return None
        return predict_end_year(series, first_year, end_year)

    if country in Countries:
        forecast = forecast_for(country)
        if forecast is not None:
            if not forecast:
                raise ValueError(
                    f"end_year {end_year} is not after the last observed year for {country}"
                )
            return round(forecast[-1], 2)

    # Fallback: average over every country that can actually produce a forecast.
    avg_pred = []
    for name in Countries:
        forecast = forecast_for(name)
        if forecast:
            avg_pred.append(forecast[-1])

    if not avg_pred:
        raise ValueError(f"no country has enough data to forecast {end_year}")
    return round(statistics.mean(avg_pred), 2)
        

# Example call: forecast for the given latitude/longitude pair, projected to 2024.
predict_end_year_from_cords(-33.9248685,18.4240553,2024)

25430.46