### 1. Import necessary modules

In [1]:
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import models, transforms

In [2]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### 2. Specify data paths

In [3]:
# Root data folder, with one sub-folder per variable.
DATADIR = './cdsdata'

DATADIR_TEMP = os.path.join(DATADIR, 'temperature')
DATADIR_PRECIP = os.path.join(DATADIR, 'precipitation')

# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the folder and creating it.
for data_dir in (DATADIR_TEMP, DATADIR_PRECIP):
    os.makedirs(data_dir, exist_ok=True)

In [5]:
# Every year from 1979 through 2019, as four-digit strings.
years = [str(year) for year in range(1979, 2020)]

# Calendar months as zero-padded two-digit strings ('01' ... '12').
months = [f'{month:02d}' for month in range(1, 13)]

### 3. Extract features through ResNet

In [8]:
# Build the ResNet inputs from the monthly temperature arrays.
temperature_data = []

# ToTensor turns an (H, W, C) ndarray into a (C, H, W) tensor; Normalize then
# applies the per-channel mean/std that torchvision documents for its
# ImageNet-pretrained models.
data_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

for y in years:
    for m in months:
        # os.path.join instead of a hard-coded '\\' separator, so the files
        # are also found on Linux/macOS.
        npy_path = os.path.join(DATADIR_TEMP, f'{y}-{m}.npy')
        with open(npy_path, 'rb') as f:
            # Move axis 0 to the end so the array is channel-last for ToTensor.
            # NOTE(review): assumes each file is stored as (3, H, W) — confirm.
            temperature_ndarray = np.transpose(np.load(f), (1, 2, 0))
        # astype(float) gives float64, so ToTensor passes the values through
        # without the [0, 255] -> [0, 1] rescaling it applies to uint8 input.
        temperature_transformed = data_transforms(temperature_ndarray.astype(float))
        temperature_data.append(temperature_transformed)

# Default DataLoader settings: batch_size=1 and no shuffling, so batches come
# out one month at a time in the order they were appended.
my_temperature_dataloader = DataLoader(temperature_data)

In [9]:
# Feature extractor: ImageNet-pretrained ResNet-18 with its last child module
# (the final fully connected layer) removed. The variable keeps the name
# `resnet15` because the following cells refer to it.
# `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1 is the
# checkpoint that `pretrained=True` used to load.
model_conv = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
modules = list(model_conv.children())[:-1]

# eval() is required for feature extraction: in training mode BatchNorm would
# normalise each batch with its own statistics and update its running stats on
# every forward pass, so the extracted features would not be reproducible.
resnet15 = nn.Sequential(*modules).eval()

# Freeze all weights; the backbone is only used for inference.
for p in resnet15.parameters():
    p.requires_grad = False



In [10]:
# temperature features
# Collect the per-batch outputs and concatenate once at the end; growing the
# result with torch.concat inside the loop re-copies all earlier rows each time.
temp_feature_batches = []
with torch.no_grad():  # inference only, no autograd bookkeeping needed
    for img in my_temperature_dataloader:
        pooled = resnet15(img.float())  # inputs were built as float64; the model expects float32
        # drop the two trailing singleton spatial dims: (N, C, 1, 1) -> (N, C)
        temp_feature_batches.append(torch.squeeze(pooled, (2, 3)))

temp_features = torch.cat(temp_feature_batches, dim=0)

print(temp_features.shape)

torch.Size([492, 512])


In [12]:
# Build the ResNet inputs from the monthly precipitation arrays.
precipitation_data = []

# ToTensor turns an (H, W, C) ndarray into a (C, H, W) tensor; Normalize then
# applies the per-channel mean/std that torchvision documents for its
# ImageNet-pretrained models.
data_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

for y in years:
    for m in months:
        # os.path.join instead of a hard-coded '\\' separator, so the files
        # are also found on Linux/macOS.
        npy_path = os.path.join(DATADIR_PRECIP, f'{y}-{m}.npy')
        with open(npy_path, 'rb') as f:
            # Move axis 0 to the end so the array is channel-last for ToTensor.
            # NOTE(review): assumes each file is stored as (3, H, W) — confirm.
            precipitation_ndarray = np.transpose(np.load(f), (1, 2, 0))
        # NOTE(review): unlike the temperature cell there is no dtype cast here,
        # so the tensor dtype follows whatever dtype the .npy file holds — confirm
        # this is intended.
        precipitation_transformed = data_transforms(precipitation_ndarray)
        precipitation_data.append(precipitation_transformed)

# Default DataLoader settings: batch_size=1 and no shuffling, so batches come
# out one month at a time in the order they were appended.
my_precipitation_dataloader = DataLoader(precipitation_data)

In [13]:
# precipitation features
# Collect the per-batch outputs and concatenate once at the end; growing the
# result with torch.concat inside the loop re-copies all earlier rows each time.
precip_feature_batches = []
with torch.no_grad():  # inference only, no autograd bookkeeping needed
    for img in my_precipitation_dataloader:
        # .float() is a no-op for float32 input and mirrors the temperature
        # branch, so a float64 array on disk cannot break the forward pass.
        pooled = resnet15(img.float())
        # drop the two trailing singleton spatial dims: (N, C, 1, 1) -> (N, C)
        precip_feature_batches.append(torch.squeeze(pooled, (2, 3)))

precip_features = torch.cat(precip_feature_batches, dim=0)

print(precip_features.shape)

torch.Size([492, 512])


In [19]:
# Join the two feature sets column-wise: temperature columns first, then precipitation.
concatenated_features = torch.cat([temp_features, precip_features], dim=1)

### 4. Load target data

In [24]:
# target data path
DATADIR_TARGET = './target'

# Only these columns are read from the CSV.
columns_to_load = ['year', 'wheat_yield']  # wheat yield per hectare

# os.path.join instead of a hard-coded '\\' separator, so the file is also
# found on Linux/macOS.
target_csv_path = os.path.join(DATADIR_TARGET, 'our-world-in-data-crop-yields.csv')
df = pd.read_csv(target_csv_path, usecols=columns_to_load)

# Keep 1979 <= year < 2020 with one combined mask. Chaining two boolean
# selections (df[a][b]) applies the second mask to an already-filtered frame
# and makes pandas warn that the key "will be reindexed".
df = df[(df['year'] >= 1979) & (df['year'] < 2020)]

# df now holds only the selected columns and years
print(df)

    year  wheat_yield
18  1979       1.8522
19  1980       1.8554
20  1981       1.8800
21  1982       1.9992
22  1983       2.1258
23  1984       2.2201
24  1985       2.1719
25  1986       2.3213
26  1987       2.2900
27  1988       2.2926
28  1989       2.3732
29  1990       2.5626
30  1991       2.4414
31  1992       2.5340
32  1993       2.5370
33  1994       2.4777
34  1995       2.4958
35  1996       2.5640
36  1997       2.7157
37  1998       2.7063
38  1999       2.7462
39  2000       2.7316
40  2001       2.7417
41  2002       2.7551
42  2003       2.6514
43  2004       2.9426
44  2005       2.8287
45  2006       2.8905
46  2007       2.8154
47  2008       3.0625
48  2009       3.0356
49  2010       2.9721
50  2011       3.1639
51  2012       3.0934
52  2013       3.2512
53  2014       3.3195
54  2015       3.3272
55  2016       3.4183
56  2017       3.5410
57  2018       3.4225
58  2019       3.5414


  df = df[df['year'] >= 1979][df['year'] < 2020]


In [25]:
# Copy the yield column into a 1-D tensor; the dtype follows the column's dtype.
yield_values = df['wheat_yield'].values
tensor_target = torch.tensor(yield_values)
tensor_target

tensor([1.8522, 1.8554, 1.8800, 1.9992, 2.1258, 2.2201, 2.1719, 2.3213, 2.2900,
        2.2926, 2.3732, 2.5626, 2.4414, 2.5340, 2.5370, 2.4777, 2.4958, 2.5640,
        2.7157, 2.7063, 2.7462, 2.7316, 2.7417, 2.7551, 2.6514, 2.9426, 2.8287,
        2.8905, 2.8154, 3.0625, 3.0356, 2.9721, 3.1639, 3.0934, 3.2512, 3.3195,
        3.3272, 3.4183, 3.5410, 3.4225, 3.5414], dtype=torch.float64)

### 5. Inputs and target for the LSTM block

In [21]:
# Sliding 12-month windows over the monthly feature rows.
# Window k holds rows k .. k+11, so N rows give N - 12 + 1 windows and a
# result of shape (N - 11, 12, feature_dim).
WINDOW_MONTHS = 12

n_windows = concatenated_features.shape[0] - WINDOW_MONTHS + 1
if n_windows <= 0:
    raise ValueError(
        f'need at least {WINDOW_MONTHS} monthly feature rows, '
        f'got {concatenated_features.shape[0]}'
    )

# Stack all windows in one call; concatenating inside the loop would re-copy
# every previously built window on each iteration.
concatenated_features_12 = torch.stack(
    [concatenated_features[start:start + WINDOW_MONTHS] for start in range(n_windows)],
    dim=0,
)

print(concatenated_features_12.shape)

torch.Size([481, 12, 1024])


In [22]:
# save lstm input data
INPUT = os.path.join(os.getcwd(), 'data')
# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the folder and creating it.
os.makedirs(INPUT, exist_ok=True)

# Write through INPUT so the file always lands in the folder created above.
torch.save(concatenated_features_12, os.path.join(INPUT, 'lstm-inputs.pt'))

In [26]:
# One target value per 12-month window.
# With q, r = divmod(i, 12), window i takes yearly entry q when r == 0 and
# q + 1 otherwise; that is the same index as (i + 11) // 12.
num_targets = concatenated_features_12.shape[0]
yearly_index = [(i + 11) // 12 for i in range(num_targets)]

# Fill a zero-initialised buffer so the result keeps torch.zeros' default dtype
# (the yearly values are cast on assignment).
target = torch.zeros(num_targets)
target[:] = tensor_target[yearly_index]

print(target.shape)
print(target)

torch.Size([481])
tensor([1.8522, 1.8554, 1.8554, 1.8554, 1.8554, 1.8554, 1.8554, 1.8554, 1.8554,
        1.8554, 1.8554, 1.8554, 1.8554, 1.8800, 1.8800, 1.8800, 1.8800, 1.8800,
        1.8800, 1.8800, 1.8800, 1.8800, 1.8800, 1.8800, 1.8800, 1.9992, 1.9992,
        1.9992, 1.9992, 1.9992, 1.9992, 1.9992, 1.9992, 1.9992, 1.9992, 1.9992,
        1.9992, 2.1258, 2.1258, 2.1258, 2.1258, 2.1258, 2.1258, 2.1258, 2.1258,
        2.1258, 2.1258, 2.1258, 2.1258, 2.2201, 2.2201, 2.2201, 2.2201, 2.2201,
        2.2201, 2.2201, 2.2201, 2.2201, 2.2201, 2.2201, 2.2201, 2.1719, 2.1719,
        2.1719, 2.1719, 2.1719, 2.1719, 2.1719, 2.1719, 2.1719, 2.1719, 2.1719,
        2.1719, 2.3213, 2.3213, 2.3213, 2.3213, 2.3213, 2.3213, 2.3213, 2.3213,
        2.3213, 2.3213, 2.3213, 2.3213, 2.2900, 2.2900, 2.2900, 2.2900, 2.2900,
        2.2900, 2.2900, 2.2900, 2.2900, 2.2900, 2.2900, 2.2900, 2.2926, 2.2926,
        2.2926, 2.2926, 2.2926, 2.2926, 2.2926, 2.2926, 2.2926, 2.2926, 2.2926,
        2.2926, 2.3732

In [27]:
# Write the per-window target tensor to disk.
target_path = './data/lstm-target.pt'
torch.save(target, target_path)