In [None]:
# Mount Google Drive at /content/drive; later cells read and write files under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Imports/Helper Functions

15.128  - UV visible albedo for direct radiation - aluvp \\
16.128  - UV visible albedo for diffuse radiation - aluvd \\
17.128  - Near IR albedo for direct radiation - alnip \\
18.128  - Near IR albedo for diffuse radiation - alnid \\
26.128  - Lake cover (new at cycle 41r1) - cl \\
27.128  - Low vegetation cover - cvl \\
28.128  - High vegetation cover - cvh \\
43.128  - Soil type - slt \\
74.128  - Standard deviation of filtered subgrid orography - sdfor \\
129.128 - Geopotential (at the surface: orography * g) - z \\
134.128 - Surface pressure - sp \\
141.128 - Snow depth (used also for glaciers =10) - sd \\
160.128 - Standard deviation of orography - sdor \\
161.128 - Anisotropy of sub-gridscale orography - isor \\
162.128 - Angle of sub-gridscale orography - anor \\
163.128 - Slope of sub-gridscale orography - slor \\
172.128 - Land-sea mask - lsm \\
228.128 - Total precipitation - tp \\
243.128 - Forecast albedo - fal \\

7.228 - Lake depth  \\

35.235  - Mean surface downward short-wave radiation flux \\
36.235  - Mean surface downward long-wave radiation flux \\

128.254 - 2m temperature \\
129.254 - 2m dewpoint temperature \\
130.254 - 10m u-component of wind \\
131.254 - 10m v-component of wind \\


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor

from xgboost import XGBRegressor

In [None]:
def _month_abbrev(month):
  """Return the three-letter English abbreviation for a month number.

  Accepts anything ``int()`` can parse (e.g. '02', '8', 2). Values that are not
  a month number in 1..12 are returned unchanged, converted to ``str``.
  """
  abbreviations = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
  try:
    month_number = int(month)
  except (TypeError, ValueError):
    return str(month)
  if 1 <= month_number <= 12:
    return abbreviations[month_number - 1]
  return str(month)


def get_plot(lst_final_df, loc, year, month, pred_cols=['pred_lr','pred_dt','pred_rf', 'pred_xgb', 'pred_nn'], gt='LST_Day_1km_Max_mean'):
  """Plot predicted columns against the ground-truth column for one location and month.

  Args:
    lst_final_df: DataFrame with 'Year', 'Month', 'Location' and 'Day' columns
      plus every column named in `pred_cols` and `gt`.
    loc: value matched against the 'Location' column.
    year: value matched against the 'Year' column.
    month: value matched against the 'Month' column; also used to build the
      month abbreviation shown in the title (e.g. '02' -> 'Feb').
    pred_cols: names of the prediction columns, each drawn as a dashed line.
      The default list is never mutated.
    gt: name of the ground-truth column, drawn as black scatter points.

  Returns:
    None. The figure is displayed with ``plt.show()``.
  """
  month_name = _month_abbrev(month)

  # One combined mask instead of df[m1][m2][m3]: the chained form applies
  # full-length masks to already-filtered frames, which pandas only tolerates
  # by reindexing the mask (with a warning).
  selected = (
    (lst_final_df['Year'] == year)
    & (lst_final_df['Month'] == month)
    & (lst_final_df['Location'] == loc)
  )
  df = lst_final_df.loc[selected]
  dates = df['Day'].apply(str)  # string labels -> categorical x axis

  plt.figure(figsize=(15, 10))

  color = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]

  for i, col in enumerate(pred_cols):
    # Cycle through the palette so more than len(color) columns do not raise IndexError.
    plt.plot(dates, df[col], c=color[i % len(color)], linestyle='--', marker='o', label=col)
  plt.scatter(dates, df[gt], c='k', marker='o', zorder=20, label=gt)
  # Disabled ERA5 overlay, kept for reference:
  # if era5:
  #   plt.scatter(dates, df[era5], c='r', marker='^', zorder=20, label=era5)

  plt.xlabel('Date of the month')
  plt.ylabel('Temperature (in K)')
  plt.legend()
  plt.title(f'MODIS LST Daily vs. Predicted Skin Temperature ({loc}, {month_name} {year})')

  plt.grid(zorder=0)
  plt.show()

# Aggregating data of different years/months

In [None]:
# Directories on the mounted Drive that hold the per-month input CSVs
# (e.g. Feb_2000.csv, Aug_2000.csv).
src_feb = "/content/drive/MyDrive/ESOWC/feb/csvs/"
src_aug = "/content/drive/MyDrive/ESOWC/aug/csvs/"

# os.listdir returns entries in arbitrary order and lists every entry in the
# directory; keep only CSV files and sort them so the merge order is reproducible.
csv_files_feb = sorted(name for name in os.listdir(src_feb) if name.lower().endswith(".csv"))
csv_files_aug = sorted(name for name in os.listdir(src_aug) if name.lower().endswith(".csv"))

In [None]:
# Sanity check: load a single monthly file (Feb 2000) into memory and preview
# its first rows before merging all files.
df = pd.read_csv(os.path.join(src_feb, "Feb_2000.csv"))
df.head()

Unnamed: 0,latitude,longitude,valid_time,ssrd,strd,tp,hybrid,t,u,v,depthBelowLandLayer,aluvp,aluvd,alnip,alnid,cl,cvl,cvh,istl1,slt,sdfor,z,sp,sd,sdor,isor,anor,slor,t2m,d2m,lsm,fal
0,75.0,-15.0,2000-02-01 12:00:00,0.0,509637.7,2.145767e-06,137.0,247.18149,2.84276,-8.513624,0.0,0.059999,0.060013,0.059994,0.059995,0.0,0.0,0.0,248.36812,0.0,0.0,-20.019043,101049.875,0.0,0.0,0.0,0.785398,0.0001,249.28589,246.61194,0.0,0.697397
1,75.0,-15.0,2000-02-02 12:00:00,0.0,587736.6,2.264976e-06,137.0,249.03334,3.646779,-2.670753,0.0,0.059992,0.06001,0.059994,0.059995,0.0,0.0,0.0,248.13177,0.0,0.0,-20.019043,101024.56,0.0,0.0,0.0,0.785398,0.0001,248.94292,246.28383,0.0,0.70415
2,75.0,-15.0,2000-02-03 12:00:00,0.0,599212.0,3.576279e-06,137.0,249.73878,4.094452,-1.716292,0.0,0.059995,0.059999,0.059994,0.059995,0.0,0.0,0.0,247.82341,0.0,0.0,-20.019043,99424.625,0.0,0.0,0.0,0.785398,0.0001,248.83267,246.3144,0.0,0.709857
3,75.0,-15.0,2000-02-04 12:00:00,0.0,604780.8,9.536743e-07,137.0,252.08395,0.872898,-4.212233,0.0,0.059997,0.060004,0.059994,0.059995,0.0,0.0,0.0,249.67134,0.0,0.0,-20.019043,100198.5,0.0,0.0,0.0,0.785398,0.0001,251.37527,249.13538,0.0,0.716862
4,75.0,-15.0,2000-02-05 12:00:00,0.0,828090.4,8.118153e-05,137.0,255.00777,0.305675,-4.120874,0.0,0.059991,0.060009,0.059994,0.059995,0.0,0.0,0.0,251.8063,0.0,0.0,-20.019043,99304.69,0.0,0.0,0.0,0.785398,0.0001,254.45789,252.46552,0.0,0.70972


In [None]:
# Merge the per-year CSV files of each month into a single CSV per month.

##############################################################
# Known issue: an earlier run crashed after processing       #
# Aug_2004.csv. The cause was not recorded (possibly memory  #
# pressure while holding all frames) - TODO confirm.         #
##############################################################


def merge_monthly_csvs(src_dir, filenames, out_path):
  """Concatenate the CSV files `filenames` found in `src_dir` and write them to `out_path`.

  Args:
    src_dir: directory that contains the input CSV files.
    filenames: iterable of file names (relative to `src_dir`) to merge, in order.
    out_path: destination path of the merged CSV.

  Returns:
    The merged DataFrame, re-indexed from 0.

  Raises:
    ValueError: from ``pd.concat`` when `filenames` is empty.
  """
  frames = []
  for f in filenames:
    print("Filename: ", f)
    frames.append(pd.read_csv(os.path.join(src_dir, f)))

  merged = pd.concat(frames, ignore_index=True)
  # The index is written as an unnamed leading column (pandas default), same as
  # before, so the layout of the output file is unchanged.
  merged.to_csv(out_path)
  return merged


# Aug
result = merge_monthly_csvs(src_aug, csv_files_aug, "./drive/MyDrive/ESOWC/aug/aug_data.csv")

# Feb - read from src_feb; the previous version joined the Feb file names
# onto src_aug, so it looked for them in the wrong directory.
result = merge_monthly_csvs(src_feb, csv_files_feb, "./drive/MyDrive/ESOWC/feb/feb_data.csv")

Filename:  Aug_2000.csv
Filename:  Aug_2001.csv
Filename:  Aug_2002.csv
Filename:  Aug_2003.csv
Filename:  Aug_2004.csv


# Merging the input and output files

In [None]:
# Reload the merged per-month files from Drive (Feb first, then Aug).
# Disabled clean-up steps, kept for reference:
#   .drop(columns=['Unnamed: 0'])
#   .rename(columns={'city': 'Location', 'date': 'datetime'})
result_feb, result_aug = (
  pd.read_csv(csv_path)
  for csv_path in (
    "./drive/MyDrive/ESOWC/feb/feb_data.csv",
    "./drive/MyDrive/ESOWC/aug/aug_data.csv",
  )
)

# Stack both months row-wise into one frame with a fresh 0..n-1 index.
result = pd.concat(objs=(result_feb, result_aug), axis=0, ignore_index=True)
result.head()